---
license: llama3.2
---

This is an experimental ORPO finetune of Llama 3.2 1B, trained with our auto-red-teaming method in addition to helpful instructions.

```json
{
  "cache_dir": "/leonardo_work/EUHPC_E03_068/.cache",
  "method": "orpo",
  "dataset": "autoredteam",
  "lr": 0.01,
  "train_batch_size": 5,
  "eval_batch_size": 5,
  "num_epochs": 3,
  "seed": 42,
  "eval_only": false,
  "evaluation_size": 2000,
  "checkpoint_path": null,
  "experiment_group": "results",
  "context_length": 1024,
  "train_summarization": "",
  "dpo_beta": 0.1,
  "orpo_beta": 0.1,
  "kl_coef": 0.0,
  "reward_model": "",
  "bestofn_size": 4,
  "train_reward_model": ""
}
```
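
For illustration only, the sketch below shows how hyperparameters like these could be wired into TRL's `ORPOTrainer`. This is not the training script used for this checkpoint: the base model ID, the dataset path, and the use of TRL itself are assumptions.

```python
# Minimal sketch, assuming TRL's ORPOTrainer and a preference dataset with
# "prompt" / "chosen" / "rejected" columns. The model ID and dataset name are
# illustrative placeholders, not the exact ones used for this checkpoint.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import ORPOConfig, ORPOTrainer

model_id = "meta-llama/Llama-3.2-1B"  # assumed base model
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Hypothetical dataset path; the card only names the dataset "autoredteam".
train_dataset = load_dataset("your-org/autoredteam", split="train")

# Map the config values above onto ORPOConfig / TrainingArguments fields.
args = ORPOConfig(
    output_dir="results",           # experiment_group
    learning_rate=0.01,             # lr
    per_device_train_batch_size=5,  # train_batch_size
    per_device_eval_batch_size=5,   # eval_batch_size
    num_train_epochs=3,             # num_epochs
    seed=42,
    max_length=1024,                # context_length
    beta=0.1,                       # orpo_beta
)

trainer = ORPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    processing_class=tokenizer,     # `tokenizer=` in older TRL releases
)
trainer.train()
```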