|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.984, |
|
"eval_steps": 100, |
|
"global_step": 124, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.846153846153847e-07, |
|
"logits/chosen": 0.9600980877876282, |
|
"logits/rejected": 1.094868540763855, |
|
"logps/chosen": -119.03445434570312, |
|
"logps/rejected": -112.18000030517578, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 0.8411452770233154, |
|
"logits/rejected": 0.8130205273628235, |
|
"logps/chosen": -142.57501220703125, |
|
"logps/rejected": -130.0977325439453, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.3472222089767456, |
|
"rewards/chosen": 0.00034333759685978293, |
|
"rewards/margins": -0.00018329803424421698, |
|
"rewards/rejected": 0.0005266356747597456, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.951096619903317e-06, |
|
"logits/chosen": 0.8094542622566223, |
|
"logits/rejected": 0.8163386583328247, |
|
"logps/chosen": -155.31309509277344, |
|
"logps/rejected": -143.37014770507812, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.000520143483299762, |
|
"rewards/margins": -0.0002155094116460532, |
|
"rewards/rejected": 0.0007356529822573066, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.716164218065246e-06, |
|
"logits/chosen": 0.8769502639770508, |
|
"logits/rejected": 0.9219558835029602, |
|
"logps/chosen": -154.8149871826172, |
|
"logps/rejected": -141.13705444335938, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": 0.00016751736984588206, |
|
"rewards/margins": -0.000498370616696775, |
|
"rewards/rejected": 0.0006658880738541484, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3048902348863116e-06, |
|
"logits/chosen": 0.8127928972244263, |
|
"logits/rejected": 0.8382323384284973, |
|
"logps/chosen": -172.5569610595703, |
|
"logps/rejected": -157.7318115234375, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.00022131178411655128, |
|
"rewards/margins": 0.00017877723439596593, |
|
"rewards/rejected": 4.253460792824626e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": 0.8299118280410767, |
|
"logits/rejected": 0.8132155537605286, |
|
"logps/chosen": -160.2462615966797, |
|
"logps/rejected": -146.29754638671875, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.00015722319949418306, |
|
"rewards/margins": 7.946729601826519e-05, |
|
"rewards/rejected": 7.775588892400265e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.0956464785579125e-06, |
|
"logits/chosen": 0.7370177507400513, |
|
"logits/rejected": 0.7227329611778259, |
|
"logps/chosen": -147.09487915039062, |
|
"logps/rejected": -134.1732940673828, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.000294997647870332, |
|
"rewards/margins": 6.74725151839084e-06, |
|
"rewards/rejected": -0.00030174493440426886, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.39389699200963e-06, |
|
"logits/chosen": 0.8728678822517395, |
|
"logits/rejected": 0.8462715148925781, |
|
"logps/chosen": -146.96482849121094, |
|
"logps/rejected": -137.31175231933594, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.00032994337379932404, |
|
"rewards/margins": 0.00023689583758823574, |
|
"rewards/rejected": -0.0005668391240760684, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.700590188571887e-06, |
|
"logits/chosen": 0.852059543132782, |
|
"logits/rejected": 0.896367073059082, |
|
"logps/chosen": -156.40220642089844, |
|
"logps/rejected": -141.6897430419922, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0007336369017139077, |
|
"rewards/margins": 0.00012243367382325232, |
|
"rewards/rejected": -0.0008560704882256687, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0708929268538034e-06, |
|
"logits/chosen": 0.8062575459480286, |
|
"logits/rejected": 0.8183335065841675, |
|
"logps/chosen": -159.78892517089844, |
|
"logps/rejected": -148.1704864501953, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.00028093549190089107, |
|
"rewards/margins": 0.0007433668943122029, |
|
"rewards/rejected": -0.001024302444420755, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5.549106142039018e-07, |
|
"logits/chosen": 0.7814786434173584, |
|
"logits/rejected": 0.8064650297164917, |
|
"logps/chosen": -149.96612548828125, |
|
"logps/rejected": -137.0812225341797, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.00046937749721109867, |
|
"rewards/margins": 0.0005115901003591716, |
|
"rewards/rejected": -0.0009809675393626094, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_logits/chosen": 0.8348284363746643, |
|
"eval_logits/rejected": 0.8922409415245056, |
|
"eval_logps/chosen": -256.5745544433594, |
|
"eval_logps/rejected": -233.52647399902344, |
|
"eval_loss": 0.010232986882328987, |
|
"eval_rewards/accuracies": 0.5019999742507935, |
|
"eval_rewards/chosen": 0.00039203467895276845, |
|
"eval_rewards/margins": 0.0002707619860302657, |
|
"eval_rewards/rejected": 0.00012127268564654514, |
|
"eval_runtime": 532.7373, |
|
"eval_samples_per_second": 3.754, |
|
"eval_steps_per_second": 0.939, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.937002879188285e-07, |
|
"logits/chosen": 0.8264827728271484, |
|
"logits/rejected": 0.797345757484436, |
|
"logps/chosen": -160.50473022460938, |
|
"logps/rejected": -143.83724975585938, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0012062744935974479, |
|
"rewards/margins": 5.5822358262958005e-05, |
|
"rewards/rejected": -0.0012620969209820032, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.6003680950742728e-08, |
|
"logits/chosen": 0.8524961471557617, |
|
"logits/rejected": 0.8347142934799194, |
|
"logps/chosen": -145.30233764648438, |
|
"logps/rejected": -132.08731079101562, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0007318368298001587, |
|
"rewards/margins": 0.0001280030992347747, |
|
"rewards/rejected": -0.0008598399581387639, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"step": 124, |
|
"total_flos": 0.0, |
|
"train_loss": 0.01022457688926689, |
|
"train_runtime": 1632.1621, |
|
"train_samples_per_second": 1.225, |
|
"train_steps_per_second": 0.076 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 124, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|