|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9968652037617555, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.722433567047119, |
|
"logits/rejected": -2.7098374366760254, |
|
"logps/chosen": -422.9447937011719, |
|
"logps/rejected": -430.49359130859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.6438896656036377, |
|
"logits/rejected": -2.6239864826202393, |
|
"logps/chosen": -323.1748046875, |
|
"logps/rejected": -388.55389404296875, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": -0.015710754320025444, |
|
"rewards/margins": 0.010628411546349525, |
|
"rewards/rejected": -0.02633916586637497, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.6440882682800293, |
|
"logits/rejected": -2.619230031967163, |
|
"logps/chosen": -336.45513916015625, |
|
"logps/rejected": -428.5270080566406, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.40671101212501526, |
|
"rewards/margins": 0.20794150233268738, |
|
"rewards/rejected": -0.6146525144577026, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.466252088546753, |
|
"logits/rejected": -2.364461898803711, |
|
"logps/chosen": -401.71636962890625, |
|
"logps/rejected": -522.7274169921875, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9803802371025085, |
|
"rewards/margins": 0.5280221104621887, |
|
"rewards/rejected": -1.5084022283554077, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.2471065521240234, |
|
"logits/rejected": -2.2367751598358154, |
|
"logps/chosen": -439.9005432128906, |
|
"logps/rejected": -537.7637939453125, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.036055326461792, |
|
"rewards/margins": 0.5358790755271912, |
|
"rewards/rejected": -1.571934461593628, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.159681797027588, |
|
"logits/rejected": -2.126121759414673, |
|
"logps/chosen": -450.20904541015625, |
|
"logps/rejected": -595.7091064453125, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3249537944793701, |
|
"rewards/margins": 0.7140070199966431, |
|
"rewards/rejected": -2.0389609336853027, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -1.9871399402618408, |
|
"logits/rejected": -1.9477945566177368, |
|
"logps/chosen": -417.7908630371094, |
|
"logps/rejected": -548.0614013671875, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.445603370666504, |
|
"rewards/margins": 0.5314251780509949, |
|
"rewards/rejected": -1.9770286083221436, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -1.9721695184707642, |
|
"logits/rejected": -1.92262864112854, |
|
"logps/chosen": -486.48779296875, |
|
"logps/rejected": -657.6774291992188, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9522823095321655, |
|
"rewards/margins": 0.8621411323547363, |
|
"rewards/rejected": -2.814423084259033, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -2.071289300918579, |
|
"logits/rejected": -1.9509702920913696, |
|
"logps/chosen": -483.87335205078125, |
|
"logps/rejected": -599.316162109375, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6747407913208008, |
|
"rewards/margins": 0.8010322451591492, |
|
"rewards/rejected": -2.475773334503174, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -2.0607149600982666, |
|
"logits/rejected": -1.9980506896972656, |
|
"logps/chosen": -486.66217041015625, |
|
"logps/rejected": -610.5108642578125, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.4693559408187866, |
|
"rewards/margins": 0.7737256288528442, |
|
"rewards/rejected": -2.2430813312530518, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -2.062950611114502, |
|
"logits/rejected": -1.944451928138733, |
|
"logps/chosen": -476.5865173339844, |
|
"logps/rejected": -568.955078125, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5338852405548096, |
|
"rewards/margins": 0.7051337957382202, |
|
"rewards/rejected": -2.2390189170837402, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -1.9535932540893555, |
|
"logits/rejected": -1.8764221668243408, |
|
"logps/chosen": -428.9962463378906, |
|
"logps/rejected": -579.9666137695312, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5599712133407593, |
|
"rewards/margins": 0.7531036138534546, |
|
"rewards/rejected": -2.313074827194214, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -1.986311674118042, |
|
"logits/rejected": -1.9622013568878174, |
|
"logps/chosen": -454.23095703125, |
|
"logps/rejected": -558.356201171875, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4659466743469238, |
|
"rewards/margins": 0.6345506310462952, |
|
"rewards/rejected": -2.100497007369995, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -1.972738265991211, |
|
"logits/rejected": -1.9125683307647705, |
|
"logps/chosen": -487.3465881347656, |
|
"logps/rejected": -553.6285400390625, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.476697564125061, |
|
"rewards/margins": 0.5875382423400879, |
|
"rewards/rejected": -2.0642354488372803, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -1.9565547704696655, |
|
"logits/rejected": -1.8518273830413818, |
|
"logps/chosen": -491.41961669921875, |
|
"logps/rejected": -588.5744018554688, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5963155031204224, |
|
"rewards/margins": 0.6913558840751648, |
|
"rewards/rejected": -2.2876713275909424, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -1.9728143215179443, |
|
"logits/rejected": -1.9153906106948853, |
|
"logps/chosen": -478.5552673339844, |
|
"logps/rejected": -588.2154541015625, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5690878629684448, |
|
"rewards/margins": 0.5977516770362854, |
|
"rewards/rejected": -2.166839361190796, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5527268985532364, |
|
"train_runtime": 2634.2992, |
|
"train_samples_per_second": 7.736, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|