0.0_continue_iter_2 / trainer_state.json
ShenaoZ's picture
Model save
cd81479 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-08,
"logits/chosen": -2.722433567047119,
"logits/rejected": -2.7098374366760254,
"logps/chosen": -422.9447937011719,
"logps/rejected": -430.49359130859375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.6438896656036377,
"logits/rejected": -2.6239864826202393,
"logps/chosen": -323.1748046875,
"logps/rejected": -388.55389404296875,
"loss": 0.6891,
"rewards/accuracies": 0.4791666567325592,
"rewards/chosen": -0.015710754320025444,
"rewards/margins": 0.010628411546349525,
"rewards/rejected": -0.02633916586637497,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -2.6440882682800293,
"logits/rejected": -2.619230031967163,
"logps/chosen": -336.45513916015625,
"logps/rejected": -428.5270080566406,
"loss": 0.6406,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.40671101212501526,
"rewards/margins": 0.20794150233268738,
"rewards/rejected": -0.6146525144577026,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -2.466252088546753,
"logits/rejected": -2.364461898803711,
"logps/chosen": -401.71636962890625,
"logps/rejected": -522.7274169921875,
"loss": 0.6048,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.9803802371025085,
"rewards/margins": 0.5280221104621887,
"rewards/rejected": -1.5084022283554077,
"step": 30
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -2.2471065521240234,
"logits/rejected": -2.2367751598358154,
"logps/chosen": -439.9005432128906,
"logps/rejected": -537.7637939453125,
"loss": 0.5401,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.036055326461792,
"rewards/margins": 0.5358790755271912,
"rewards/rejected": -1.571934461593628,
"step": 40
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -2.159681797027588,
"logits/rejected": -2.126121759414673,
"logps/chosen": -450.20904541015625,
"logps/rejected": -595.7091064453125,
"loss": 0.5235,
"rewards/accuracies": 0.78125,
"rewards/chosen": -1.3249537944793701,
"rewards/margins": 0.7140070199966431,
"rewards/rejected": -2.0389609336853027,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -1.9871399402618408,
"logits/rejected": -1.9477945566177368,
"logps/chosen": -417.7908630371094,
"logps/rejected": -548.0614013671875,
"loss": 0.5139,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.445603370666504,
"rewards/margins": 0.5314251780509949,
"rewards/rejected": -1.9770286083221436,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -1.9721695184707642,
"logits/rejected": -1.92262864112854,
"logps/chosen": -486.48779296875,
"logps/rejected": -657.6774291992188,
"loss": 0.483,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -1.9522823095321655,
"rewards/margins": 0.8621411323547363,
"rewards/rejected": -2.814423084259033,
"step": 70
},
{
"epoch": 0.5,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -2.071289300918579,
"logits/rejected": -1.9509702920913696,
"logps/chosen": -483.87335205078125,
"logps/rejected": -599.316162109375,
"loss": 0.5271,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.6747407913208008,
"rewards/margins": 0.8010322451591492,
"rewards/rejected": -2.475773334503174,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -2.0607149600982666,
"logits/rejected": -1.9980506896972656,
"logps/chosen": -486.66217041015625,
"logps/rejected": -610.5108642578125,
"loss": 0.4938,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.4693559408187866,
"rewards/margins": 0.7737256288528442,
"rewards/rejected": -2.2430813312530518,
"step": 90
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -2.062950611114502,
"logits/rejected": -1.944451928138733,
"logps/chosen": -476.5865173339844,
"logps/rejected": -568.955078125,
"loss": 0.5177,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.5338852405548096,
"rewards/margins": 0.7051337957382202,
"rewards/rejected": -2.2390189170837402,
"step": 100
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -1.9535932540893555,
"logits/rejected": -1.8764221668243408,
"logps/chosen": -428.9962463378906,
"logps/rejected": -579.9666137695312,
"loss": 0.5193,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.5599712133407593,
"rewards/margins": 0.7531036138534546,
"rewards/rejected": -2.313074827194214,
"step": 110
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -1.986311674118042,
"logits/rejected": -1.9622013568878174,
"logps/chosen": -454.23095703125,
"logps/rejected": -558.356201171875,
"loss": 0.5527,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -1.4659466743469238,
"rewards/margins": 0.6345506310462952,
"rewards/rejected": -2.100497007369995,
"step": 120
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -1.972738265991211,
"logits/rejected": -1.9125683307647705,
"logps/chosen": -487.3465881347656,
"logps/rejected": -553.6285400390625,
"loss": 0.5399,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -1.476697564125061,
"rewards/margins": 0.5875382423400879,
"rewards/rejected": -2.0642354488372803,
"step": 130
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -1.9565547704696655,
"logits/rejected": -1.8518273830413818,
"logps/chosen": -491.41961669921875,
"logps/rejected": -588.5744018554688,
"loss": 0.5317,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -1.5963155031204224,
"rewards/margins": 0.6913558840751648,
"rewards/rejected": -2.2876713275909424,
"step": 140
},
{
"epoch": 0.94,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -1.9728143215179443,
"logits/rejected": -1.9153906106948853,
"logps/chosen": -478.5552673339844,
"logps/rejected": -588.2154541015625,
"loss": 0.5563,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -1.5690878629684448,
"rewards/margins": 0.5977516770362854,
"rewards/rejected": -2.166839361190796,
"step": 150
},
{
"epoch": 1.0,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5527268985532364,
"train_runtime": 2634.2992,
"train_samples_per_second": 7.736,
"train_steps_per_second": 0.06
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}