|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.99581589958159, |
|
"eval_steps": 500, |
|
"global_step": 119, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.166666666666666e-08, |
|
"logits/chosen": -2.4389588832855225, |
|
"logits/rejected": -2.3863701820373535, |
|
"logps/chosen": -220.68759155273438, |
|
"logps/rejected": -373.33087158203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.3459177017211914, |
|
"logits/rejected": -2.334230422973633, |
|
"logps/chosen": -247.85926818847656, |
|
"logps/rejected": -363.747802734375, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": -0.021831953898072243, |
|
"rewards/margins": 0.03538733720779419, |
|
"rewards/rejected": -0.05721929296851158, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931352528237397e-07, |
|
"logits/chosen": -2.238157272338867, |
|
"logits/rejected": -2.2056198120117188, |
|
"logps/chosen": -283.0589294433594, |
|
"logps/rejected": -371.25146484375, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3717283308506012, |
|
"rewards/margins": 0.2823185622692108, |
|
"rewards/rejected": -0.654046893119812, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.658920803689553e-07, |
|
"logits/chosen": -2.2126076221466064, |
|
"logits/rejected": -2.165159225463867, |
|
"logps/chosen": -284.18988037109375, |
|
"logps/rejected": -394.07562255859375, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.2739036977291107, |
|
"rewards/margins": 0.25531530380249023, |
|
"rewards/rejected": -0.5292190313339233, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.201712553872657e-07, |
|
"logits/chosen": -2.2903313636779785, |
|
"logits/rejected": -2.253678798675537, |
|
"logps/chosen": -268.6778259277344, |
|
"logps/rejected": -406.4809875488281, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3238675594329834, |
|
"rewards/margins": 0.3434136211872101, |
|
"rewards/rejected": -0.6672812104225159, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.598859066780754e-07, |
|
"logits/chosen": -2.216749668121338, |
|
"logits/rejected": -2.1586577892303467, |
|
"logps/chosen": -270.4320373535156, |
|
"logps/rejected": -431.34161376953125, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3394399583339691, |
|
"rewards/margins": 0.34264153242111206, |
|
"rewards/rejected": -0.6820814609527588, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9019570347986706e-07, |
|
"logits/chosen": -2.3308839797973633, |
|
"logits/rejected": -2.299959659576416, |
|
"logps/chosen": -260.9419860839844, |
|
"logps/rejected": -367.1455383300781, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.20153513550758362, |
|
"rewards/margins": 0.20082291960716248, |
|
"rewards/rejected": -0.4023580551147461, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1706525253979534e-07, |
|
"logits/chosen": -2.3276865482330322, |
|
"logits/rejected": -2.293214797973633, |
|
"logps/chosen": -288.04248046875, |
|
"logps/rejected": -417.49383544921875, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.24966976046562195, |
|
"rewards/margins": 0.32144370675086975, |
|
"rewards/rejected": -0.5711134076118469, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4675360263490295e-07, |
|
"logits/chosen": -2.3125128746032715, |
|
"logits/rejected": -2.2743871212005615, |
|
"logps/chosen": -275.89019775390625, |
|
"logps/rejected": -385.26055908203125, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2819868326187134, |
|
"rewards/margins": 0.24118752777576447, |
|
"rewards/rejected": -0.5231744050979614, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.527854855097224e-08, |
|
"logits/chosen": -2.294402837753296, |
|
"logits/rejected": -2.2615461349487305, |
|
"logps/chosen": -298.8416748046875, |
|
"logps/rejected": -409.93212890625, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.285415917634964, |
|
"rewards/margins": 0.26987579464912415, |
|
"rewards/rejected": -0.5552917718887329, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.790158337517127e-08, |
|
"logits/chosen": -2.341721534729004, |
|
"logits/rejected": -2.3034510612487793, |
|
"logps/chosen": -271.6078186035156, |
|
"logps/rejected": -407.69989013671875, |
|
"loss": 0.606, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.26334214210510254, |
|
"rewards/margins": 0.33658233284950256, |
|
"rewards/rejected": -0.5999244451522827, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.677580722139671e-09, |
|
"logits/chosen": -2.2649519443511963, |
|
"logits/rejected": -2.2306463718414307, |
|
"logps/chosen": -257.1134338378906, |
|
"logps/rejected": -387.23760986328125, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.28197985887527466, |
|
"rewards/margins": 0.3022700846195221, |
|
"rewards/rejected": -0.5842499732971191, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 119, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6313913529660521, |
|
"train_runtime": 1987.426, |
|
"train_samples_per_second": 7.69, |
|
"train_steps_per_second": 0.06 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 119, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|