{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9993002099370188,
  "eval_steps": 500,
  "global_step": 357,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 750.0178985595703,
      "epoch": 0.0027991602519244225,
      "grad_norm": 0.30192625522613525,
      "kl": 0.0,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.0293,
      "reward": 0.3750000186264515,
      "reward_std": 0.3078143782913685,
      "rewards/accuracy_reward": 0.3750000186264515,
      "step": 1
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 731.9107496473524,
      "epoch": 0.02799160251924423,
      "grad_norm": 0.2259158194065094,
      "kl": 0.00010485781563652886,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.0128,
      "reward": 0.3670635099212329,
      "reward_std": 0.26350560991300476,
      "rewards/accuracy_reward": 0.3670635099212329,
      "step": 10
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 716.9598579406738,
      "epoch": 0.05598320503848846,
      "grad_norm": 0.3309926688671112,
      "kl": 0.0035821676254272463,
      "learning_rate": 1.1111111111111113e-05,
      "loss": 0.0246,
      "reward": 0.40535716228187085,
      "reward_std": 0.2991223815828562,
      "rewards/accuracy_reward": 0.40535716228187085,
      "step": 20
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 743.5920013427734,
      "epoch": 0.08397480755773268,
      "grad_norm": 0.30614855885505676,
      "kl": 0.01827545166015625,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.015,
      "reward": 0.48482145443558694,
      "reward_std": 0.2562454042956233,
      "rewards/accuracy_reward": 0.48482145443558694,
      "step": 30
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 675.5830673217773,
      "epoch": 0.11196641007697691,
      "grad_norm": 0.2699500024318695,
      "kl": 0.03763885498046875,
      "learning_rate": 1.999310448492752e-05,
      "loss": 0.0166,
      "reward": 0.4187500230967999,
      "reward_std": 0.28400791343301535,
      "rewards/accuracy_reward": 0.4187500230967999,
      "step": 40
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 724.2911056518554,
      "epoch": 0.13995801259622112,
      "grad_norm": 0.28290238976478577,
      "kl": 0.054229736328125,
      "learning_rate": 1.9915651236017307e-05,
      "loss": 0.0087,
      "reward": 0.41517859101295473,
      "reward_std": 0.2900457665324211,
      "rewards/accuracy_reward": 0.41517859101295473,
      "step": 50
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 648.35092086792,
      "epoch": 0.16794961511546536,
      "grad_norm": 0.26611465215682983,
      "kl": 0.063665771484375,
      "learning_rate": 1.975286910165463e-05,
      "loss": 0.016,
      "reward": 0.46607145220041274,
      "reward_std": 0.23811200838536023,
      "rewards/accuracy_reward": 0.46607145220041274,
      "step": 60
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 653.6018226623535,
      "epoch": 0.1959412176347096,
      "grad_norm": 0.2749662697315216,
      "kl": 0.077093505859375,
      "learning_rate": 1.95063160182963e-05,
      "loss": 0.0167,
      "reward": 0.46250002086162567,
      "reward_std": 0.26145450249314306,
      "rewards/accuracy_reward": 0.46250002086162567,
      "step": 70
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 849.4866439819336,
      "epoch": 0.22393282015395383,
      "grad_norm": 0.19597865641117096,
      "kl": 0.08182373046875,
      "learning_rate": 1.917835166772562e-05,
      "loss": 0.0064,
      "reward": 0.5196428813040257,
      "reward_std": 0.26366451028734444,
      "rewards/accuracy_reward": 0.5196428813040257,
      "step": 80
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 893.0500381469726,
      "epoch": 0.25192442267319803,
      "grad_norm": 0.2700682282447815,
      "kl": 0.07144775390625,
      "learning_rate": 1.877211489328239e-05,
      "loss": 0.0087,
      "reward": 0.45446430817246436,
      "reward_std": 0.2592643300071359,
      "rewards/accuracy_reward": 0.45446430817246436,
      "step": 90
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 749.3607467651367,
      "epoch": 0.27991602519244224,
      "grad_norm": 0.2792201042175293,
      "kl": 0.079168701171875,
      "learning_rate": 1.829149365898355e-05,
      "loss": 0.0121,
      "reward": 0.4687500227242708,
      "reward_std": 0.2029303913936019,
      "rewards/accuracy_reward": 0.4687500227242708,
      "step": 100
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 666.171460723877,
      "epoch": 0.3079076277116865,
      "grad_norm": 0.24913422763347626,
      "kl": 0.0840087890625,
      "learning_rate": 1.7741087839045992e-05,
      "loss": -0.0013,
      "reward": 0.47857145592570305,
      "reward_std": 0.2354975413531065,
      "rewards/accuracy_reward": 0.47857145592570305,
      "step": 110
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 733.9053924560546,
      "epoch": 0.3358992302309307,
      "grad_norm": 0.23493996262550354,
      "kl": 0.078680419921875,
      "learning_rate": 1.712616519394157e-05,
      "loss": 0.0031,
      "reward": 0.4696428798139095,
      "reward_std": 0.246359870582819,
      "rewards/accuracy_reward": 0.4696428798139095,
      "step": 120
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 966.488444519043,
      "epoch": 0.363890832750175,
      "grad_norm": 0.24625414609909058,
      "kl": 0.087646484375,
      "learning_rate": 1.6452610954323337e-05,
      "loss": 0.0079,
      "reward": 0.42410715818405154,
      "reward_std": 0.2372832555323839,
      "rewards/accuracy_reward": 0.42410715818405154,
      "step": 130
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 932.5196838378906,
      "epoch": 0.3918824352694192,
      "grad_norm": 0.3386368751525879,
      "kl": 0.0993896484375,
      "learning_rate": 1.5726871495339563e-05,
      "loss": 0.0242,
      "reward": 0.43839287767186763,
      "reward_std": 0.23617825247347354,
      "rewards/accuracy_reward": 0.43839287767186763,
      "step": 140
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 888.8741516113281,
      "epoch": 0.4198740377886634,
      "grad_norm": 0.4019307792186737,
      "kl": 0.1232421875,
      "learning_rate": 1.4955892640410717e-05,
      "loss": 0.0233,
      "reward": 0.4660714529454708,
      "reward_std": 0.2454029094427824,
      "rewards/accuracy_reward": 0.4660714529454708,
      "step": 150
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 801.5536071777344,
      "epoch": 0.44786564030790765,
      "grad_norm": 0.3658553957939148,
      "kl": 0.1235595703125,
      "learning_rate": 1.4147053184944674e-05,
      "loss": 0.016,
      "reward": 0.4410714492201805,
      "reward_std": 0.25928416270762683,
      "rewards/accuracy_reward": 0.4410714492201805,
      "step": 160
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 689.7009254455567,
      "epoch": 0.47585724282715186,
      "grad_norm": 0.5175524950027466,
      "kl": 0.17607421875,
      "learning_rate": 1.3308094276213557e-05,
      "loss": 0.0262,
      "reward": 0.4401785898953676,
      "reward_std": 0.23054485712200404,
      "rewards/accuracy_reward": 0.4401785898953676,
      "step": 170
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 613.9518119812012,
      "epoch": 0.5038488453463961,
      "grad_norm": 0.38585546612739563,
      "kl": 0.1792724609375,
      "learning_rate": 1.2447045325275215e-05,
      "loss": 0.0113,
      "reward": 0.4428571630269289,
      "reward_std": 0.2558211121708155,
      "rewards/accuracy_reward": 0.4428571630269289,
      "step": 180
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 767.7786056518555,
      "epoch": 0.5318404478656403,
      "grad_norm": 0.22975799441337585,
      "kl": 0.10023193359375,
      "learning_rate": 1.1572147160012956e-05,
      "loss": 0.0294,
      "reward": 0.45357144996523857,
      "reward_std": 0.2570741597563028,
      "rewards/accuracy_reward": 0.45357144996523857,
      "step": 190
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 1036.6339736938476,
      "epoch": 0.5598320503848845,
      "grad_norm": 0.3325261175632477,
      "kl": 0.1287353515625,
      "learning_rate": 1.0691773154771508e-05,
      "loss": 0.0119,
      "reward": 0.4821428835391998,
      "reward_std": 0.2324786176905036,
      "rewards/accuracy_reward": 0.4821428835391998,
      "step": 200
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 868.197361755371,
      "epoch": 0.5878236529041287,
      "grad_norm": 0.6180446743965149,
      "kl": 0.1508056640625,
      "learning_rate": 9.814349091432634e-06,
      "loss": 0.0277,
      "reward": 0.4821428798139095,
      "reward_std": 0.2132600512355566,
      "rewards/accuracy_reward": 0.4821428798139095,
      "step": 210
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 818.4768188476562,
      "epoch": 0.615815255423373,
      "grad_norm": 0.2782590687274933,
      "kl": 0.127099609375,
      "learning_rate": 8.948272518914737e-06,
      "loss": 0.0071,
      "reward": 0.4758928822353482,
      "reward_std": 0.22520754840224982,
      "rewards/accuracy_reward": 0.4758928822353482,
      "step": 220
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 785.9794929504394,
      "epoch": 0.6438068579426172,
      "grad_norm": 0.2999161183834076,
      "kl": 0.10723876953125,
      "learning_rate": 8.101832382881249e-06,
      "loss": 0.0243,
      "reward": 0.4767857313156128,
      "reward_std": 0.2523977212607861,
      "rewards/accuracy_reward": 0.4767857313156128,
      "step": 230
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 833.9795013427735,
      "epoch": 0.6717984604618614,
      "grad_norm": 0.28707244992256165,
      "kl": 0.1548828125,
      "learning_rate": 7.283129694856508e-06,
      "loss": 0.0181,
      "reward": 0.420535734295845,
      "reward_std": 0.24855004157871008,
      "rewards/accuracy_reward": 0.420535734295845,
      "step": 240
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 825.6973556518554,
      "epoch": 0.6997900629811057,
      "grad_norm": 0.2718600630760193,
      "kl": 0.2059814453125,
      "learning_rate": 6.500000000000003e-06,
      "loss": 0.0192,
      "reward": 0.46696431189775467,
      "reward_std": 0.2507600516080856,
      "rewards/accuracy_reward": 0.46696431189775467,
      "step": 250
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 729.3732452392578,
      "epoch": 0.72778166550035,
      "grad_norm": 0.327871173620224,
      "kl": 0.1481689453125,
      "learning_rate": 5.759938385575454e-06,
      "loss": 0.0284,
      "reward": 0.48750002318993213,
      "reward_std": 0.25541665144264697,
      "rewards/accuracy_reward": 0.48750002318993213,
      "step": 260
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 769.4178924560547,
      "epoch": 0.7557732680195941,
      "grad_norm": 0.3249359428882599,
      "kl": 0.23892822265625,
      "learning_rate": 5.070027747835002e-06,
      "loss": 0.024,
      "reward": 0.48839287757873534,
      "reward_std": 0.2081593234091997,
      "rewards/accuracy_reward": 0.48839287757873534,
      "step": 270
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 738.6937805175781,
      "epoch": 0.7837648705388384,
      "grad_norm": 0.2741276025772095,
      "kl": 0.20296630859375,
      "learning_rate": 4.436871003853553e-06,
      "loss": 0.0376,
      "reward": 0.47500001937150954,
      "reward_std": 0.26311200819909575,
      "rewards/accuracy_reward": 0.47500001937150954,
      "step": 280
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 706.5491378784179,
      "epoch": 0.8117564730580826,
      "grad_norm": 0.29115381836891174,
      "kl": 0.1580322265625,
      "learning_rate": 3.866527897092401e-06,
      "loss": 0.0139,
      "reward": 0.4812500238418579,
      "reward_std": 0.2547359408810735,
      "rewards/accuracy_reward": 0.4812500238418579,
      "step": 290
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 751.8768203735351,
      "epoch": 0.8397480755773268,
      "grad_norm": 0.26477473974227905,
      "kl": 0.1802001953125,
      "learning_rate": 3.364457001506166e-06,
      "loss": 0.0253,
      "reward": 0.4580357393249869,
      "reward_std": 0.2625793442130089,
      "rewards/accuracy_reward": 0.4580357393249869,
      "step": 300
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 732.6241462707519,
      "epoch": 0.867739678096571,
      "grad_norm": 0.3064480125904083,
      "kl": 0.19808349609375,
      "learning_rate": 2.935463479253442e-06,
      "loss": 0.02,
      "reward": 0.40446430407464506,
      "reward_std": 0.2393452214077115,
      "rewards/accuracy_reward": 0.40446430407464506,
      "step": 310
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 715.6580657958984,
      "epoch": 0.8957312806158153,
      "grad_norm": 0.37576210498809814,
      "kl": 0.19476318359375,
      "learning_rate": 2.5836530920055976e-06,
      "loss": 0.0327,
      "reward": 0.40625001918524506,
      "reward_std": 0.23536933306604624,
      "rewards/accuracy_reward": 0.40625001918524506,
      "step": 320
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 731.3232460021973,
      "epoch": 0.9237228831350595,
      "grad_norm": 0.3001156151294708,
      "kl": 0.3147216796875,
      "learning_rate": 2.3123929059970286e-06,
      "loss": 0.0557,
      "reward": 0.4071428745985031,
      "reward_std": 0.22367824967950584,
      "rewards/accuracy_reward": 0.4071428745985031,
      "step": 330
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 669.9786071777344,
      "epoch": 0.9517144856543037,
      "grad_norm": 0.504320502281189,
      "kl": 0.3324951171875,
      "learning_rate": 2.1242790668964046e-06,
      "loss": 0.0244,
      "reward": 0.42232145071029664,
      "reward_std": 0.2736980877816677,
      "rewards/accuracy_reward": 0.42232145071029664,
      "step": 340
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 650.3339591979981,
      "epoch": 0.979706088173548,
      "grad_norm": 0.5142662525177002,
      "kl": 0.21497802734375,
      "learning_rate": 2.021111952915447e-06,
      "loss": 0.0222,
      "reward": 0.43214288018643854,
      "reward_std": 0.2758882647380233,
      "rewards/accuracy_reward": 0.43214288018643854,
      "step": 350
    },
    {
      "clip_ratio": 0.0,
      "completion_length": 621.5370178222656,
      "epoch": 0.9993002099370188,
      "kl": 0.20172991071428573,
      "reward": 0.45025512549493996,
      "reward_std": 0.22983166708477906,
      "rewards/accuracy_reward": 0.45025512549493996,
      "step": 357,
      "total_flos": 0.0,
      "train_loss": 0.01889443341694793,
      "train_runtime": 24438.0983,
      "train_samples_per_second": 0.409,
      "train_steps_per_second": 0.015
    }
  ],
  "logging_steps": 10,
  "max_steps": 357,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}