|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 49, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 379.71429443359375, |
|
"epoch": 0.02, |
|
"grad_norm": 2.3801450729370117, |
|
"kl": 0.0, |
|
"learning_rate": 5e-08, |
|
"loss": -0.0295, |
|
"reward": 0.8928571939468384, |
|
"reward_std": 0.26375192403793335, |
|
"rewards/accuracy_reward": 0.8571429252624512, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0357142873108387, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 602.6250305175781, |
|
"epoch": 0.1, |
|
"grad_norm": 1.251340627670288, |
|
"kl": 0.0002231597900390625, |
|
"learning_rate": 2.5e-07, |
|
"loss": -0.004, |
|
"reward": 0.5044643133878708, |
|
"reward_std": 0.4692006930708885, |
|
"rewards/accuracy_reward": 0.5000000298023224, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.004464285913854837, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 700.3143188476563, |
|
"epoch": 0.2, |
|
"grad_norm": 3.892239809036255, |
|
"kl": 0.00029048919677734377, |
|
"learning_rate": 5e-07, |
|
"loss": -0.0521, |
|
"reward": 0.5571428656578064, |
|
"reward_std": 0.30040043592453003, |
|
"rewards/accuracy_reward": 0.5285714507102967, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.02857142984867096, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 622.2857299804688, |
|
"epoch": 0.3, |
|
"grad_norm": 1.9563440084457397, |
|
"kl": 0.000347137451171875, |
|
"learning_rate": 4.96201938253052e-07, |
|
"loss": -0.0156, |
|
"reward": 0.5642857372760772, |
|
"reward_std": 0.4562141001224518, |
|
"rewards/accuracy_reward": 0.5000000238418579, |
|
"rewards/format_reward": 0.01428571492433548, |
|
"rewards/tag_count_reward": 0.05000000074505806, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 623.8857543945312, |
|
"epoch": 0.4, |
|
"grad_norm": 3.7430624961853027, |
|
"kl": 0.0006534576416015625, |
|
"learning_rate": 4.849231551964771e-07, |
|
"loss": -0.0311, |
|
"reward": 0.5000000238418579, |
|
"reward_std": 0.39751608967781066, |
|
"rewards/accuracy_reward": 0.4428571581840515, |
|
"rewards/format_reward": 0.01428571492433548, |
|
"rewards/tag_count_reward": 0.04285714328289032, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 519.84287109375, |
|
"epoch": 0.5, |
|
"grad_norm": 2.7288520336151123, |
|
"kl": 0.007835006713867188, |
|
"learning_rate": 4.6650635094610966e-07, |
|
"loss": 0.083, |
|
"reward": 0.6428571701049804, |
|
"reward_std": 0.4448284685611725, |
|
"rewards/accuracy_reward": 0.5714285880327225, |
|
"rewards/format_reward": 0.02857142984867096, |
|
"rewards/tag_count_reward": 0.04285714626312256, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 572.8857360839844, |
|
"epoch": 0.6, |
|
"grad_norm": 8.5879487991333, |
|
"kl": 0.0009708404541015625, |
|
"learning_rate": 4.415111107797445e-07, |
|
"loss": -0.056, |
|
"reward": 0.8035714507102967, |
|
"reward_std": 0.42971263229846957, |
|
"rewards/accuracy_reward": 0.7142857491970063, |
|
"rewards/format_reward": 0.01428571492433548, |
|
"rewards/tag_count_reward": 0.07500000335276127, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 606.6429016113282, |
|
"epoch": 0.7, |
|
"grad_norm": 1.614426612854004, |
|
"kl": 0.00061492919921875, |
|
"learning_rate": 4.106969024216348e-07, |
|
"loss": 0.0218, |
|
"reward": 0.7428571701049804, |
|
"reward_std": 0.49451608657836915, |
|
"rewards/accuracy_reward": 0.6571428775787354, |
|
"rewards/format_reward": 0.02857142984867096, |
|
"rewards/tag_count_reward": 0.05714286118745804, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 659.6000305175781, |
|
"epoch": 0.8, |
|
"grad_norm": 0.00019049388356506824, |
|
"kl": 0.008877372741699219, |
|
"learning_rate": 3.75e-07, |
|
"loss": 0.0187, |
|
"reward": 0.45714287757873534, |
|
"reward_std": 0.3365379124879837, |
|
"rewards/accuracy_reward": 0.4285714507102966, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.02857142984867096, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 681.0714599609375, |
|
"epoch": 0.9, |
|
"grad_norm": 3.8056037425994873, |
|
"kl": 0.0019466400146484375, |
|
"learning_rate": 3.355050358314172e-07, |
|
"loss": 0.0472, |
|
"reward": 0.5178571701049804, |
|
"reward_std": 0.3290700912475586, |
|
"rewards/accuracy_reward": 0.4571428894996643, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.06071428954601288, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completion_length": 594.2248850097657, |
|
"eval_kl": 0.003310443878173828, |
|
"eval_loss": -0.02919997088611126, |
|
"eval_reward": 0.534714311145246, |
|
"eval_reward_std": 0.3895806306153536, |
|
"eval_rewards/accuracy_reward": 0.4940000242590904, |
|
"eval_rewards/format_reward": 0.006000000268220901, |
|
"eval_rewards/tag_count_reward": 0.03471428740769625, |
|
"eval_runtime": 2236.5694, |
|
"eval_samples_per_second": 0.224, |
|
"eval_steps_per_second": 0.016, |
|
"step": 49 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 594.1571655273438, |
|
"epoch": 1.0, |
|
"grad_norm": 0.0004400305333547294, |
|
"kl": 0.001186370849609375, |
|
"learning_rate": 2.934120444167326e-07, |
|
"loss": -0.0472, |
|
"reward": 0.4107142984867096, |
|
"reward_std": 0.2270268350839615, |
|
"rewards/accuracy_reward": 0.40000001192092893, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.01071428656578064, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 699.0428955078125, |
|
"epoch": 1.1, |
|
"grad_norm": 0.6008018851280212, |
|
"kl": 0.0012775421142578124, |
|
"learning_rate": 2.5e-07, |
|
"loss": -0.0518, |
|
"reward": 0.5821428835391999, |
|
"reward_std": 0.44157396256923676, |
|
"rewards/accuracy_reward": 0.5428571760654449, |
|
"rewards/format_reward": 0.01428571492433548, |
|
"rewards/tag_count_reward": 0.02500000111758709, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 676.3571655273438, |
|
"epoch": 1.2, |
|
"grad_norm": 0.7341341376304626, |
|
"kl": 0.0003894805908203125, |
|
"learning_rate": 2.065879555832674e-07, |
|
"loss": -0.0145, |
|
"reward": 0.6964286088943481, |
|
"reward_std": 0.32321630120277406, |
|
"rewards/accuracy_reward": 0.685714328289032, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.010714286193251609, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 484.5143188476562, |
|
"epoch": 1.3, |
|
"grad_norm": 2.903294086456299, |
|
"kl": 0.003038787841796875, |
|
"learning_rate": 1.6449496416858282e-07, |
|
"loss": -0.0505, |
|
"reward": 0.6428571611642837, |
|
"reward_std": 0.3603375285863876, |
|
"rewards/accuracy_reward": 0.5857143193483353, |
|
"rewards/format_reward": 0.02857142984867096, |
|
"rewards/tag_count_reward": 0.02857142984867096, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 664.8000244140625, |
|
"epoch": 1.4, |
|
"grad_norm": 10.34648609161377, |
|
"kl": 0.0013919830322265624, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"loss": 0.0629, |
|
"reward": 0.4928571581840515, |
|
"reward_std": 0.5296794831752777, |
|
"rewards/accuracy_reward": 0.45714287757873534, |
|
"rewards/format_reward": 0.01428571492433548, |
|
"rewards/tag_count_reward": 0.021428572386503218, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 597.857177734375, |
|
"epoch": 1.5, |
|
"grad_norm": 1.6099767684936523, |
|
"kl": 0.0036956787109375, |
|
"learning_rate": 8.930309757836516e-08, |
|
"loss": 0.0399, |
|
"reward": 0.4571428716182709, |
|
"reward_std": 0.3776773989200592, |
|
"rewards/accuracy_reward": 0.4571428716182709, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 570.3000366210938, |
|
"epoch": 1.6, |
|
"grad_norm": 0.7647049427032471, |
|
"kl": 0.0009387969970703125, |
|
"learning_rate": 5.848888922025552e-08, |
|
"loss": -0.0569, |
|
"reward": 0.6071428894996643, |
|
"reward_std": 0.3395166456699371, |
|
"rewards/accuracy_reward": 0.5714285969734192, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.035714288055896756, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 619.4714538574219, |
|
"epoch": 1.7, |
|
"grad_norm": 1.8258100748062134, |
|
"kl": 0.002353668212890625, |
|
"learning_rate": 3.349364905389032e-08, |
|
"loss": 0.0276, |
|
"reward": 0.44642859399318696, |
|
"reward_std": 0.27503437697887423, |
|
"rewards/accuracy_reward": 0.4428571656346321, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.00357142873108387, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 535.4000244140625, |
|
"epoch": 1.8, |
|
"grad_norm": 3.521362066268921, |
|
"kl": 0.0031871795654296875, |
|
"learning_rate": 1.507684480352292e-08, |
|
"loss": 0.0045, |
|
"reward": 0.5464286029338836, |
|
"reward_std": 0.48063153624534605, |
|
"rewards/accuracy_reward": 0.5000000238418579, |
|
"rewards/format_reward": 0.01428571492433548, |
|
"rewards/tag_count_reward": 0.03214285895228386, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 741.9714477539062, |
|
"epoch": 1.9, |
|
"grad_norm": 2.457162857055664, |
|
"kl": 0.0006832122802734375, |
|
"learning_rate": 3.798061746947995e-09, |
|
"loss": 0.0415, |
|
"reward": 0.5500000238418579, |
|
"reward_std": 0.369550421833992, |
|
"rewards/accuracy_reward": 0.5285714507102967, |
|
"rewards/format_reward": 0.0, |
|
"rewards/tag_count_reward": 0.02142857201397419, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completion_length": 588.6673402709961, |
|
"eval_kl": 0.004097911834716797, |
|
"eval_loss": -0.02776484563946724, |
|
"eval_reward": 0.5458571693897247, |
|
"eval_reward_std": 0.39641910094022753, |
|
"eval_rewards/accuracy_reward": 0.5062857375442982, |
|
"eval_rewards/format_reward": 0.006285714566707611, |
|
"eval_rewards/tag_count_reward": 0.03328571600466967, |
|
"eval_runtime": 2226.3251, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.016, |
|
"step": 98 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 761.4000244140625, |
|
"epoch": 2.0, |
|
"grad_norm": 0.7996540069580078, |
|
"kl": 0.0015918731689453125, |
|
"learning_rate": 0.0, |
|
"loss": -0.0241, |
|
"reward": 0.6500000298023224, |
|
"reward_std": 0.4324204444885254, |
|
"rewards/accuracy_reward": 0.5571428954601287, |
|
"rewards/format_reward": 0.042857144773006436, |
|
"rewards/tag_count_reward": 0.05000000223517418, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 100, |
|
"total_flos": 0.0, |
|
"train_loss": -0.0030980921536684037, |
|
"train_runtime": 7058.3489, |
|
"train_samples_per_second": 0.028, |
|
"train_steps_per_second": 0.014 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|