diff --git "a/checkpoint-10000/trainer_state.json" "b/checkpoint-10000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-10000/trainer_state.json" @@ -0,0 +1,88488 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7944073721004131, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 7.936507936507936e-06, + "loss": 1.0932, + "theoretical_loss": 14.920781838632275, + "tokens_seen": 262144 + }, + { + "epoch": 0.0, + "learning_rate": 1.5873015873015872e-05, + "loss": 1.0937, + "theoretical_loss": 12.718594708127029, + "tokens_seen": 524288 + }, + { + "epoch": 0.0, + "learning_rate": 2.380952380952381e-05, + "loss": 1.0519, + "theoretical_loss": 11.615184291350435, + "tokens_seen": 786432 + }, + { + "epoch": 0.0, + "learning_rate": 3.1746031746031745e-05, + "loss": 1.0058, + "theoretical_loss": 10.904893169100655, + "tokens_seen": 1048576 + }, + { + "epoch": 0.0, + "learning_rate": 3.968253968253968e-05, + "loss": 0.976, + "theoretical_loss": 10.392029026407034, + "tokens_seen": 1310720 + }, + { + "epoch": 0.0, + "learning_rate": 4.761904761904762e-05, + "loss": 0.9521, + "theoretical_loss": 9.996134261483984, + "tokens_seen": 1572864 + }, + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 0.9364, + "theoretical_loss": 9.67682184172525, + "tokens_seen": 1835008 + }, + { + "epoch": 0.0, + "learning_rate": 6.349206349206349e-05, + "loss": 0.9245, + "theoretical_loss": 9.41114487355416, + "tokens_seen": 2097152 + }, + { + "epoch": 0.0, + "learning_rate": 7.142857142857142e-05, + "loss": 0.9103, + "theoretical_loss": 9.184905895151996, + "tokens_seen": 2359296 + }, + { + "epoch": 0.0, + "learning_rate": 7.936507936507937e-05, + "loss": 0.8933, + "theoretical_loss": 8.988754572553061, + "tokens_seen": 2621440 + }, + { + "epoch": 0.0, + "learning_rate": 8.73015873015873e-05, + "loss": 0.8721, + "theoretical_loss": 8.816230875422118, + "tokens_seen": 2883584 + }, + { + "epoch": 0.0, + "learning_rate": 9.523809523809524e-05, + "loss": 0.8552, + "theoretical_loss": 8.66269920037918, + "tokens_seen": 3145728 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.43383753299713135, + "objective/train/docs_used": 8371, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 8.528481483459473, + "objective/train/original_loss": 8.528482437133789, + "objective/train/theoretical_loss": 8.591947747254773, + "objective/train/tokens_used": 23736800, + "objective/train/value_avg": -0.4384765625, + "objective/train/value_loss": 0.1883670836687088, + "objective/train/value_max": -0.4375, + "objective/train/value_min": -0.441650390625, + "objective/train/value_reward_corr": -0.01855261992160691, + "objective/train/value_std": 0.0006289482116699219, + "objective/train/weight_avg": 1.5432828664779663, + "objective/train/weighted_lm_loss": 13.15995979309082, + "objective/train/weights_max": 1.555271863937378, + "objective/train/weights_min": 1.1650478839874268, + "theoretical_loss": 8.591947747254773, + "tokens_seen": 3276800 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010317460317460317, + "loss": 0.8283, + "theoretical_loss": 8.524729102289708, + "tokens_seen": 3407872 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001111111111111111, + "loss": 0.8015, + "theoretical_loss": 8.399716359763914, + "tokens_seen": 3670016 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011904761904761905, + "loss": 0.7709, + "theoretical_loss": 8.285641004895568, + "tokens_seen": 3932160 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012698412698412698, + "loss": 0.7402, + "theoretical_loss": 8.180907195283321, + "tokens_seen": 4194304 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001349206349206349, + "loss": 0.7102, + "theoretical_loss": 8.084233979345122, + "tokens_seen": 4456448 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014285714285714284, + "loss": 0.6751, + "theoretical_loss": 7.9945788049155055, + "tokens_seen": 4718592 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001507936507936508, + "loss": 0.6503, + "theoretical_loss": 7.911082722632908, + "tokens_seen": 4980736 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015873015873015873, + "loss": 0.6183, + "theoretical_loss": 7.83303033759787, + "tokens_seen": 5242880 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016666666666666666, + "loss": 0.5919, + "theoretical_loss": 7.759820016443023, + "tokens_seen": 5505024 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001746031746031746, + "loss": 0.5614, + "theoretical_loss": 7.690941370375033, + "tokens_seen": 5767168 + }, + { + "epoch": 0.0, + "learning_rate": 0.00018253968253968252, + "loss": 0.5314, + "theoretical_loss": 7.6259579939239845, + "tokens_seen": 6029312 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019047619047619048, + "loss": 0.5048, + "theoretical_loss": 7.564494061943624, + "tokens_seen": 6291456 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.19523106515407562, + "objective/train/docs_used": 9704, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 7.315228462219238, + "objective/train/original_loss": 7.315227508544922, + "objective/train/theoretical_loss": 7.5062238006917354, + "objective/train/tokens_used": 27013600, + "objective/train/value_avg": -0.2044677734375, + "objective/train/value_loss": 0.03861678019165993, + "objective/train/value_max": -0.1988525390625, + "objective/train/value_min": -0.251220703125, + "objective/train/value_reward_corr": -0.023237293515685216, + "objective/train/value_std": 0.01251983642578125, + "objective/train/weight_avg": 1.2158902883529663, + "objective/train/weighted_lm_loss": 8.87794303894043, + "objective/train/weights_max": 1.285593867301941, + "objective/train/weights_min": 0.46789029240608215, + "theoretical_loss": 7.5062238006917354, + "tokens_seen": 6553600 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001984126984126984, + "loss": 0.4798, + "theoretical_loss": 7.5062238006917354, + "tokens_seen": 6553600 + }, + { + "epoch": 0.0, + "learning_rate": 0.00020634920634920634, + "loss": 0.4564, + "theoretical_loss": 7.45086312850561, + "tokens_seen": 6815744 + }, + { + "epoch": 0.0, + "learning_rate": 0.00021428571428571427, + "loss": 0.4397, + "theoretical_loss": 7.398162954262078, + "tokens_seen": 7077888 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002222222222222222, + "loss": 0.4264, + "theoretical_loss": 7.347903756717382, + "tokens_seen": 7340032 + }, + { + "epoch": 0.0, + "learning_rate": 0.00023015873015873016, + "loss": 0.401, + "theoretical_loss": 7.299891163694537, + "tokens_seen": 7602176 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002380952380952381, + "loss": 0.3912, + "theoretical_loss": 7.253952319156202, + "tokens_seen": 7864320 + }, + { + "epoch": 0.0, + "learning_rate": 0.000246031746031746, + "loss": 0.3754, + "theoretical_loss": 7.2099328765932205, + "tokens_seen": 8126464 + }, + { + "epoch": 0.0, + "learning_rate": 0.00025396825396825396, + "loss": 0.366, + "theoretical_loss": 7.167694494355343, + "tokens_seen": 8388608 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002619047619047619, + "loss": 0.3575, + "theoretical_loss": 7.127112736305475, + "tokens_seen": 8650752 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002698412698412698, + "loss": 0.3477, + "theoretical_loss": 7.0880753020982725, + "tokens_seen": 8912896 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002777777777777778, + "loss": 0.3462, + "theoretical_loss": 7.050480527300383, + "tokens_seen": 9175040 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002857142857142857, + "loss": 0.3397, + "theoretical_loss": 7.014236105786485, + "tokens_seen": 9437184 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002936507936507937, + "loss": 0.3387, + "theoretical_loss": 6.979257996300014, + "tokens_seen": 9699328 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.035979945212602615, + "objective/train/docs_used": 11015, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.488903999328613, + "objective/train/original_loss": 6.488903522491455, + "objective/train/theoretical_loss": 6.962219571538605, + "objective/train/tokens_used": 30290400, + "objective/train/value_avg": -0.044891357421875, + "objective/train/value_loss": 0.002101501217111945, + "objective/train/value_max": -0.038330078125, + "objective/train/value_min": -0.092529296875, + "objective/train/value_reward_corr": -0.002001422374817204, + "objective/train/value_std": 0.0086822509765625, + "objective/train/weight_avg": 1.0370042324066162, + "objective/train/weighted_lm_loss": 6.7182722091674805, + "objective/train/weights_max": 1.0969452857971191, + "objective/train/weights_min": 0.3830130398273468, + "theoretical_loss": 6.962219571538605, + "tokens_seen": 9830400 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003015873015873016, + "loss": 0.3357, + "theoretical_loss": 6.945469482441503, + "tokens_seen": 9961472 + }, + { + "epoch": 0.0, + "learning_rate": 0.00030952380952380956, + "loss": 0.332, + "theoretical_loss": 6.912800361140576, + "tokens_seen": 10223616 + }, + { + "epoch": 0.0, + "learning_rate": 0.00031746031746031746, + "loss": 0.3301, + "theoretical_loss": 6.881186239250335, + "tokens_seen": 10485760 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003253968253968254, + "loss": 0.3283, + "theoretical_loss": 6.8505679215514235, + "tokens_seen": 10747904 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003333333333333333, + "loss": 0.3248, + "theoretical_loss": 6.8208908763759295, + "tokens_seen": 11010048 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003412698412698413, + "loss": 0.3232, + "theoretical_loss": 6.79210476741633, + "tokens_seen": 11272192 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003492063492063492, + "loss": 0.3221, + "theoretical_loss": 6.76416304219278, + "tokens_seen": 11534336 + }, + { + "epoch": 0.0, + "learning_rate": 0.00035714285714285714, + "loss": 0.3173, + "theoretical_loss": 6.737022569206117, + "tokens_seen": 11796480 + }, + { + "epoch": 0.0, + "learning_rate": 0.00036507936507936505, + "loss": 0.3175, + "theoretical_loss": 6.710643317075979, + "tokens_seen": 12058624 + }, + { + "epoch": 0.0, + "learning_rate": 0.000373015873015873, + "loss": 0.3144, + "theoretical_loss": 6.684988070009584, + "tokens_seen": 12320768 + }, + { + "epoch": 0.0, + "learning_rate": 0.00038095238095238096, + "loss": 0.3176, + "theoretical_loss": 6.660022174811009, + "tokens_seen": 12582912 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003888888888888889, + "loss": 0.3123, + "theoretical_loss": 6.6357133153579175, + "tokens_seen": 12845056 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.02608620934188366, + "objective/train/docs_used": 12170, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 6.028903007507324, + "objective/train/original_loss": 6.028903961181641, + "objective/train/theoretical_loss": 6.612031311070119, + "objective/train/tokens_used": 33567200, + "objective/train/value_avg": -0.03485107421875, + "objective/train/value_loss": 0.0012108575319871306, + "objective/train/value_max": -0.0136871337890625, + "objective/train/value_min": -0.05511474609375, + "objective/train/value_reward_corr": -0.03430143501280518, + "objective/train/value_std": 0.00576019287109375, + "objective/train/weight_avg": 1.0266841650009155, + "objective/train/weighted_lm_loss": 6.190183162689209, + "objective/train/weights_max": 1.05666184425354, + "objective/train/weights_min": 0.3770461976528168, + "theoretical_loss": 6.612031311070119, + "tokens_seen": 13107200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003968253968253968, + "loss": 0.3097, + "theoretical_loss": 6.612031311070119, + "tokens_seen": 13107200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004047619047619048, + "loss": 0.3104, + "theoretical_loss": 6.588947936394168, + "tokens_seen": 13369344 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004126984126984127, + "loss": 0.3022, + "theoretical_loss": 6.566436758747731, + "tokens_seen": 13631488 + }, + { + "epoch": 0.0, + "learning_rate": 0.00042063492063492065, + "loss": 0.3027, + "theoretical_loss": 6.544472992721121, + "tokens_seen": 13893632 + }, + { + "epoch": 0.0, + "learning_rate": 0.00042857142857142855, + "loss": 0.3044, + "theoretical_loss": 6.523033368632323, + "tokens_seen": 14155776 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004365079365079365, + "loss": 0.3036, + "theoretical_loss": 6.502096013785574, + "tokens_seen": 14417920 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004444444444444444, + "loss": 0.3003, + "theoretical_loss": 6.481640344999435, + "tokens_seen": 14680064 + }, + { + "epoch": 0.0, + "learning_rate": 0.00045238095238095237, + "loss": 0.3021, + "theoretical_loss": 6.461646971154669, + "tokens_seen": 14942208 + }, + { + "epoch": 0.0, + "learning_rate": 0.00046031746031746033, + "loss": 0.3006, + "theoretical_loss": 6.442097604670096, + "tokens_seen": 15204352 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004682539682539683, + "loss": 0.2922, + "theoretical_loss": 6.422974980950157, + "tokens_seen": 15466496 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004761904761904762, + "loss": 0.2958, + "theoretical_loss": 6.404262784964672, + "tokens_seen": 15728640 + }, + { + "epoch": 0.0, + "learning_rate": 0.00048412698412698415, + "loss": 0.2952, + "theoretical_loss": 6.3859455842220765, + "tokens_seen": 15990784 + }, + { + "epoch": 0.0, + "learning_rate": 0.000492063492063492, + "loss": 0.2945, + "theoretical_loss": 6.368008767484675, + "tokens_seen": 16252928 + }, + { + "epoch": 0.0, + "objective/train/advantage_avg": 0.028485985472798347, + "objective/train/docs_used": 13382, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.559032440185547, + "objective/train/original_loss": 5.559032917022705, + "objective/train/theoretical_loss": 6.359178647759789, + "objective/train/tokens_used": 36844000, + "objective/train/value_avg": -0.03570556640625, + "objective/train/value_loss": 0.001410536002367735, + "objective/train/value_max": -0.01111602783203125, + "objective/train/value_min": -0.0867919921875, + "objective/train/value_reward_corr": 0.08617836470205797, + "objective/train/value_std": 0.01192474365234375, + "objective/train/weight_avg": 1.0291692018508911, + "objective/train/weighted_lm_loss": 5.7365899085998535, + "objective/train/weights_max": 1.0881431102752686, + "objective/train/weights_min": 0.378071665763855, + "theoretical_loss": 6.359178647759789, + "tokens_seen": 16384000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005, + "loss": 0.293, + "theoretical_loss": 6.350438488650175, + "tokens_seen": 16515072 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005079365079365079, + "loss": 0.2876, + "theoretical_loss": 6.333221615289645, + "tokens_seen": 16777216 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005158730158730159, + "loss": 0.2909, + "theoretical_loss": 6.316345681389436, + "tokens_seen": 17039360 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005238095238095238, + "loss": 0.2851, + "theoretical_loss": 6.2997988438948465, + "tokens_seen": 17301504 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005317460317460317, + "loss": 0.2807, + "theoretical_loss": 6.283569842697203, + "tokens_seen": 17563648 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005396825396825396, + "loss": 0.2855, + "theoretical_loss": 6.26764796374462, + "tokens_seen": 17825792 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005476190476190477, + "loss": 0.2822, + "theoretical_loss": 6.25202300499066, + "tokens_seen": 18087936 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005555555555555556, + "loss": 0.2818, + "theoretical_loss": 6.236685244924882, + "tokens_seen": 18350080 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005634920634920635, + "loss": 0.2847, + "theoretical_loss": 6.2216254134558024, + "tokens_seen": 18612224 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005714285714285714, + "loss": 0.2838, + "theoretical_loss": 6.206834664939976, + "tokens_seen": 18874368 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005793650793650794, + "loss": 0.2875, + "theoretical_loss": 6.192304553171669, + "tokens_seen": 19136512 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005873015873015874, + "loss": 0.2852, + "theoretical_loss": 6.178027008165916, + "tokens_seen": 19398656 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.01716114766895771, + "objective/train/docs_used": 14656, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.421322822570801, + "objective/train/original_loss": 5.421322822570801, + "objective/train/theoretical_loss": 6.163994314584031, + "objective/train/tokens_used": 40120800, + "objective/train/value_avg": -0.0253753662109375, + "objective/train/value_loss": 0.0010442916536703706, + "objective/train/value_max": -0.0090179443359375, + "objective/train/value_min": -0.04986572265625, + "objective/train/value_reward_corr": -0.02662585421402189, + "objective/train/value_std": 0.0084381103515625, + "objective/train/weight_avg": 1.0176551342010498, + "objective/train/weighted_lm_loss": 5.523168087005615, + "objective/train/weights_max": 1.051129937171936, + "objective/train/weights_min": 0.37920987606048584, + "theoretical_loss": 6.163994314584031, + "tokens_seen": 19660800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005952380952380953, + "loss": 0.283, + "theoretical_loss": 6.163994314584031, + "tokens_seen": 19660800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006031746031746032, + "loss": 0.2769, + "theoretical_loss": 6.150199091665225, + "tokens_seen": 19922944 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006111111111111112, + "loss": 0.2773, + "theoretical_loss": 6.136634274540901, + "tokens_seen": 20185088 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006190476190476191, + "loss": 0.2813, + "theoretical_loss": 6.123293096819758, + "tokens_seen": 20447232 + }, + { + "epoch": 0.01, + "learning_rate": 0.000626984126984127, + "loss": 0.2788, + "theoretical_loss": 6.1101690743422505, + "tokens_seen": 20709376 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006349206349206349, + "loss": 0.2779, + "theoretical_loss": 6.097255990012153, + "tokens_seen": 20971520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006428571428571429, + "loss": 0.2727, + "theoretical_loss": 6.084547879621354, + "tokens_seen": 21233664 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006507936507936508, + "loss": 0.2732, + "theoretical_loss": 6.072039018591484, + "tokens_seen": 21495808 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006587301587301587, + "loss": 0.2743, + "theoretical_loss": 6.059723909562683, + "tokens_seen": 21757952 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006666666666666666, + "loss": 0.2703, + "theoretical_loss": 6.047597270765904, + "tokens_seen": 22020096 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006746031746031747, + "loss": 0.2718, + "theoretical_loss": 6.035654025120612, + "tokens_seen": 22282240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006825396825396826, + "loss": 0.269, + "theoretical_loss": 6.023889290004692, + "tokens_seen": 22544384 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006904761904761905, + "loss": 0.269, + "theoretical_loss": 6.012298367647816, + "tokens_seen": 22806528 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.018673593178391457, + "objective/train/docs_used": 15907, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.2787933349609375, + "objective/train/original_loss": 5.2787933349609375, + "objective/train/theoretical_loss": 6.006566666513313, + "objective/train/tokens_used": 43397600, + "objective/train/value_avg": -0.0252227783203125, + "objective/train/value_loss": 0.0007268089102581143, + "objective/train/value_max": -0.00916290283203125, + "objective/train/value_min": -0.059967041015625, + "objective/train/value_reward_corr": -0.009804935861391459, + "objective/train/value_std": 0.0091552734375, + "objective/train/weight_avg": 1.0190335512161255, + "objective/train/weighted_lm_loss": 5.3884196281433105, + "objective/train/weights_max": 1.0590507984161377, + "objective/train/weights_min": 0.37753555178642273, + "theoretical_loss": 6.006566666513313, + "tokens_seen": 22937600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006984126984126984, + "loss": 0.266, + "theoretical_loss": 6.000876736103618, + "tokens_seen": 23068672 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007063492063492064, + "loss": 0.2679, + "theoretical_loss": 5.989620040759641, + "tokens_seen": 23330816 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007142857142857143, + "loss": 0.2638, + "theoretical_loss": 5.978524086347409, + "tokens_seen": 23592960 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007222222222222222, + "loss": 0.2676, + "theoretical_loss": 5.967584829417934, + "tokens_seen": 23855104 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007301587301587301, + "loss": 0.2632, + "theoretical_loss": 5.956798371250791, + "tokens_seen": 24117248 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007380952380952381, + "loss": 0.2593, + "theoretical_loss": 5.9461609511673625, + "tokens_seen": 24379392 + }, + { + "epoch": 0.01, + "learning_rate": 0.000746031746031746, + "loss": 0.2628, + "theoretical_loss": 5.935668940221127, + "tokens_seen": 24641536 + }, + { + "epoch": 0.01, + "learning_rate": 0.000753968253968254, + "loss": 0.261, + "theoretical_loss": 5.92531883523999, + "tokens_seen": 24903680 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007619047619047619, + "loss": 0.2601, + "theoretical_loss": 5.915107253197538, + "tokens_seen": 25165824 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007698412698412699, + "loss": 0.2627, + "theoretical_loss": 5.905030925891829, + "tokens_seen": 25427968 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007777777777777778, + "loss": 0.2632, + "theoretical_loss": 5.895086694911951, + "tokens_seen": 25690112 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007857142857142857, + "loss": 0.261, + "theoretical_loss": 5.88527150687402, + "tokens_seen": 25952256 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.014981591142714024, + "objective/train/docs_used": 17033, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.067455291748047, + "objective/train/original_loss": 5.067455291748047, + "objective/train/theoretical_loss": 5.8755824089096285, + "objective/train/tokens_used": 46674400, + "objective/train/value_avg": -0.0223236083984375, + "objective/train/value_loss": 0.001089372206479311, + "objective/train/value_max": -0.007785797119140625, + "objective/train/value_min": -0.08538818359375, + "objective/train/value_reward_corr": 0.11159853979639632, + "objective/train/value_std": 0.008026123046875, + "objective/train/weight_avg": 1.0154664516448975, + "objective/train/weighted_lm_loss": 5.153756618499756, + "objective/train/weights_max": 1.0678131580352783, + "objective/train/weights_min": 0.37597760558128357, + "theoretical_loss": 5.8755824089096285, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007936507936507937, + "loss": 0.2609, + "theoretical_loss": 5.8755824089096285, + "tokens_seen": 26214400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008015873015873017, + "loss": 0.2559, + "theoretical_loss": 5.866016544391016, + "tokens_seen": 26476544 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008095238095238096, + "loss": 0.2535, + "theoretical_loss": 5.856571148878293, + "tokens_seen": 26738688 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008174603174603175, + "loss": 0.2534, + "theoretical_loss": 5.847243546275179, + "tokens_seen": 27000832 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008253968253968254, + "loss": 0.2527, + "theoretical_loss": 5.838031145180573, + "tokens_seen": 27262976 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008333333333333334, + "loss": 0.2561, + "theoretical_loss": 5.82893143542425, + "tokens_seen": 27525120 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008412698412698413, + "loss": 0.2535, + "theoretical_loss": 5.81994198477569, + "tokens_seen": 27787264 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008492063492063492, + "loss": 0.2519, + "theoretical_loss": 5.811060435815881, + "tokens_seen": 28049408 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008571428571428571, + "loss": 0.2538, + "theoretical_loss": 5.802284502962563, + "tokens_seen": 28311552 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008650793650793651, + "loss": 0.2515, + "theoretical_loss": 5.793611969640068, + "tokens_seen": 28573696 + }, + { + "epoch": 0.01, + "learning_rate": 0.000873015873015873, + "loss": 0.2494, + "theoretical_loss": 5.785040685585437, + "tokens_seen": 28835840 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008809523809523809, + "loss": 0.2496, + "theoretical_loss": 5.7765685642831155, + "tokens_seen": 29097984 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008888888888888888, + "loss": 0.2526, + "theoretical_loss": 5.768193580520972, + "tokens_seen": 29360128 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.015116632916033268, + "objective/train/docs_used": 18174, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 5.077436923980713, + "objective/train/original_loss": 5.0774359703063965, + "objective/train/theoretical_loss": 5.7640418985258295, + "objective/train/tokens_used": 49951200, + "objective/train/value_avg": -0.0243377685546875, + "objective/train/value_loss": 0.0008194705005735159, + "objective/train/value_max": -0.006877899169921875, + "objective/train/value_min": -0.139404296875, + "objective/train/value_reward_corr": 0.07501882751184434, + "objective/train/value_std": 0.01377105712890625, + "objective/train/weight_avg": 1.0155125856399536, + "objective/train/weighted_lm_loss": 5.159419536590576, + "objective/train/weights_max": 1.1385329961776733, + "objective/train/weights_min": 0.3745060861110687, + "theoretical_loss": 5.7640418985258295, + "tokens_seen": 29491200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008968253968253968, + "loss": 0.249, + "theoretical_loss": 5.759913768060882, + "tokens_seen": 29622272 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009047619047619047, + "loss": 0.2492, + "theoretical_loss": 5.7517272174175496, + "tokens_seen": 29884416 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009126984126984126, + "loss": 0.2506, + "theoretical_loss": 5.743632073739626, + "tokens_seen": 30146560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009206349206349207, + "loss": 0.2489, + "theoretical_loss": 5.735626534787584, + "tokens_seen": 30408704 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009285714285714287, + "loss": 0.2431, + "theoretical_loss": 5.727708849003127, + "tokens_seen": 30670848 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009365079365079366, + "loss": 0.2469, + "theoretical_loss": 5.719877313665254, + "tokens_seen": 30932992 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009444444444444445, + "loss": 0.2464, + "theoretical_loss": 5.712130273128388, + "tokens_seen": 31195136 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009523809523809524, + "loss": 0.2492, + "theoretical_loss": 5.704466117138258, + "tokens_seen": 31457280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009603174603174604, + "loss": 0.2453, + "theoretical_loss": 5.696883279221504, + "tokens_seen": 31719424 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009682539682539683, + "loss": 0.243, + "theoretical_loss": 5.689380235145171, + "tokens_seen": 31981568 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009761904761904762, + "loss": 0.2464, + "theoretical_loss": 5.6819555014425305, + "tokens_seen": 32243712 + }, + { + "epoch": 0.01, + "learning_rate": 0.000984126984126984, + "loss": 0.2427, + "theoretical_loss": 5.674607634001871, + "tokens_seen": 32505856 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.014621232636272907, + "objective/train/docs_used": 19239, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.8382439613342285, + "objective/train/original_loss": 4.838243007659912, + "objective/train/theoretical_loss": 5.667335226715059, + "objective/train/tokens_used": 53228000, + "objective/train/value_avg": -0.02044677734375, + "objective/train/value_loss": 0.0005950998747721314, + "objective/train/value_max": -0.006191253662109375, + "objective/train/value_min": -0.11181640625, + "objective/train/value_reward_corr": 0.10875102217088843, + "objective/train/value_std": 0.00927734375, + "objective/train/weight_avg": 1.0149059295654297, + "objective/train/weighted_lm_loss": 4.914798736572266, + "objective/train/weights_max": 1.1008343696594238, + "objective/train/weights_min": 0.37406063079833984, + "theoretical_loss": 5.667335226715059, + "tokens_seen": 32768000 + }, + { + "epoch": 0.01, + "learning_rate": 0.000992063492063492, + "loss": 0.249, + "theoretical_loss": 5.667335226715059, + "tokens_seen": 32768000 + }, + { + "epoch": 0.01, + "learning_rate": 0.001, + "loss": 0.2444, + "theoretical_loss": 5.6601369101828904, + "tokens_seen": 33030144 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009999197560584176, + "loss": 0.244, + "theoretical_loss": 5.6530113504744435, + "tokens_seen": 33292288 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009998395121168352, + "loss": 0.2429, + "theoretical_loss": 5.645957247937725, + "tokens_seen": 33554432 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009997592681752529, + "loss": 0.2414, + "theoretical_loss": 5.638973336059157, + "tokens_seen": 33816576 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009996790242336705, + "loss": 0.2359, + "theoretical_loss": 5.632058380369512, + "tokens_seen": 34078720 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009995987802920879, + "loss": 0.2366, + "theoretical_loss": 5.625211177394046, + "tokens_seen": 34340864 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009995185363505057, + "loss": 0.2361, + "theoretical_loss": 5.618430553644782, + "tokens_seen": 34603008 + }, + { + "epoch": 0.01, + "learning_rate": 0.000999438292408923, + "loss": 0.2379, + "theoretical_loss": 5.611715364652864, + "tokens_seen": 34865152 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009993580484673407, + "loss": 0.2324, + "theoretical_loss": 5.605064494039176, + "tokens_seen": 35127296 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009992778045257583, + "loss": 0.2315, + "theoretical_loss": 5.598476852621397, + "tokens_seen": 35389440 + }, + { + "epoch": 0.01, + "learning_rate": 0.000999197560584176, + "loss": 0.2364, + "theoretical_loss": 5.591951377555809, + "tokens_seen": 35651584 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009991173166425935, + "loss": 0.2376, + "theoretical_loss": 5.585487031512276, + "tokens_seen": 35913728 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.008301016874611378, + "objective/train/docs_used": 20470, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.471249103546143, + "objective/train/original_loss": 4.471248626708984, + "objective/train/theoretical_loss": 5.582277464488612, + "objective/train/tokens_used": 56504800, + "objective/train/value_avg": -0.017913818359375, + "objective/train/value_loss": 0.00041489681461825967, + "objective/train/value_max": -0.00595855712890625, + "objective/train/value_min": -0.11065673828125, + "objective/train/value_reward_corr": 0.3373516244538544, + "objective/train/value_std": 0.0095672607421875, + "objective/train/weight_avg": 1.0085070133209229, + "objective/train/weighted_lm_loss": 4.517763614654541, + "objective/train/weights_max": 1.0755970478057861, + "objective/train/weights_min": 0.7941007018089294, + "theoretical_loss": 5.582277464488612, + "tokens_seen": 36044800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009990370727010112, + "loss": 0.2363, + "theoretical_loss": 5.579082801880871, + "tokens_seen": 36175872 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009989568287594288, + "loss": 0.234, + "theoretical_loss": 5.572737700008718, + "tokens_seen": 36438016 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009988765848178462, + "loss": 0.2357, + "theoretical_loss": 5.56645076046569, + "tokens_seen": 36700160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009987963408762638, + "loss": 0.2324, + "theoretical_loss": 5.5602210403376775, + "tokens_seen": 36962304 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009987160969346814, + "loss": 0.2362, + "theoretical_loss": 5.554047618546193, + "tokens_seen": 37224448 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998635852993099, + "loss": 0.2376, + "theoretical_loss": 5.547929595193182, + "tokens_seen": 37486592 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009985556090515166, + "loss": 0.2321, + "theoretical_loss": 5.5418660909298945, + "tokens_seen": 37748736 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009984753651099342, + "loss": 0.2291, + "theoretical_loss": 5.535856246348814, + "tokens_seen": 38010880 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009983951211683518, + "loss": 0.2304, + "theoretical_loss": 5.529899221397624, + "tokens_seen": 38273024 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009983148772267695, + "loss": 0.2279, + "theoretical_loss": 5.523994194814273, + "tokens_seen": 38535168 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998234633285187, + "loss": 0.229, + "theoretical_loss": 5.518140363582252, + "tokens_seen": 38797312 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009981543893436047, + "loss": 0.2318, + "theoretical_loss": 5.512336942405216, + "tokens_seen": 39059456 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.0074028936214745045, + "objective/train/docs_used": 21760, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.182587146759033, + "objective/train/original_loss": 4.182587623596191, + "objective/train/theoretical_loss": 5.506583163200142, + "objective/train/tokens_used": 59781600, + "objective/train/value_avg": -0.0196990966796875, + "objective/train/value_loss": 0.0009806023444980383, + "objective/train/value_max": -0.00634002685546875, + "objective/train/value_min": -0.1546630859375, + "objective/train/value_reward_corr": 0.24312695151689936, + "objective/train/value_std": 0.0124359130859375, + "objective/train/weight_avg": 1.0078480243682861, + "objective/train/weighted_lm_loss": 4.2220258712768555, + "objective/train/weights_max": 1.1438496112823486, + "objective/train/weights_min": 0.3742261826992035, + "theoretical_loss": 5.506583163200142, + "tokens_seen": 39321600 + }, + { + "epoch": 0.01, + "learning_rate": 0.000998074145402022, + "loss": 0.2256, + "theoretical_loss": 5.506583163200142, + "tokens_seen": 39321600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009979939014604397, + "loss": 0.2304, + "theoretical_loss": 5.5008782746082625, + "tokens_seen": 39583744 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009979136575188573, + "loss": 0.2272, + "theoretical_loss": 5.495221541523011, + "tokens_seen": 39845888 + }, + { + "epoch": 0.01, + "learning_rate": 0.000997833413577275, + "loss": 0.2288, + "theoretical_loss": 5.489612244634316, + "tokens_seen": 40108032 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009977531696356925, + "loss": 0.2282, + "theoretical_loss": 5.48404967998854, + "tokens_seen": 40370176 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009976729256941101, + "loss": 0.2278, + "theoretical_loss": 5.478533158563456, + "tokens_seen": 40632320 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009975926817525277, + "loss": 0.2312, + "theoretical_loss": 5.473062005857637, + "tokens_seen": 40894464 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009975124378109451, + "loss": 0.2301, + "theoretical_loss": 5.467635561493681, + "tokens_seen": 41156608 + }, + { + "epoch": 0.01, + "learning_rate": 0.000997432193869363, + "loss": 0.2279, + "theoretical_loss": 5.462253178834744, + "tokens_seen": 41418752 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009973519499277804, + "loss": 0.2244, + "theoretical_loss": 5.456914224613812, + "tokens_seen": 41680896 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009972717059861982, + "loss": 0.2276, + "theoretical_loss": 5.451618078575256, + "tokens_seen": 41943040 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009971914620446156, + "loss": 0.2234, + "theoretical_loss": 5.446364133128155, + "tokens_seen": 42205184 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009971112181030332, + "loss": 0.2237, + "theoretical_loss": 5.44115179301095, + "tokens_seen": 42467328 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.012641006149351597, + "objective/train/docs_used": 22938, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.515160083770752, + "objective/train/original_loss": 4.51516056060791, + "objective/train/theoretical_loss": 5.438561042223066, + "objective/train/tokens_used": 63058400, + "objective/train/value_avg": -0.01983642578125, + "objective/train/value_loss": 0.0004896495374850929, + "objective/train/value_max": -0.00661468505859375, + "objective/train/value_min": -0.0869140625, + "objective/train/value_reward_corr": 0.07471170255551907, + "objective/train/value_std": 0.0099029541015625, + "objective/train/weight_avg": 1.012885570526123, + "objective/train/weighted_lm_loss": 4.5774455070495605, + "objective/train/weights_max": 1.0841727256774902, + "objective/train/weights_min": 0.7922973036766052, + "theoretical_loss": 5.438561042223066, + "tokens_seen": 42598400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009970309741614508, + "loss": 0.2268, + "theoretical_loss": 5.435980474966981, + "tokens_seen": 42729472 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009969507302198684, + "loss": 0.2231, + "theoretical_loss": 5.430849607430501, + "tokens_seen": 42991616 + }, + { + "epoch": 0.01, + "learning_rate": 0.000996870486278286, + "loss": 0.2219, + "theoretical_loss": 5.425758630222747, + "tokens_seen": 43253760 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009967902423367037, + "loss": 0.2263, + "theoretical_loss": 5.42070699425771, + "tokens_seen": 43515904 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009967099983951213, + "loss": 0.2268, + "theoretical_loss": 5.415694161257225, + "tokens_seen": 43778048 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009966297544535387, + "loss": 0.2216, + "theoretical_loss": 5.410719603475034, + "tokens_seen": 44040192 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009965495105119565, + "loss": 0.224, + "theoretical_loss": 5.405782803429483, + "tokens_seen": 44302336 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009964692665703739, + "loss": 0.2221, + "theoretical_loss": 5.400883253644551, + "tokens_seen": 44564480 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009963890226287915, + "loss": 0.226, + "theoretical_loss": 5.396020456398885, + "tokens_seen": 44826624 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009963087786872091, + "loss": 0.2213, + "theoretical_loss": 5.391193923482547, + "tokens_seen": 45088768 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009962285347456267, + "loss": 0.2216, + "theoretical_loss": 5.386403175961223, + "tokens_seen": 45350912 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009961482908040443, + "loss": 0.2215, + "theoretical_loss": 5.381647743947578, + "tokens_seen": 45613056 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.011346699669957161, + "objective/train/docs_used": 24173, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.3351922035217285, + "objective/train/original_loss": 4.335192680358887, + "objective/train/theoretical_loss": 5.37692716637954, + "objective/train/tokens_used": 66335200, + "objective/train/value_avg": -0.02081298828125, + "objective/train/value_loss": 0.0009375278605148196, + "objective/train/value_max": -0.006389617919921875, + "objective/train/value_min": -0.09466552734375, + "objective/train/value_reward_corr": 0.22103111537465234, + "objective/train/value_std": 0.01107025146484375, + "objective/train/weight_avg": 1.0117790699005127, + "objective/train/weighted_lm_loss": 4.3885650634765625, + "objective/train/weights_max": 1.0966105461120605, + "objective/train/weights_min": 0.37649428844451904, + "theoretical_loss": 5.37692716637954, + "tokens_seen": 45875200 + }, + { + "epoch": 0.01, + "learning_rate": 0.000996068046862462, + "loss": 0.2257, + "theoretical_loss": 5.37692716637954, + "tokens_seen": 45875200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009959878029208796, + "loss": 0.2205, + "theoretical_loss": 5.372240990805237, + "tokens_seen": 46137344 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009959075589792972, + "loss": 0.2197, + "theoretical_loss": 5.367588773174377, + "tokens_seen": 46399488 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009958273150377146, + "loss": 0.225, + "theoretical_loss": 5.36297007763582, + "tokens_seen": 46661632 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009957470710961322, + "loss": 0.2232, + "theoretical_loss": 5.358384476341126, + "tokens_seen": 46923776 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009956668271545498, + "loss": 0.2187, + "theoretical_loss": 5.353831549253895, + "tokens_seen": 47185920 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009955865832129674, + "loss": 0.2213, + "theoretical_loss": 5.349310883964664, + "tokens_seen": 47448064 + }, + { + "epoch": 0.01, + "learning_rate": 0.000995506339271385, + "loss": 0.2195, + "theoretical_loss": 5.344822075511196, + "tokens_seen": 47710208 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009954260953298026, + "loss": 0.2177, + "theoretical_loss": 5.340364726203955, + "tokens_seen": 47972352 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009953458513882203, + "loss": 0.2156, + "theoretical_loss": 5.3359384454566055, + "tokens_seen": 48234496 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009952656074466376, + "loss": 0.2146, + "theoretical_loss": 5.331542849621357, + "tokens_seen": 48496640 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009951853635050555, + "loss": 0.2172, + "theoretical_loss": 5.327177561828993, + "tokens_seen": 48758784 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009951051195634729, + "loss": 0.219, + "theoretical_loss": 5.32284221183342, + "tokens_seen": 49020928 + }, + { + "epoch": 0.01, + "objective/train/advantage_avg": 0.0004111192829441279, + "objective/train/docs_used": 25394, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.30299186706543, + "objective/train/original_loss": 4.30299186706543, + "objective/train/theoretical_loss": 5.3206856495812715, + "objective/train/tokens_used": 69612000, + "objective/train/value_avg": -0.018402099609375, + "objective/train/value_loss": 0.001803424907848239, + "objective/train/value_max": -0.00555419921875, + "objective/train/value_min": -0.13427734375, + "objective/train/value_reward_corr": 0.352072119180548, + "objective/train/value_std": 0.01030731201171875, + "objective/train/weight_avg": 1.0012028217315674, + "objective/train/weighted_lm_loss": 4.307867527008057, + "objective/train/weights_max": 1.0850714445114136, + "objective/train/weights_min": 0.37296921014785767, + "theoretical_loss": 5.3206856495812715, + "tokens_seen": 49152000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009950248756218905, + "loss": 0.2198, + "theoretical_loss": 5.318536435860599, + "tokens_seen": 49283072 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994944631680308, + "loss": 0.2143, + "theoretical_loss": 5.314259876461705, + "tokens_seen": 49545216 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009948643877387257, + "loss": 0.2153, + "theoretical_loss": 5.310012182370359, + "tokens_seen": 49807360 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009947841437971433, + "loss": 0.2137, + "theoretical_loss": 5.305793008363841, + "tokens_seen": 50069504 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994703899855561, + "loss": 0.2132, + "theoretical_loss": 5.301602015128104, + "tokens_seen": 50331648 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009946236559139785, + "loss": 0.2117, + "theoretical_loss": 5.297438869126498, + "tokens_seen": 50593792 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009945434119723962, + "loss": 0.2191, + "theoretical_loss": 5.293303242472074, + "tokens_seen": 50855936 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009944631680308138, + "loss": 0.2133, + "theoretical_loss": 5.289194812803347, + "tokens_seen": 51118080 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009943829240892312, + "loss": 0.2112, + "theoretical_loss": 5.285113263163414, + "tokens_seen": 51380224 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994302680147649, + "loss": 0.2129, + "theoretical_loss": 5.2810582818823235, + "tokens_seen": 51642368 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009942224362060664, + "loss": 0.215, + "theoretical_loss": 5.27702956246258, + "tokens_seen": 51904512 + }, + { + "epoch": 0.02, + "learning_rate": 0.000994142192264484, + "loss": 0.2124, + "theoretical_loss": 5.273026803467695, + "tokens_seen": 52166656 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.01028979942202568, + "objective/train/docs_used": 26541, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.072467803955078, + "objective/train/original_loss": 4.072467803955078, + "objective/train/theoretical_loss": 5.269049708413682, + "objective/train/tokens_used": 72888800, + "objective/train/value_avg": -0.016021728515625, + "objective/train/value_loss": 0.00028009802917949855, + "objective/train/value_max": -0.004608154296875, + "objective/train/value_min": -0.0889892578125, + "objective/train/value_reward_corr": 0.4423854028983263, + "objective/train/value_std": 0.008819580078125, + "objective/train/weight_avg": 1.0104297399520874, + "objective/train/weighted_lm_loss": 4.120135307312012, + "objective/train/weights_max": 1.0731053352355957, + "objective/train/weights_min": 0.8264122009277344, + "theoretical_loss": 5.269049708413682, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009940619483229016, + "loss": 0.2105, + "theoretical_loss": 5.269049708413682, + "tokens_seen": 52428800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009939817043813192, + "loss": 0.2123, + "theoretical_loss": 5.265097985663418, + "tokens_seen": 52690944 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009939014604397368, + "loss": 0.2115, + "theoretical_loss": 5.261171348323755, + "tokens_seen": 52953088 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009938212164981545, + "loss": 0.2084, + "theoretical_loss": 5.257269514145324, + "tokens_seen": 53215232 + }, + { + "epoch": 0.02, + "learning_rate": 0.000993740972556572, + "loss": 0.2062, + "theoretical_loss": 5.2533922054249365, + "tokens_seen": 53477376 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009936607286149895, + "loss": 0.2101, + "theoretical_loss": 5.2495391489104986, + "tokens_seen": 53739520 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009935804846734073, + "loss": 0.2142, + "theoretical_loss": 5.24571007570837, + "tokens_seen": 54001664 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009935002407318247, + "loss": 0.2097, + "theoretical_loss": 5.2419047211930865, + "tokens_seen": 54263808 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009934199967902423, + "loss": 0.206, + "theoretical_loss": 5.238122824919387, + "tokens_seen": 54525952 + }, + { + "epoch": 0.02, + "learning_rate": 0.00099333975284866, + "loss": 0.2103, + "theoretical_loss": 5.234364130536457, + "tokens_seen": 54788096 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009932595089070775, + "loss": 0.21, + "theoretical_loss": 5.230628385704337, + "tokens_seen": 55050240 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009931792649654951, + "loss": 0.2083, + "theoretical_loss": 5.2269153420124255, + "tokens_seen": 55312384 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009930990210239128, + "loss": 0.2085, + "theoretical_loss": 5.223224754900014, + "tokens_seen": 55574528 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.01192543189972639, + "objective/train/docs_used": 27758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.997652769088745, + "objective/train/original_loss": 3.997653007507324, + "objective/train/theoretical_loss": 5.221387807169494, + "objective/train/tokens_used": 76165600, + "objective/train/value_avg": -0.0164794921875, + "objective/train/value_loss": 0.0003872321976814419, + "objective/train/value_max": -0.004451751708984375, + "objective/train/value_min": -0.07354736328125, + "objective/train/value_reward_corr": 0.07308995924059121, + "objective/train/value_std": 0.006809234619140625, + "objective/train/weight_avg": 1.0121105909347534, + "objective/train/weighted_lm_loss": 4.049195289611816, + "objective/train/weights_max": 1.0763195753097534, + "objective/train/weights_min": 0.37184974551200867, + "theoretical_loss": 5.221387807169494, + "tokens_seen": 55705600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009930187770823304, + "loss": 0.2098, + "theoretical_loss": 5.219556383578795, + "tokens_seen": 55836672 + }, + { + "epoch": 0.02, + "learning_rate": 0.000992938533140748, + "loss": 0.2069, + "theoretical_loss": 5.215909990957291, + "tokens_seen": 56098816 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009928582891991654, + "loss": 0.2075, + "theoretical_loss": 5.212285343567135, + "tokens_seen": 56360960 + }, + { + "epoch": 0.02, + "learning_rate": 0.000992778045257583, + "loss": 0.2067, + "theoretical_loss": 5.208682211491157, + "tokens_seen": 56623104 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009926978013160006, + "loss": 0.2043, + "theoretical_loss": 5.205100368293225, + "tokens_seen": 56885248 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009926175573744182, + "loss": 0.2042, + "theoretical_loss": 5.201539590949796, + "tokens_seen": 57147392 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009925373134328358, + "loss": 0.206, + "theoretical_loss": 5.1979996597831, + "tokens_seen": 57409536 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009924570694912534, + "loss": 0.2041, + "theoretical_loss": 5.1944803583959525, + "tokens_seen": 57671680 + }, + { + "epoch": 0.02, + "learning_rate": 0.000992376825549671, + "loss": 0.2063, + "theoretical_loss": 5.190981473608112, + "tokens_seen": 57933824 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009922965816080887, + "loss": 0.2063, + "theoretical_loss": 5.18750279539416, + "tokens_seen": 58195968 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009922163376665063, + "loss": 0.2021, + "theoretical_loss": 5.184044116822849, + "tokens_seen": 58458112 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009921360937249237, + "loss": 0.2067, + "theoretical_loss": 5.1806052339978965, + "tokens_seen": 58720256 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.008248094469308853, + "objective/train/docs_used": 29071, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.7817437648773193, + "objective/train/original_loss": 3.7817440032958984, + "objective/train/theoretical_loss": 5.1771859460001455, + "objective/train/tokens_used": 79442400, + "objective/train/value_avg": -0.0163726806640625, + "objective/train/value_loss": 0.00034792307997122407, + "objective/train/value_max": -0.003795623779296875, + "objective/train/value_min": -0.0994873046875, + "objective/train/value_reward_corr": 0.447085684972137, + "objective/train/value_std": 0.00957489013671875, + "objective/train/weight_avg": 1.0084155797958374, + "objective/train/weighted_lm_loss": 3.8198161125183105, + "objective/train/weights_max": 1.0903370380401611, + "objective/train/weights_min": 0.37122613191604614, + "theoretical_loss": 5.1771859460001455, + "tokens_seen": 58982400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009920558497833415, + "loss": 0.1986, + "theoretical_loss": 5.1771859460001455, + "tokens_seen": 58982400 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991975605841759, + "loss": 0.2047, + "theoretical_loss": 5.1737860548311065, + "tokens_seen": 59244544 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009918953619001765, + "loss": 0.2018, + "theoretical_loss": 5.170405365357794, + "tokens_seen": 59506688 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009918151179585941, + "loss": 0.2003, + "theoretical_loss": 5.167043685258852, + "tokens_seen": 59768832 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009917348740170117, + "loss": 0.2018, + "theoretical_loss": 5.163700824971922, + "tokens_seen": 60030976 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009916546300754293, + "loss": 0.2067, + "theoretical_loss": 5.160376597642223, + "tokens_seen": 60293120 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991574386133847, + "loss": 0.2066, + "theoretical_loss": 5.157070819072301, + "tokens_seen": 60555264 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009914941421922646, + "loss": 0.2065, + "theoretical_loss": 5.153783307672935, + "tokens_seen": 60817408 + }, + { + "epoch": 0.02, + "learning_rate": 0.000991413898250682, + "loss": 0.2048, + "theoretical_loss": 5.150513884415149, + "tokens_seen": 61079552 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009913336543090998, + "loss": 0.2018, + "theoretical_loss": 5.14726237278331, + "tokens_seen": 61341696 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009912534103675172, + "loss": 0.2058, + "theoretical_loss": 5.144028598729285, + "tokens_seen": 61603840 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009911731664259348, + "loss": 0.2015, + "theoretical_loss": 5.140812390627624, + "tokens_seen": 61865984 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009910929224843524, + "loss": 0.2039, + "theoretical_loss": 5.137613579231737, + "tokens_seen": 62128128 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.010975364595651627, + "objective/train/docs_used": 30276, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.172832489013672, + "objective/train/original_loss": 4.17283296585083, + "objective/train/theoretical_loss": 5.136020645048806, + "objective/train/tokens_used": 82719200, + "objective/train/value_avg": -0.016754150390625, + "objective/train/value_loss": 0.0010546700796112418, + "objective/train/value_max": -0.004718780517578125, + "objective/train/value_min": -0.08447265625, + "objective/train/value_reward_corr": 0.2364809296876953, + "objective/train/value_std": 0.007312774658203125, + "objective/train/weight_avg": 1.0114343166351318, + "objective/train/weighted_lm_loss": 4.22389554977417, + "objective/train/weights_max": 1.077765703201294, + "objective/train/weights_min": 0.372718870639801, + "theoretical_loss": 5.136020645048806, + "tokens_seen": 62259200 + }, + { + "epoch": 0.02, + "learning_rate": 0.00099101267854277, + "loss": 0.2036, + "theoretical_loss": 5.134431997631053, + "tokens_seen": 62390272 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009909324346011876, + "loss": 0.1996, + "theoretical_loss": 5.1312674812091235, + "tokens_seen": 62652416 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009908521906596053, + "loss": 0.1987, + "theoretical_loss": 5.128119867602646, + "tokens_seen": 62914560 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009907719467180229, + "loss": 0.2017, + "theoretical_loss": 5.124988996661393, + "tokens_seen": 63176704 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009906917027764405, + "loss": 0.1973, + "theoretical_loss": 5.121874710409012, + "tokens_seen": 63438848 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990611458834858, + "loss": 0.1999, + "theoretical_loss": 5.118776853004677, + "tokens_seen": 63700992 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009905312148932755, + "loss": 0.1977, + "theoretical_loss": 5.115695270705579, + "tokens_seen": 63963136 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990450970951693, + "loss": 0.1956, + "theoretical_loss": 5.112629811830217, + "tokens_seen": 64225280 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009903707270101107, + "loss": 0.1989, + "theoretical_loss": 5.10958032672248, + "tokens_seen": 64487424 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009902904830685283, + "loss": 0.1946, + "theoretical_loss": 5.106546667716508, + "tokens_seen": 64749568 + }, + { + "epoch": 0.02, + "learning_rate": 0.000990210239126946, + "loss": 0.2015, + "theoretical_loss": 5.103528689102281, + "tokens_seen": 65011712 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009901299951853636, + "loss": 0.1959, + "theoretical_loss": 5.100526247091967, + "tokens_seen": 65273856 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.007889259606599808, + "objective/train/docs_used": 31526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 4.06732177734375, + "objective/train/original_loss": 4.06732177734375, + "objective/train/theoretical_loss": 5.097539199786951, + "objective/train/tokens_used": 85996000, + "objective/train/value_avg": -0.0160980224609375, + "objective/train/value_loss": 0.0005863794358447194, + "objective/train/value_max": -0.00464630126953125, + "objective/train/value_min": -0.1026611328125, + "objective/train/value_reward_corr": 0.33268385277114015, + "objective/train/value_std": 0.00913238525390625, + "objective/train/weight_avg": 1.0081636905670166, + "objective/train/weighted_lm_loss": 4.102910041809082, + "objective/train/weights_max": 1.1006338596343994, + "objective/train/weights_min": 0.49862930178642273, + "theoretical_loss": 5.097539199786951, + "tokens_seen": 65536000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009900497512437812, + "loss": 0.199, + "theoretical_loss": 5.097539199786951, + "tokens_seen": 65536000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009899695073021988, + "loss": 0.2, + "theoretical_loss": 5.094567407145588, + "tokens_seen": 65798144 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009898892633606162, + "loss": 0.1976, + "theoretical_loss": 5.09161073095161, + "tokens_seen": 66060288 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989809019419034, + "loss": 0.1955, + "theoretical_loss": 5.0886690347832015, + "tokens_seen": 66322432 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009897287754774514, + "loss": 0.1975, + "theoretical_loss": 5.0857421839827275, + "tokens_seen": 66584576 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989648531535869, + "loss": 0.193, + "theoretical_loss": 5.082830045627072, + "tokens_seen": 66846720 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009895682875942866, + "loss": 0.1985, + "theoretical_loss": 5.079932488498602, + "tokens_seen": 67108864 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009894880436527042, + "loss": 0.1952, + "theoretical_loss": 5.077049383056725, + "tokens_seen": 67371008 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009894077997111218, + "loss": 0.1976, + "theoretical_loss": 5.074180601410026, + "tokens_seen": 67633152 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009893275557695395, + "loss": 0.1896, + "theoretical_loss": 5.0713260172889845, + "tokens_seen": 67895296 + }, + { + "epoch": 0.02, + "learning_rate": 0.000989247311827957, + "loss": 0.1925, + "theoretical_loss": 5.068485506019231, + "tokens_seen": 68157440 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009891670678863745, + "loss": 0.1949, + "theoretical_loss": 5.06565894449535, + "tokens_seen": 68419584 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009890868239447923, + "loss": 0.1939, + "theoretical_loss": 5.06284621115523, + "tokens_seen": 68681728 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.008360575884580612, + "objective/train/docs_used": 32629, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.77909517288208, + "objective/train/original_loss": 3.77909517288208, + "objective/train/theoretical_loss": 5.061444992481711, + "objective/train/tokens_used": 89272800, + "objective/train/value_avg": -0.016082763671875, + "objective/train/value_loss": 0.0004964357358403504, + "objective/train/value_max": -0.00473785400390625, + "objective/train/value_min": -0.08770751953125, + "objective/train/value_reward_corr": 0.21250019484264865, + "objective/train/value_std": 0.00782012939453125, + "objective/train/weight_avg": 1.0085920095443726, + "objective/train/weighted_lm_loss": 3.811274290084839, + "objective/train/weights_max": 1.0717307329177856, + "objective/train/weights_min": 0.37172776460647583, + "theoretical_loss": 5.061444992481711, + "tokens_seen": 68812800 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009890065800032097, + "loss": 0.1985, + "theoretical_loss": 5.060047185954893, + "tokens_seen": 68943872 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009889263360616273, + "loss": 0.1918, + "theoretical_loss": 5.057261750343864, + "tokens_seen": 69206016 + }, + { + "epoch": 0.02, + "learning_rate": 0.000988846092120045, + "loss": 0.1924, + "theoretical_loss": 5.0544897872410095, + "tokens_seen": 69468160 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009887658481784625, + "loss": 0.191, + "theoretical_loss": 5.051731181010866, + "tokens_seen": 69730304 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009886856042368801, + "loss": 0.1899, + "theoretical_loss": 5.048985817440432, + "tokens_seen": 69992448 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009886053602952978, + "loss": 0.1937, + "theoretical_loss": 5.046253583716425, + "tokens_seen": 70254592 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009885251163537154, + "loss": 0.1905, + "theoretical_loss": 5.043534368402973, + "tokens_seen": 70516736 + }, + { + "epoch": 0.02, + "learning_rate": 0.000988444872412133, + "loss": 0.1894, + "theoretical_loss": 5.040828061419762, + "tokens_seen": 70778880 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009883646284705506, + "loss": 0.1918, + "theoretical_loss": 5.038134554020587, + "tokens_seen": 71041024 + }, + { + "epoch": 0.02, + "learning_rate": 0.000988284384528968, + "loss": 0.1892, + "theoretical_loss": 5.03545373877234, + "tokens_seen": 71303168 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009882041405873858, + "loss": 0.191, + "theoretical_loss": 5.032785509534391, + "tokens_seen": 71565312 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009881238966458032, + "loss": 0.1886, + "theoretical_loss": 5.030129761438376, + "tokens_seen": 71827456 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.008393102325499058, + "objective/train/docs_used": 33987, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8430488109588623, + "objective/train/original_loss": 3.8430490493774414, + "objective/train/theoretical_loss": 5.0274863908683685, + "objective/train/tokens_used": 92549600, + "objective/train/value_avg": -0.014801025390625, + "objective/train/value_loss": 0.0004412951529957354, + "objective/train/value_max": -0.004314422607421875, + "objective/train/value_min": -0.091064453125, + "objective/train/value_reward_corr": 0.1426956661504004, + "objective/train/value_std": 0.006847381591796875, + "objective/train/weight_avg": 1.0085957050323486, + "objective/train/weighted_lm_loss": 3.876188039779663, + "objective/train/weights_max": 1.0953395366668701, + "objective/train/weights_min": 0.3716682195663452, + "theoretical_loss": 5.0274863908683685, + "tokens_seen": 72089600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009880436527042208, + "loss": 0.1877, + "theoretical_loss": 5.0274863908683685, + "tokens_seen": 72089600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009879634087626384, + "loss": 0.1913, + "theoretical_loss": 5.024855295441432, + "tokens_seen": 72351744 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987883164821056, + "loss": 0.1894, + "theoretical_loss": 5.022236373988544, + "tokens_seen": 72613888 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009878029208794737, + "loss": 0.1873, + "theoretical_loss": 5.01962952653588, + "tokens_seen": 72876032 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009877226769378913, + "loss": 0.1886, + "theoretical_loss": 5.017034654286462, + "tokens_seen": 73138176 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987642432996309, + "loss": 0.1903, + "theoretical_loss": 5.0144516596021385, + "tokens_seen": 73400320 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009875621890547263, + "loss": 0.183, + "theoretical_loss": 5.011880445985916, + "tokens_seen": 73662464 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987481945113144, + "loss": 0.1888, + "theoretical_loss": 5.009320918064615, + "tokens_seen": 73924608 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009874017011715615, + "loss": 0.1886, + "theoretical_loss": 5.006772981571855, + "tokens_seen": 74186752 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009873214572299791, + "loss": 0.1868, + "theoretical_loss": 5.004236543331345, + "tokens_seen": 74448896 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009872412132883967, + "loss": 0.1858, + "theoretical_loss": 5.001711511240506, + "tokens_seen": 74711040 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009871609693468144, + "loss": 0.1867, + "theoretical_loss": 4.999197794254371, + "tokens_seen": 74973184 + }, + { + "epoch": 0.02, + "learning_rate": 0.000987080725405232, + "loss": 0.1865, + "theoretical_loss": 4.9966953023697975, + "tokens_seen": 75235328 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.007199964951723814, + "objective/train/docs_used": 35047, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.54990816116333, + "objective/train/original_loss": 3.54990816116333, + "objective/train/theoretical_loss": 4.995448237995667, + "objective/train/tokens_used": 95826400, + "objective/train/value_avg": -0.01419830322265625, + "objective/train/value_loss": 0.0003746067523024976, + "objective/train/value_max": -0.003337860107421875, + "objective/train/value_min": -0.08599853515625, + "objective/train/value_reward_corr": 0.27188860037013274, + "objective/train/value_std": 0.007312774658203125, + "objective/train/weight_avg": 1.0073779821395874, + "objective/train/weighted_lm_loss": 3.5777623653411865, + "objective/train/weights_max": 1.0898046493530273, + "objective/train/weights_min": 0.3704579770565033, + "theoretical_loss": 4.995448237995667, + "tokens_seen": 75366400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009870004814636496, + "loss": 0.1861, + "theoretical_loss": 4.994203946609964, + "tokens_seen": 75497472 + }, + { + "epoch": 0.02, + "learning_rate": 0.000986920237522067, + "loss": 0.1833, + "theoretical_loss": 4.991723639009154, + "tokens_seen": 75759616 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009868399935804848, + "loss": 0.1844, + "theoretical_loss": 4.989254292597813, + "tokens_seen": 76021760 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009867597496389022, + "loss": 0.1873, + "theoretical_loss": 4.986795821387878, + "tokens_seen": 76283904 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009866795056973198, + "loss": 0.1853, + "theoretical_loss": 4.984348140358374, + "tokens_seen": 76546048 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009865992617557374, + "loss": 0.1862, + "theoretical_loss": 4.981911165441273, + "tokens_seen": 76808192 + }, + { + "epoch": 0.02, + "learning_rate": 0.000986519017814155, + "loss": 0.1847, + "theoretical_loss": 4.979484813507599, + "tokens_seen": 77070336 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009864387738725726, + "loss": 0.1831, + "theoretical_loss": 4.977069002353792, + "tokens_seen": 77332480 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009863585299309903, + "loss": 0.1819, + "theoretical_loss": 4.974663650688306, + "tokens_seen": 77594624 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009862782859894079, + "loss": 0.1851, + "theoretical_loss": 4.972268678118454, + "tokens_seen": 77856768 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009861980420478253, + "loss": 0.1813, + "theoretical_loss": 4.969884005137479, + "tokens_seen": 78118912 + }, + { + "epoch": 0.02, + "learning_rate": 0.000986117798106243, + "loss": 0.1859, + "theoretical_loss": 4.967509553111862, + "tokens_seen": 78381056 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.005118685308843851, + "objective/train/docs_used": 36308, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.8223462104797363, + "objective/train/original_loss": 3.8223462104797363, + "objective/train/theoretical_loss": 4.96514524426884, + "objective/train/tokens_used": 99103200, + "objective/train/value_avg": -0.0140838623046875, + "objective/train/value_loss": 0.00042888522148132324, + "objective/train/value_max": -0.00323486328125, + "objective/train/value_min": -0.122314453125, + "objective/train/value_reward_corr": 0.37372443614015616, + "objective/train/value_std": 0.00824737548828125, + "objective/train/weight_avg": 1.0053234100341797, + "objective/train/weighted_lm_loss": 3.844878673553467, + "objective/train/weights_max": 1.107279658317566, + "objective/train/weights_min": 0.3930663764476776, + "theoretical_loss": 4.96514524426884, + "tokens_seen": 78643200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009860375541646605, + "loss": 0.1816, + "theoretical_loss": 4.96514524426884, + "tokens_seen": 78643200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009859573102230783, + "loss": 0.184, + "theoretical_loss": 4.962791001684167, + "tokens_seen": 78905344 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009858770662814957, + "loss": 0.1822, + "theoretical_loss": 4.960446749270055, + "tokens_seen": 79167488 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009857968223399133, + "loss": 0.184, + "theoretical_loss": 4.958112411763365, + "tokens_seen": 79429632 + }, + { + "epoch": 0.02, + "learning_rate": 0.000985716578398331, + "loss": 0.1829, + "theoretical_loss": 4.955787914713962, + "tokens_seen": 79691776 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009856363344567486, + "loss": 0.1834, + "theoretical_loss": 4.953473184473312, + "tokens_seen": 79953920 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009855560905151662, + "loss": 0.183, + "theoretical_loss": 4.951168148183246, + "tokens_seen": 80216064 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009854758465735838, + "loss": 0.1818, + "theoretical_loss": 4.948872733764926, + "tokens_seen": 80478208 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009853956026320014, + "loss": 0.1799, + "theoretical_loss": 4.946586869908014, + "tokens_seen": 80740352 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009853153586904188, + "loss": 0.1828, + "theoretical_loss": 4.944310486060004, + "tokens_seen": 81002496 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009852351147488366, + "loss": 0.1825, + "theoretical_loss": 4.942043512415751, + "tokens_seen": 81264640 + }, + { + "epoch": 0.02, + "learning_rate": 0.000985154870807254, + "loss": 0.1838, + "theoretical_loss": 4.939785879907176, + "tokens_seen": 81526784 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009850746268656716, + "loss": 0.1813, + "theoretical_loss": 4.937537520193139, + "tokens_seen": 81788928 + }, + { + "epoch": 0.02, + "objective/train/advantage_avg": 0.004108819644898176, + "objective/train/docs_used": 37569, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.581354856491089, + "objective/train/original_loss": 3.5813543796539307, + "objective/train/theoretical_loss": 4.936416796473786, + "objective/train/tokens_used": 102380000, + "objective/train/value_avg": -0.0154571533203125, + "objective/train/value_loss": 0.001122858258895576, + "objective/train/value_max": -0.00327301025390625, + "objective/train/value_min": -0.1788330078125, + "objective/train/value_reward_corr": 0.38812370857659007, + "objective/train/value_std": 0.0128631591796875, + "objective/train/weight_avg": 1.004586100578308, + "objective/train/weighted_lm_loss": 3.600618600845337, + "objective/train/weights_max": 1.1683320999145508, + "objective/train/weights_min": 0.37106189131736755, + "theoretical_loss": 4.936416796473786, + "tokens_seen": 81920000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009849943829240892, + "loss": 0.1815, + "theoretical_loss": 4.93529836564949, + "tokens_seen": 82051072 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009849141389825069, + "loss": 0.1813, + "theoretical_loss": 4.933068349359283, + "tokens_seen": 82313216 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009848338950409245, + "loss": 0.1783, + "theoretical_loss": 4.93084740510316, + "tokens_seen": 82575360 + }, + { + "epoch": 0.03, + "learning_rate": 0.000984753651099342, + "loss": 0.1779, + "theoretical_loss": 4.928635467349885, + "tokens_seen": 82837504 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009846734071577597, + "loss": 0.1768, + "theoretical_loss": 4.92643247124705, + "tokens_seen": 83099648 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009845931632161773, + "loss": 0.1805, + "theoretical_loss": 4.924238352611924, + "tokens_seen": 83361792 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009845129192745947, + "loss": 0.1793, + "theoretical_loss": 4.922053047922455, + "tokens_seen": 83623936 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009844326753330123, + "loss": 0.1758, + "theoretical_loss": 4.919876494308432, + "tokens_seen": 83886080 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098435243139143, + "loss": 0.1795, + "theoretical_loss": 4.917708629542775, + "tokens_seen": 84148224 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009842721874498475, + "loss": 0.1775, + "theoretical_loss": 4.915549392032985, + "tokens_seen": 84410368 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009841919435082652, + "loss": 0.1797, + "theoretical_loss": 4.913398720812719, + "tokens_seen": 84672512 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009841116995666828, + "loss": 0.176, + "theoretical_loss": 4.9112565555335115, + "tokens_seen": 84934656 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.009054622612893581, + "objective/train/docs_used": 38678, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4969615936279297, + "objective/train/original_loss": 3.496961832046509, + "objective/train/theoretical_loss": 4.909122836456632, + "objective/train/tokens_used": 105656800, + "objective/train/value_avg": -0.013458251953125, + "objective/train/value_loss": 0.00028301688143983483, + "objective/train/value_max": -0.0036067962646484375, + "objective/train/value_min": -0.09588623046875, + "objective/train/value_reward_corr": 0.19258322737032904, + "objective/train/value_std": 0.00646209716796875, + "objective/train/weight_avg": 1.0091947317123413, + "objective/train/weighted_lm_loss": 3.5327301025390625, + "objective/train/weights_max": 1.0849108695983887, + "objective/train/weights_min": 0.8251521587371826, + "theoretical_loss": 4.909122836456632, + "tokens_seen": 85196800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009840314556251004, + "loss": 0.1783, + "theoretical_loss": 4.909122836456632, + "tokens_seen": 85196800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009839512116835178, + "loss": 0.1773, + "theoretical_loss": 4.906997504445066, + "tokens_seen": 85458944 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009838709677419356, + "loss": 0.1789, + "theoretical_loss": 4.904880500955633, + "tokens_seen": 85721088 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983790723800353, + "loss": 0.1752, + "theoretical_loss": 4.90277176803123, + "tokens_seen": 85983232 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009837104798587706, + "loss": 0.1746, + "theoretical_loss": 4.9006712482931984, + "tokens_seen": 86245376 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009836302359171882, + "loss": 0.1762, + "theoretical_loss": 4.89857888493381, + "tokens_seen": 86507520 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009835499919756058, + "loss": 0.1782, + "theoretical_loss": 4.896494621708882, + "tokens_seen": 86769664 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009834697480340234, + "loss": 0.1751, + "theoretical_loss": 4.8944184029305, + "tokens_seen": 87031808 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983389504092441, + "loss": 0.1761, + "theoretical_loss": 4.892350173459863, + "tokens_seen": 87293952 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009833092601508587, + "loss": 0.1734, + "theoretical_loss": 4.890289878700239, + "tokens_seen": 87556096 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009832290162092763, + "loss": 0.1735, + "theoretical_loss": 4.888237464590028, + "tokens_seen": 87818240 + }, + { + "epoch": 0.03, + "learning_rate": 0.000983148772267694, + "loss": 0.1766, + "theoretical_loss": 4.8861928775959464, + "tokens_seen": 88080384 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009830685283261113, + "loss": 0.1752, + "theoretical_loss": 4.884156064706302, + "tokens_seen": 88342528 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.008355207741260529, + "objective/train/docs_used": 39491, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.4316039085388184, + "objective/train/original_loss": 3.4316043853759766, + "objective/train/theoretical_loss": 4.8831405571254844, + "objective/train/tokens_used": 108933600, + "objective/train/value_avg": -0.01488494873046875, + "objective/train/value_loss": 0.0006291866302490234, + "objective/train/value_max": -0.0031604766845703125, + "objective/train/value_min": -0.15185546875, + "objective/train/value_reward_corr": 0.39128727304023586, + "objective/train/value_std": 0.00885772705078125, + "objective/train/weight_avg": 1.0086404085159302, + "objective/train/weighted_lm_loss": 3.462775230407715, + "objective/train/weights_max": 1.1140156984329224, + "objective/train/weights_min": 0.3899836838245392, + "theoretical_loss": 4.8831405571254844, + "tokens_seen": 88473600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009829882843845291, + "loss": 0.1745, + "theoretical_loss": 4.882126973424384, + "tokens_seen": 88604672 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009829080404429465, + "loss": 0.1729, + "theoretical_loss": 4.880105551761961, + "tokens_seen": 88866816 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009828277965013641, + "loss": 0.1784, + "theoretical_loss": 4.87809174823286, + "tokens_seen": 89128960 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009827475525597817, + "loss": 0.1722, + "theoretical_loss": 4.876085511846673, + "tokens_seen": 89391104 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009826673086181994, + "loss": 0.1772, + "theoretical_loss": 4.874086792102535, + "tokens_seen": 89653248 + }, + { + "epoch": 0.03, + "learning_rate": 0.000982587064676617, + "loss": 0.1766, + "theoretical_loss": 4.872095538983015, + "tokens_seen": 89915392 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009825068207350346, + "loss": 0.1784, + "theoretical_loss": 4.870111702948094, + "tokens_seen": 90177536 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009824265767934522, + "loss": 0.1736, + "theoretical_loss": 4.868135234929232, + "tokens_seen": 90439680 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009823463328518696, + "loss": 0.1743, + "theoretical_loss": 4.866166086323535, + "tokens_seen": 90701824 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009822660889102874, + "loss": 0.1779, + "theoretical_loss": 4.864204208988003, + "tokens_seen": 90963968 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009821858449687048, + "loss": 0.1733, + "theoretical_loss": 4.86224955523387, + "tokens_seen": 91226112 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009821056010271224, + "loss": 0.1766, + "theoretical_loss": 4.860302077821023, + "tokens_seen": 91488256 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.006558452267199755, + "objective/train/docs_used": 40748, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2776377201080322, + "objective/train/original_loss": 3.2776379585266113, + "objective/train/theoretical_loss": 4.858361729952518, + "objective/train/tokens_used": 112210400, + "objective/train/value_avg": -0.0146636962890625, + "objective/train/value_loss": 0.0004739796568173915, + "objective/train/value_max": -0.00359344482421875, + "objective/train/value_min": -0.0782470703125, + "objective/train/value_reward_corr": 0.21044601071028804, + "objective/train/value_std": 0.007022857666015625, + "objective/train/weight_avg": 1.0067769289016724, + "objective/train/weighted_lm_loss": 3.300762176513672, + "objective/train/weights_max": 1.0740199089050293, + "objective/train/weights_min": 0.3733791708946228, + "theoretical_loss": 4.858361729952518, + "tokens_seen": 91750400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098202535708554, + "loss": 0.1726, + "theoretical_loss": 4.858361729952518, + "tokens_seen": 91750400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009819451131439577, + "loss": 0.1719, + "theoretical_loss": 4.856428465269159, + "tokens_seen": 92012544 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009818648692023753, + "loss": 0.1728, + "theoretical_loss": 4.8545022378441836, + "tokens_seen": 92274688 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009817846252607929, + "loss": 0.174, + "theoretical_loss": 4.852583002178001, + "tokens_seen": 92536832 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009817043813192105, + "loss": 0.1715, + "theoretical_loss": 4.8506707131930344, + "tokens_seen": 92798976 + }, + { + "epoch": 0.03, + "learning_rate": 0.000981624137377628, + "loss": 0.1724, + "theoretical_loss": 4.848765326228618, + "tokens_seen": 93061120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009815438934360455, + "loss": 0.1693, + "theoretical_loss": 4.846866797035984, + "tokens_seen": 93323264 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009814636494944631, + "loss": 0.1727, + "theoretical_loss": 4.844975081773322, + "tokens_seen": 93585408 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009813834055528807, + "loss": 0.1724, + "theoretical_loss": 4.843090137000904, + "tokens_seen": 93847552 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009813031616112983, + "loss": 0.1726, + "theoretical_loss": 4.841211919676287, + "tokens_seen": 94109696 + }, + { + "epoch": 0.03, + "learning_rate": 0.000981222917669716, + "loss": 0.1679, + "theoretical_loss": 4.839340387149586, + "tokens_seen": 94371840 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009811426737281336, + "loss": 0.1715, + "theoretical_loss": 4.837475497158817, + "tokens_seen": 94633984 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009810624297865512, + "loss": 0.1691, + "theoretical_loss": 4.835617207825303, + "tokens_seen": 94896128 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": -0.0007320477161556482, + "objective/train/docs_used": 42039, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1431312561035156, + "objective/train/original_loss": 3.1431312561035156, + "objective/train/theoretical_loss": 4.8346905254216255, + "objective/train/tokens_used": 115487200, + "objective/train/value_avg": -0.01483154296875, + "objective/train/value_loss": 0.0013606772990897298, + "objective/train/value_max": -0.0031108856201171875, + "objective/train/value_min": -0.1416015625, + "objective/train/value_reward_corr": 0.5555549113783049, + "objective/train/value_std": 0.0105743408203125, + "objective/train/weight_avg": 0.9998704791069031, + "objective/train/weighted_lm_loss": 3.1514244079589844, + "objective/train/weights_max": 1.0741158723831177, + "objective/train/weights_min": 0.3726819157600403, + "theoretical_loss": 4.8346905254216255, + "tokens_seen": 95027200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009809821858449686, + "loss": 0.1675, + "theoretical_loss": 4.83376547764915, + "tokens_seen": 95158272 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009809019419033864, + "loss": 0.1731, + "theoretical_loss": 4.831920265504792, + "tokens_seen": 95420416 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009808216979618038, + "loss": 0.1728, + "theoretical_loss": 4.830081530636594, + "tokens_seen": 95682560 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009807414540202216, + "loss": 0.1722, + "theoretical_loss": 4.8282492326545245, + "tokens_seen": 95944704 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980661210078639, + "loss": 0.1718, + "theoretical_loss": 4.826423331529884, + "tokens_seen": 96206848 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009805809661370566, + "loss": 0.1696, + "theoretical_loss": 4.824603787591102, + "tokens_seen": 96468992 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009805007221954742, + "loss": 0.1729, + "theoretical_loss": 4.822790561519591, + "tokens_seen": 96731136 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009804204782538919, + "loss": 0.1719, + "theoretical_loss": 4.8209836143456535, + "tokens_seen": 96993280 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009803402343123095, + "loss": 0.1678, + "theoretical_loss": 4.81918290744446, + "tokens_seen": 97255424 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980259990370727, + "loss": 0.1712, + "theoretical_loss": 4.817388402532074, + "tokens_seen": 97517568 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009801797464291447, + "loss": 0.1768, + "theoretical_loss": 4.815600061661536, + "tokens_seen": 97779712 + }, + { + "epoch": 0.03, + "learning_rate": 0.000980099502487562, + "loss": 0.1706, + "theoretical_loss": 4.813817847219008, + "tokens_seen": 98041856 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.006740586832165718, + "objective/train/docs_used": 43181, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.3436176776885986, + "objective/train/original_loss": 3.3436179161071777, + "objective/train/theoretical_loss": 4.812041721919962, + "objective/train/tokens_used": 118764000, + "objective/train/value_avg": -0.013214111328125, + "objective/train/value_loss": 0.0005143888993188739, + "objective/train/value_max": -0.0032100677490234375, + "objective/train/value_min": -0.17919921875, + "objective/train/value_reward_corr": 0.2184640658155713, + "objective/train/value_std": 0.008026123046875, + "objective/train/weight_avg": 1.0069705247879028, + "objective/train/weighted_lm_loss": 3.366760492324829, + "objective/train/weights_max": 1.1044021844863892, + "objective/train/weights_min": 0.3713989555835724, + "theoretical_loss": 4.812041721919962, + "tokens_seen": 98304000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00098001925854598, + "loss": 0.1654, + "theoretical_loss": 4.812041721919962, + "tokens_seen": 98304000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009799390146043973, + "loss": 0.1666, + "theoretical_loss": 4.810271648805427, + "tokens_seen": 98566144 + }, + { + "epoch": 0.03, + "learning_rate": 0.000979858770662815, + "loss": 0.1702, + "theoretical_loss": 4.8085075912383015, + "tokens_seen": 98828288 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009797785267212325, + "loss": 0.1683, + "theoretical_loss": 4.806749512899687, + "tokens_seen": 99090432 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009796982827796502, + "loss": 0.1686, + "theoretical_loss": 4.804997377785307, + "tokens_seen": 99352576 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009796180388380678, + "loss": 0.1674, + "theoretical_loss": 4.8032511502019535, + "tokens_seen": 99614720 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009795377948964854, + "loss": 0.1677, + "theoretical_loss": 4.801510794763988, + "tokens_seen": 99876864 + }, + { + "epoch": 0.03, + "learning_rate": 0.000979457550954903, + "loss": 0.1666, + "theoretical_loss": 4.799776276389897, + "tokens_seen": 100139008 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009793773070133206, + "loss": 0.1693, + "theoretical_loss": 4.798047560298882, + "tokens_seen": 100401152 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009792970630717382, + "loss": 0.1695, + "theoretical_loss": 4.796324612007515, + "tokens_seen": 100663296 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009792168191301556, + "loss": 0.1668, + "theoretical_loss": 4.794607397326421, + "tokens_seen": 100925440 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009791365751885732, + "loss": 0.1685, + "theoretical_loss": 4.792895882357019, + "tokens_seen": 101187584 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009790563312469908, + "loss": 0.1679, + "theoretical_loss": 4.791190033488302, + "tokens_seen": 101449728 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.005793612916022539, + "objective/train/docs_used": 44310, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.2112574577331543, + "objective/train/original_loss": 3.2112574577331543, + "objective/train/theoretical_loss": 4.790339223416113, + "objective/train/tokens_used": 122040800, + "objective/train/value_avg": -0.01383209228515625, + "objective/train/value_loss": 0.0006092512048780918, + "objective/train/value_max": -0.0031719207763671875, + "objective/train/value_min": -0.1739501953125, + "objective/train/value_reward_corr": 0.3657779485356697, + "objective/train/value_std": 0.0101318359375, + "objective/train/weight_avg": 1.0060677528381348, + "objective/train/weighted_lm_loss": 3.2284328937530518, + "objective/train/weights_max": 1.1621463298797607, + "objective/train/weights_min": 0.3706558644771576, + "theoretical_loss": 4.790339223416113, + "tokens_seen": 101580800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009789760873054085, + "loss": 0.1651, + "theoretical_loss": 4.7894898173936635, + "tokens_seen": 101711872 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978895843363826, + "loss": 0.1639, + "theoretical_loss": 4.787795201027757, + "tokens_seen": 101974016 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009788155994222437, + "loss": 0.1675, + "theoretical_loss": 4.786106151623423, + "tokens_seen": 102236160 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009787353554806613, + "loss": 0.1616, + "theoretical_loss": 4.784422636688622, + "tokens_seen": 102498304 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978655111539079, + "loss": 0.1641, + "theoretical_loss": 4.782744624003442, + "tokens_seen": 102760448 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009785748675974963, + "loss": 0.167, + "theoretical_loss": 4.781072081617127, + "tokens_seen": 103022592 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978494623655914, + "loss": 0.166, + "theoretical_loss": 4.779404977845148, + "tokens_seen": 103284736 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009784143797143315, + "loss": 0.1642, + "theoretical_loss": 4.777743281266321, + "tokens_seen": 103546880 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009783341357727491, + "loss": 0.1611, + "theoretical_loss": 4.776086960719956, + "tokens_seen": 103809024 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009782538918311668, + "loss": 0.1675, + "theoretical_loss": 4.774435985303043, + "tokens_seen": 104071168 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009781736478895844, + "loss": 0.1605, + "theoretical_loss": 4.772790324367482, + "tokens_seen": 104333312 + }, + { + "epoch": 0.03, + "learning_rate": 0.000978093403948002, + "loss": 0.1659, + "theoretical_loss": 4.771149947517346, + "tokens_seen": 104595456 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.006030434276908636, + "objective/train/docs_used": 45444, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.323787212371826, + "objective/train/original_loss": 3.323786973953247, + "objective/train/theoretical_loss": 4.769514824606178, + "objective/train/tokens_used": 125317600, + "objective/train/value_avg": -0.01244354248046875, + "objective/train/value_loss": 0.0006740050157532096, + "objective/train/value_max": -0.00269317626953125, + "objective/train/value_min": -0.1654052734375, + "objective/train/value_reward_corr": 0.42685833190639455, + "objective/train/value_std": 0.008056640625, + "objective/train/weight_avg": 1.0063230991363525, + "objective/train/weighted_lm_loss": 3.346615791320801, + "objective/train/weights_max": 1.1140156984329224, + "objective/train/weights_min": 0.3717136085033417, + "theoretical_loss": 4.769514824606178, + "tokens_seen": 104857600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009780131600064196, + "loss": 0.1615, + "theoretical_loss": 4.769514824606178, + "tokens_seen": 104857600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009779329160648372, + "loss": 0.1595, + "theoretical_loss": 4.7678849257343305, + "tokens_seen": 105119744 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009778526721232546, + "loss": 0.1621, + "theoretical_loss": 4.766260221246329, + "tokens_seen": 105381888 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009777724281816724, + "loss": 0.1613, + "theoretical_loss": 4.764640681728281, + "tokens_seen": 105644032 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009776921842400898, + "loss": 0.1644, + "theoretical_loss": 4.763026278005314, + "tokens_seen": 105906176 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009776119402985074, + "loss": 0.1608, + "theoretical_loss": 4.761416981139046, + "tokens_seen": 106168320 + }, + { + "epoch": 0.03, + "learning_rate": 0.000977531696356925, + "loss": 0.1626, + "theoretical_loss": 4.75981276242509, + "tokens_seen": 106430464 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009774514524153427, + "loss": 0.162, + "theoretical_loss": 4.758213593390595, + "tokens_seen": 106692608 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009773712084737603, + "loss": 0.1606, + "theoretical_loss": 4.756619445791808, + "tokens_seen": 106954752 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009772909645321779, + "loss": 0.1626, + "theoretical_loss": 4.755030291611678, + "tokens_seen": 107216896 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009772107205905955, + "loss": 0.1627, + "theoretical_loss": 4.753446103057492, + "tokens_seen": 107479040 + }, + { + "epoch": 0.03, + "learning_rate": 0.000977130476649013, + "loss": 0.1607, + "theoretical_loss": 4.751866852558529, + "tokens_seen": 107741184 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009770502327074307, + "loss": 0.1627, + "theoretical_loss": 4.7502925127637585, + "tokens_seen": 108003328 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.0075781201012432575, + "objective/train/docs_used": 46682, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.372586250305176, + "objective/train/original_loss": 3.372586727142334, + "objective/train/theoretical_loss": 4.749507175892756, + "objective/train/tokens_used": 128594400, + "objective/train/value_avg": -0.013763427734375, + "objective/train/value_loss": 0.0003684050461743027, + "objective/train/value_max": -0.0030040740966796875, + "objective/train/value_min": -0.1995849609375, + "objective/train/value_reward_corr": 0.3359668813555781, + "objective/train/value_std": 0.0087127685546875, + "objective/train/weight_avg": 1.007756233215332, + "objective/train/weighted_lm_loss": 3.40148663520813, + "objective/train/weights_max": 1.1865150928497314, + "objective/train/weights_min": 0.3717136085033417, + "theoretical_loss": 4.749507175892756, + "tokens_seen": 108134400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009769699887658481, + "loss": 0.1621, + "theoretical_loss": 4.7487230565395535, + "tokens_seen": 108265472 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009768897448242657, + "loss": 0.1611, + "theoretical_loss": 4.747158456967452, + "tokens_seen": 108527616 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009768095008826833, + "loss": 0.162, + "theoretical_loss": 4.745598687341927, + "tokens_seen": 108789760 + }, + { + "epoch": 0.03, + "learning_rate": 0.000976729256941101, + "loss": 0.1631, + "theoretical_loss": 4.744043721168196, + "tokens_seen": 109051904 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009766490129995186, + "loss": 0.1598, + "theoretical_loss": 4.74249353216006, + "tokens_seen": 109314048 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009765687690579362, + "loss": 0.1589, + "theoretical_loss": 4.740948094237761, + "tokens_seen": 109576192 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009764885251163538, + "loss": 0.1612, + "theoretical_loss": 4.739407381525874, + "tokens_seen": 109838336 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009764082811747713, + "loss": 0.1607, + "theoretical_loss": 4.7378713683512235, + "tokens_seen": 110100480 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009763280372331889, + "loss": 0.1598, + "theoretical_loss": 4.7363400292408215, + "tokens_seen": 110362624 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009762477932916065, + "loss": 0.1606, + "theoretical_loss": 4.734813338919842, + "tokens_seen": 110624768 + }, + { + "epoch": 0.03, + "learning_rate": 0.000976167549350024, + "loss": 0.1582, + "theoretical_loss": 4.733291272309609, + "tokens_seen": 110886912 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009760873054084417, + "loss": 0.1579, + "theoretical_loss": 4.731773804525616, + "tokens_seen": 111149056 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.004441777244210243, + "objective/train/docs_used": 47788, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.1115875244140625, + "objective/train/original_loss": 3.1115875244140625, + "objective/train/theoretical_loss": 4.73026091087557, + "objective/train/tokens_used": 131871200, + "objective/train/value_avg": -0.01221466064453125, + "objective/train/value_loss": 0.0004770817467942834, + "objective/train/value_max": -0.00266265869140625, + "objective/train/value_min": -0.118408203125, + "objective/train/value_reward_corr": 0.48949773977616423, + "objective/train/value_std": 0.007781982421875, + "objective/train/weight_avg": 1.0046648979187012, + "objective/train/weighted_lm_loss": 3.124300003051758, + "objective/train/weights_max": 1.0862188339233398, + "objective/train/weights_min": 0.3779332637786865, + "theoretical_loss": 4.73026091087557, + "tokens_seen": 111411200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009760070614668593, + "loss": 0.1564, + "theoretical_loss": 4.73026091087557, + "tokens_seen": 111411200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009759268175252769, + "loss": 0.1583, + "theoretical_loss": 4.728752566857459, + "tokens_seen": 111673344 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009758465735836945, + "loss": 0.1586, + "theoretical_loss": 4.72724874815764, + "tokens_seen": 111935488 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009757663296421121, + "loss": 0.1579, + "theoretical_loss": 4.725749430648958, + "tokens_seen": 112197632 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009756860857005296, + "loss": 0.1593, + "theoretical_loss": 4.724254590388881, + "tokens_seen": 112459776 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009756058417589472, + "loss": 0.1575, + "theoretical_loss": 4.722764203617663, + "tokens_seen": 112721920 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009755255978173648, + "loss": 0.159, + "theoretical_loss": 4.7212782467565235, + "tokens_seen": 112984064 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009754453538757823, + "loss": 0.1581, + "theoretical_loss": 4.719796696405858, + "tokens_seen": 113246208 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009753651099342, + "loss": 0.1561, + "theoretical_loss": 4.718319529343462, + "tokens_seen": 113508352 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009752848659926175, + "loss": 0.1577, + "theoretical_loss": 4.716846722522781, + "tokens_seen": 113770496 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009752046220510353, + "loss": 0.155, + "theoretical_loss": 4.715378253071181, + "tokens_seen": 114032640 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009751243781094528, + "loss": 0.1538, + "theoretical_loss": 4.713914098288242, + "tokens_seen": 114294784 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009750441341678703, + "loss": 0.1555, + "theoretical_loss": 4.712454235644064, + "tokens_seen": 114556928 + }, + { + "epoch": 0.03, + "objective/train/advantage_avg": 0.008879084140062332, + "objective/train/docs_used": 48994, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.0145037174224854, + "objective/train/original_loss": 3.0145039558410645, + "objective/train/theoretical_loss": 4.711725906879634, + "objective/train/tokens_used": 135148000, + "objective/train/value_avg": -0.0135955810546875, + "objective/train/value_loss": 0.0002984044549521059, + "objective/train/value_max": -0.0026531219482421875, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.3232125163801465, + "objective/train/value_std": 0.0081634521484375, + "objective/train/weight_avg": 1.0090208053588867, + "objective/train/weighted_lm_loss": 3.04550838470459, + "objective/train/weights_max": 1.097188949584961, + "objective/train/weights_min": 0.3715803325176239, + "theoretical_loss": 4.711725906879634, + "tokens_seen": 114688000 + }, + { + "epoch": 0.03, + "learning_rate": 0.000974963890226288, + "loss": 0.1568, + "theoretical_loss": 4.710998642777606, + "tokens_seen": 114819072 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009748836462847055, + "loss": 0.1594, + "theoretical_loss": 4.709547297495034, + "tokens_seen": 115081216 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009748034023431231, + "loss": 0.1531, + "theoretical_loss": 4.708100177768094, + "tokens_seen": 115343360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009747231584015407, + "loss": 0.156, + "theoretical_loss": 4.7066572617325075, + "tokens_seen": 115605504 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009746429144599583, + "loss": 0.159, + "theoretical_loss": 4.705218527686375, + "tokens_seen": 115867648 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009745626705183758, + "loss": 0.1583, + "theoretical_loss": 4.703783954088612, + "tokens_seen": 116129792 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009744824265767935, + "loss": 0.159, + "theoretical_loss": 4.702353519557398, + "tokens_seen": 116391936 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009744021826352111, + "loss": 0.1551, + "theoretical_loss": 4.700927202868639, + "tokens_seen": 116654080 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009743219386936286, + "loss": 0.1544, + "theoretical_loss": 4.699504982954452, + "tokens_seen": 116916224 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009742416947520463, + "loss": 0.1559, + "theoretical_loss": 4.698086838901676, + "tokens_seen": 117178368 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009741614508104638, + "loss": 0.154, + "theoretical_loss": 4.696672749950385, + "tokens_seen": 117440512 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009740812068688814, + "loss": 0.1536, + "theoretical_loss": 4.695262695492428, + "tokens_seen": 117702656 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005216541700065136, + "objective/train/docs_used": 50256, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 3.161985158920288, + "objective/train/original_loss": 3.161985397338867, + "objective/train/theoretical_loss": 4.693856655069986, + "objective/train/tokens_used": 138424800, + "objective/train/value_avg": -0.013153076171875, + "objective/train/value_loss": 0.0008696132572367787, + "objective/train/value_max": -0.00238800048828125, + "objective/train/value_min": -0.26220703125, + "objective/train/value_reward_corr": 0.28899940519313116, + "objective/train/value_std": 0.008697509765625, + "objective/train/weight_avg": 1.0055670738220215, + "objective/train/weighted_lm_loss": 3.1820406913757324, + "objective/train/weights_max": 1.1238499879837036, + "objective/train/weights_min": 0.22635191679000854, + "theoretical_loss": 4.693856655069986, + "tokens_seen": 117964800 + }, + { + "epoch": 0.04, + "learning_rate": 0.000974000962927299, + "loss": 0.1545, + "theoretical_loss": 4.693856655069986, + "tokens_seen": 117964800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009739207189857165, + "loss": 0.1559, + "theoretical_loss": 4.692454608374145, + "tokens_seen": 118226944 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009738404750441343, + "loss": 0.1526, + "theoretical_loss": 4.6910565352434785, + "tokens_seen": 118489088 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009737602311025518, + "loss": 0.1531, + "theoretical_loss": 4.689662415662662, + "tokens_seen": 118751232 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009736799871609694, + "loss": 0.1525, + "theoretical_loss": 4.688272229761087, + "tokens_seen": 119013376 + }, + { + "epoch": 0.04, + "learning_rate": 0.000973599743219387, + "loss": 0.1542, + "theoretical_loss": 4.686885957811503, + "tokens_seen": 119275520 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009735194992778046, + "loss": 0.1505, + "theoretical_loss": 4.685503580228671, + "tokens_seen": 119537664 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009734392553362221, + "loss": 0.1508, + "theoretical_loss": 4.684125077568028, + "tokens_seen": 119799808 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009733590113946397, + "loss": 0.1548, + "theoretical_loss": 4.682750430524376, + "tokens_seen": 120061952 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009732787674530573, + "loss": 0.1532, + "theoretical_loss": 4.6813796199305795, + "tokens_seen": 120324096 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009731985235114748, + "loss": 0.1521, + "theoretical_loss": 4.68001262675628, + "tokens_seen": 120586240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009731182795698925, + "loss": 0.1479, + "theoretical_loss": 4.678649432106627, + "tokens_seen": 120848384 + }, + { + "epoch": 0.04, + "learning_rate": 0.00097303803562831, + "loss": 0.1518, + "theoretical_loss": 4.677290017221017, + "tokens_seen": 121110528 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.006032022647559643, + "objective/train/docs_used": 51395, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.9272947311401367, + "objective/train/original_loss": 2.9272947311401367, + "objective/train/theoretical_loss": 4.676611721363443, + "objective/train/tokens_used": 141701600, + "objective/train/value_avg": -0.01284027099609375, + "objective/train/value_loss": 0.000364342937245965, + "objective/train/value_max": -0.001987457275390625, + "objective/train/value_min": -0.30517578125, + "objective/train/value_reward_corr": 0.3808824724269897, + "objective/train/value_std": 0.010894775390625, + "objective/train/weight_avg": 1.0062044858932495, + "objective/train/weighted_lm_loss": 2.945836067199707, + "objective/train/weights_max": 1.2145042419433594, + "objective/train/weights_min": 0.3716398775577545, + "theoretical_loss": 4.676611721363443, + "tokens_seen": 121241600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009729577916867277, + "loss": 0.1482, + "theoretical_loss": 4.675934363471857, + "tokens_seen": 121372672 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009728775477451453, + "loss": 0.1491, + "theoretical_loss": 4.674582452363334, + "tokens_seen": 121634816 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009727973038035629, + "loss": 0.1481, + "theoretical_loss": 4.673234265530201, + "tokens_seen": 121896960 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009727170598619804, + "loss": 0.1474, + "theoretical_loss": 4.671889784736576, + "tokens_seen": 122159104 + }, + { + "epoch": 0.04, + "learning_rate": 0.000972636815920398, + "loss": 0.148, + "theoretical_loss": 4.670548991874758, + "tokens_seen": 122421248 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009725565719788156, + "loss": 0.1472, + "theoretical_loss": 4.669211868964052, + "tokens_seen": 122683392 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009724763280372332, + "loss": 0.1496, + "theoretical_loss": 4.66787839814961, + "tokens_seen": 122945536 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009723960840956508, + "loss": 0.1483, + "theoretical_loss": 4.666548561701285, + "tokens_seen": 123207680 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009723158401540683, + "loss": 0.1466, + "theoretical_loss": 4.665222342012491, + "tokens_seen": 123469824 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009722355962124861, + "loss": 0.1473, + "theoretical_loss": 4.663899721599093, + "tokens_seen": 123731968 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009721553522709036, + "loss": 0.1437, + "theoretical_loss": 4.6625806830982865, + "tokens_seen": 123994112 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009720751083293211, + "loss": 0.1438, + "theoretical_loss": 4.661265209267507, + "tokens_seen": 124256256 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005084542091935873, + "objective/train/docs_used": 52572, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7900288105010986, + "objective/train/original_loss": 2.7900285720825195, + "objective/train/theoretical_loss": 4.659953282983348, + "objective/train/tokens_used": 144978400, + "objective/train/value_avg": -0.01073455810546875, + "objective/train/value_loss": 0.00028898261371068656, + "objective/train/value_max": -0.001483917236328125, + "objective/train/value_min": -0.2239990234375, + "objective/train/value_reward_corr": 0.39944171540418233, + "objective/train/value_std": 0.008880615234375, + "objective/train/weight_avg": 1.00521981716156, + "objective/train/weighted_lm_loss": 2.8061976432800293, + "objective/train/weights_max": 1.1481295824050903, + "objective/train/weights_min": 0.3976996839046478, + "theoretical_loss": 4.659953282983348, + "tokens_seen": 124518400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009719948643877388, + "loss": 0.1466, + "theoretical_loss": 4.659953282983348, + "tokens_seen": 124518400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009719146204461563, + "loss": 0.1488, + "theoretical_loss": 4.658644887240481, + "tokens_seen": 124780544 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009718343765045739, + "loss": 0.1485, + "theoretical_loss": 4.657340005150602, + "tokens_seen": 125042688 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009717541325629915, + "loss": 0.1492, + "theoretical_loss": 4.656038619941382, + "tokens_seen": 125304832 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009716738886214091, + "loss": 0.1457, + "theoretical_loss": 4.654740714955429, + "tokens_seen": 125566976 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009715936446798266, + "loss": 0.145, + "theoretical_loss": 4.653446273649259, + "tokens_seen": 125829120 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009715134007382443, + "loss": 0.1478, + "theoretical_loss": 4.652155279592286, + "tokens_seen": 126091264 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009714331567966619, + "loss": 0.1463, + "theoretical_loss": 4.650867716465819, + "tokens_seen": 126353408 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009713529128550794, + "loss": 0.1463, + "theoretical_loss": 4.6495835680620665, + "tokens_seen": 126615552 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009712726689134971, + "loss": 0.1432, + "theoretical_loss": 4.648302818283158, + "tokens_seen": 126877696 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009711924249719146, + "loss": 0.1477, + "theoretical_loss": 4.64702545114017, + "tokens_seen": 127139840 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009711121810303323, + "loss": 0.1452, + "theoretical_loss": 4.645751450752172, + "tokens_seen": 127401984 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009710319370887498, + "loss": 0.1419, + "theoretical_loss": 4.644480801345268, + "tokens_seen": 127664128 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0065161967650055885, + "objective/train/docs_used": 53741, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8748562335968018, + "objective/train/original_loss": 2.8748559951782227, + "objective/train/theoretical_loss": 4.6438467283594225, + "objective/train/tokens_used": 148255200, + "objective/train/value_avg": -0.0144805908203125, + "objective/train/value_loss": 0.0003499473095871508, + "objective/train/value_max": -0.00269317626953125, + "objective/train/value_min": -0.36376953125, + "objective/train/value_reward_corr": 0.4529447418859527, + "objective/train/value_std": 0.01155853271484375, + "objective/train/weight_avg": 1.0066853761672974, + "objective/train/weighted_lm_loss": 2.8947949409484863, + "objective/train/weights_max": 1.2178692817687988, + "objective/train/weights_min": 0.37203988432884216, + "theoretical_loss": 4.6438467283594225, + "tokens_seen": 127795200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009709516931471673, + "loss": 0.1469, + "theoretical_loss": 4.643213487251664, + "tokens_seen": 127926272 + }, + { + "epoch": 0.04, + "learning_rate": 0.000970871449205585, + "loss": 0.1431, + "theoretical_loss": 4.641949492908737, + "tokens_seen": 128188416 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009707912052640026, + "loss": 0.1445, + "theoretical_loss": 4.640688802858113, + "tokens_seen": 128450560 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009707109613224202, + "loss": 0.1478, + "theoretical_loss": 4.63943140174476, + "tokens_seen": 128712704 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009706307173808378, + "loss": 0.1449, + "theoretical_loss": 4.6381772743160905, + "tokens_seen": 128974848 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009705504734392554, + "loss": 0.1467, + "theoretical_loss": 4.636926405421065, + "tokens_seen": 129236992 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009704702294976729, + "loss": 0.1425, + "theoretical_loss": 4.635678780009318, + "tokens_seen": 129499136 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009703899855560905, + "loss": 0.1433, + "theoretical_loss": 4.634434383130284, + "tokens_seen": 129761280 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009703097416145081, + "loss": 0.1452, + "theoretical_loss": 4.633193199932336, + "tokens_seen": 130023424 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009702294976729256, + "loss": 0.145, + "theoretical_loss": 4.631955215661932, + "tokens_seen": 130285568 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009701492537313433, + "loss": 0.1436, + "theoretical_loss": 4.630720415662774, + "tokens_seen": 130547712 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009700690097897609, + "loss": 0.1374, + "theoretical_loss": 4.629488785374969, + "tokens_seen": 130809856 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005413413047790527, + "objective/train/docs_used": 54897, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.7717833518981934, + "objective/train/original_loss": 2.7717831134796143, + "objective/train/theoretical_loss": 4.628260310334209, + "objective/train/tokens_used": 151532000, + "objective/train/value_avg": -0.012939453125, + "objective/train/value_loss": 0.0007990457233972847, + "objective/train/value_max": -0.002124786376953125, + "objective/train/value_min": -0.2451171875, + "objective/train/value_reward_corr": 0.2380011948215656, + "objective/train/value_std": 0.01189422607421875, + "objective/train/weight_avg": 1.0057445764541626, + "objective/train/weighted_lm_loss": 2.7883291244506836, + "objective/train/weights_max": 1.2066720724105835, + "objective/train/weights_min": 0.22692078351974487, + "theoretical_loss": 4.628260310334209, + "tokens_seen": 131072000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009699887658481786, + "loss": 0.1404, + "theoretical_loss": 4.628260310334209, + "tokens_seen": 131072000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009699085219065961, + "loss": 0.1406, + "theoretical_loss": 4.6270349761709495, + "tokens_seen": 131334144 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009698282779650137, + "loss": 0.1373, + "theoretical_loss": 4.625812768609601, + "tokens_seen": 131596288 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009697480340234313, + "loss": 0.1405, + "theoretical_loss": 4.62459367346773, + "tokens_seen": 131858432 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009696677900818488, + "loss": 0.1395, + "theoretical_loss": 4.623377676655271, + "tokens_seen": 132120576 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009695875461402664, + "loss": 0.1398, + "theoretical_loss": 4.622164764173735, + "tokens_seen": 132382720 + }, + { + "epoch": 0.04, + "learning_rate": 0.000969507302198684, + "loss": 0.1402, + "theoretical_loss": 4.62095492211544, + "tokens_seen": 132644864 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009694270582571016, + "loss": 0.1464, + "theoretical_loss": 4.6197481366627455, + "tokens_seen": 132907008 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009693468143155191, + "loss": 0.1408, + "theoretical_loss": 4.618544394087287, + "tokens_seen": 133169152 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009692665703739369, + "loss": 0.1341, + "theoretical_loss": 4.617343680749233, + "tokens_seen": 133431296 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009691863264323544, + "loss": 0.1365, + "theoretical_loss": 4.61614598309653, + "tokens_seen": 133693440 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009691060824907719, + "loss": 0.1375, + "theoretical_loss": 4.614951287664179, + "tokens_seen": 133955584 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009690258385491896, + "loss": 0.139, + "theoretical_loss": 4.613759581073502, + "tokens_seen": 134217728 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.005763855762779713, + "objective/train/docs_used": 56110, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6633012294769287, + "objective/train/original_loss": 2.663301467895508, + "objective/train/theoretical_loss": 4.613164844436379, + "objective/train/tokens_used": 154808800, + "objective/train/value_avg": -0.01210784912109375, + "objective/train/value_loss": 0.00031003085314296186, + "objective/train/value_max": -0.0025310516357421875, + "objective/train/value_min": -0.236572265625, + "objective/train/value_reward_corr": 0.1698134460465261, + "objective/train/value_std": 0.0075225830078125, + "objective/train/weight_avg": 1.0059170722961426, + "objective/train/weighted_lm_loss": 2.677839756011963, + "objective/train/weights_max": 1.1739808320999146, + "objective/train/weights_min": 0.7241759300231934, + "theoretical_loss": 4.613164844436379, + "tokens_seen": 134348800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009689455946076071, + "loss": 0.1382, + "theoretical_loss": 4.612570850031418, + "tokens_seen": 134479872 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009688653506660247, + "loss": 0.1358, + "theoretical_loss": 4.611385081329736, + "tokens_seen": 134742016 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009687851067244423, + "loss": 0.1363, + "theoretical_loss": 4.610202261844444, + "tokens_seen": 135004160 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009687048627828599, + "loss": 0.1375, + "theoretical_loss": 4.6090223785350135, + "tokens_seen": 135266304 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009686246188412776, + "loss": 0.1364, + "theoretical_loss": 4.607845418443706, + "tokens_seen": 135528448 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009685443748996951, + "loss": 0.1384, + "theoretical_loss": 4.606671368694888, + "tokens_seen": 135790592 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009684641309581127, + "loss": 0.1369, + "theoretical_loss": 4.6055002164943595, + "tokens_seen": 136052736 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009683838870165303, + "loss": 0.1321, + "theoretical_loss": 4.604331949128672, + "tokens_seen": 136314880 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009683036430749479, + "loss": 0.1365, + "theoretical_loss": 4.603166553964474, + "tokens_seen": 136577024 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009682233991333654, + "loss": 0.1396, + "theoretical_loss": 4.60200401844785, + "tokens_seen": 136839168 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009681431551917831, + "loss": 0.133, + "theoretical_loss": 4.6008443301036746, + "tokens_seen": 137101312 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009680629112502006, + "loss": 0.1364, + "theoretical_loss": 4.5996874765349585, + "tokens_seen": 137363456 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.0023926738649606705, + "objective/train/docs_used": 57180, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.8781347274780273, + "objective/train/original_loss": 2.8781347274780273, + "objective/train/theoretical_loss": 4.598533445422221, + "objective/train/tokens_used": 158085600, + "objective/train/value_avg": -0.0099029541015625, + "objective/train/value_loss": 0.00033420659019611776, + "objective/train/value_max": -0.0016613006591796875, + "objective/train/value_min": -0.1622314453125, + "objective/train/value_reward_corr": 0.23598254350018186, + "objective/train/value_std": 0.00702667236328125, + "objective/train/weight_avg": 1.0025557279586792, + "objective/train/weighted_lm_loss": 2.8859775066375732, + "objective/train/weights_max": 1.1505653858184814, + "objective/train/weights_min": 0.7221318483352661, + "theoretical_loss": 4.598533445422221, + "tokens_seen": 137625600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009679826673086181, + "loss": 0.1361, + "theoretical_loss": 4.598533445422221, + "tokens_seen": 137625600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009679024233670359, + "loss": 0.1347, + "theoretical_loss": 4.597382224522855, + "tokens_seen": 137887744 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009678221794254534, + "loss": 0.1362, + "theoretical_loss": 4.596233801670502, + "tokens_seen": 138149888 + }, + { + "epoch": 0.04, + "learning_rate": 0.000967741935483871, + "loss": 0.1346, + "theoretical_loss": 4.595088164774435, + "tokens_seen": 138412032 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009676616915422886, + "loss": 0.1367, + "theoretical_loss": 4.593945301818941, + "tokens_seen": 138674176 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009675814476007062, + "loss": 0.1307, + "theoretical_loss": 4.592805200862726, + "tokens_seen": 138936320 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009675012036591238, + "loss": 0.133, + "theoretical_loss": 4.591667850038302, + "tokens_seen": 139198464 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009674209597175413, + "loss": 0.1317, + "theoretical_loss": 4.590533237551401, + "tokens_seen": 139460608 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009673407157759589, + "loss": 0.1315, + "theoretical_loss": 4.589401351680385, + "tokens_seen": 139722752 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009672604718343765, + "loss": 0.1337, + "theoretical_loss": 4.588272180775659, + "tokens_seen": 139984896 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009671802278927941, + "loss": 0.1296, + "theoretical_loss": 4.587145713259102, + "tokens_seen": 140247040 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009670999839512117, + "loss": 0.1301, + "theoretical_loss": 4.5860219376234905, + "tokens_seen": 140509184 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009670197400096294, + "loss": 0.1319, + "theoretical_loss": 4.584900842431934, + "tokens_seen": 140771328 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.006780576426535845, + "objective/train/docs_used": 58312, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.43525767326355, + "objective/train/original_loss": 2.43525767326355, + "objective/train/theoretical_loss": 4.584341296447742, + "objective/train/tokens_used": 161362400, + "objective/train/value_avg": -0.010894775390625, + "objective/train/value_loss": 0.00027257209876552224, + "objective/train/value_max": -0.0015916824340820312, + "objective/train/value_min": -0.367919921875, + "objective/train/value_reward_corr": 0.26859711054421476, + "objective/train/value_std": 0.00720977783203125, + "objective/train/weight_avg": 1.0069042444229126, + "objective/train/weighted_lm_loss": 2.4517366886138916, + "objective/train/weights_max": 1.1828418970108032, + "objective/train/weights_min": 0.3722556531429291, + "theoretical_loss": 4.584341296447742, + "tokens_seen": 140902400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009669394960680469, + "loss": 0.1284, + "theoretical_loss": 4.583782416317316, + "tokens_seen": 141033472 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009668592521264644, + "loss": 0.1302, + "theoretical_loss": 4.582666647981739, + "tokens_seen": 141295616 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009667790081848821, + "loss": 0.128, + "theoretical_loss": 4.581553526195974, + "tokens_seen": 141557760 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009666987642432996, + "loss": 0.1304, + "theoretical_loss": 4.580443039798922, + "tokens_seen": 141819904 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009666185203017172, + "loss": 0.1305, + "theoretical_loss": 4.57933517769707, + "tokens_seen": 142082048 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009665382763601348, + "loss": 0.1314, + "theoretical_loss": 4.578229928863959, + "tokens_seen": 142344192 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009664580324185524, + "loss": 0.1286, + "theoretical_loss": 4.57712728233966, + "tokens_seen": 142606336 + }, + { + "epoch": 0.04, + "learning_rate": 0.00096637778847697, + "loss": 0.127, + "theoretical_loss": 4.576027227230245, + "tokens_seen": 142868480 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009662975445353877, + "loss": 0.1315, + "theoretical_loss": 4.574929752707274, + "tokens_seen": 143130624 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009662173005938052, + "loss": 0.128, + "theoretical_loss": 4.573834848007284, + "tokens_seen": 143392768 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009661370566522228, + "loss": 0.1269, + "theoretical_loss": 4.572742502431272, + "tokens_seen": 143654912 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009660568127106404, + "loss": 0.1264, + "theoretical_loss": 4.571652705344202, + "tokens_seen": 143917056 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.003318200586363673, + "objective/train/docs_used": 59583, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.476109027862549, + "objective/train/original_loss": 2.476109027862549, + "objective/train/theoretical_loss": 4.570565446174504, + "objective/train/tokens_used": 164639200, + "objective/train/value_avg": -0.0129547119140625, + "objective/train/value_loss": 0.0007415720028802752, + "objective/train/value_max": -0.0013942718505859375, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.4452921464992741, + "objective/train/value_std": 0.01247406005859375, + "objective/train/weight_avg": 1.0036523342132568, + "objective/train/weighted_lm_loss": 2.485182285308838, + "objective/train/weights_max": 1.1576626300811768, + "objective/train/weights_min": 0.37203988432884216, + "theoretical_loss": 4.570565446174504, + "tokens_seen": 144179200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009659765687690579, + "loss": 0.1281, + "theoretical_loss": 4.570565446174504, + "tokens_seen": 144179200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009658963248274756, + "loss": 0.1276, + "theoretical_loss": 4.569480714413578, + "tokens_seen": 144441344 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009658160808858931, + "loss": 0.1301, + "theoretical_loss": 4.568398499615305, + "tokens_seen": 144703488 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009657358369443107, + "loss": 0.1291, + "theoretical_loss": 4.56731879139557, + "tokens_seen": 144965632 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009656555930027284, + "loss": 0.1264, + "theoretical_loss": 4.566241579431776, + "tokens_seen": 145227776 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009655753490611459, + "loss": 0.1285, + "theoretical_loss": 4.565166853462371, + "tokens_seen": 145489920 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009654951051195635, + "loss": 0.13, + "theoretical_loss": 4.564094603286375, + "tokens_seen": 145752064 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009654148611779811, + "loss": 0.1248, + "theoretical_loss": 4.5630248187629245, + "tokens_seen": 146014208 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009653346172363987, + "loss": 0.1261, + "theoretical_loss": 4.561957489810798, + "tokens_seen": 146276352 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009652543732948162, + "loss": 0.1243, + "theoretical_loss": 4.5608926064079665, + "tokens_seen": 146538496 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009651741293532339, + "loss": 0.1281, + "theoretical_loss": 4.559830158591139, + "tokens_seen": 146800640 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009650938854116514, + "loss": 0.1229, + "theoretical_loss": 4.558770136455316, + "tokens_seen": 147062784 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009650136414700689, + "loss": 0.1253, + "theoretical_loss": 4.557712530153342, + "tokens_seen": 147324928 + }, + { + "epoch": 0.04, + "objective/train/advantage_avg": 0.002189334249123931, + "objective/train/docs_used": 60747, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.354844570159912, + "objective/train/original_loss": 2.354844570159912, + "objective/train/theoretical_loss": 4.557184629878522, + "objective/train/tokens_used": 167916000, + "objective/train/value_avg": -0.00820159912109375, + "objective/train/value_loss": 0.0003651907609310001, + "objective/train/value_max": -0.001239776611328125, + "objective/train/value_min": -0.23779296875, + "objective/train/value_reward_corr": 0.19971720203272725, + "objective/train/value_std": 0.00504302978515625, + "objective/train/weight_avg": 1.0023622512817383, + "objective/train/weighted_lm_loss": 2.3594002723693848, + "objective/train/weights_max": 1.057178020477295, + "objective/train/weights_min": 0.6187294125556946, + "theoretical_loss": 4.557184629878522, + "tokens_seen": 147456000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009649333975284866, + "loss": 0.1229, + "theoretical_loss": 4.556657329895469, + "tokens_seen": 147587072 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009648531535869042, + "loss": 0.1265, + "theoretical_loss": 4.5556045259489135, + "tokens_seen": 147849216 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009647729096453219, + "loss": 0.1269, + "theoretical_loss": 4.554554108637437, + "tokens_seen": 148111360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009646926657037394, + "loss": 0.1242, + "theoretical_loss": 4.553506068340907, + "tokens_seen": 148373504 + }, + { + "epoch": 0.05, + "learning_rate": 0.000964612421762157, + "loss": 0.1229, + "theoretical_loss": 4.552460395494878, + "tokens_seen": 148635648 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009645321778205746, + "loss": 0.1247, + "theoretical_loss": 4.55141708059017, + "tokens_seen": 148897792 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009644519338789921, + "loss": 0.1208, + "theoretical_loss": 4.5503761141724555, + "tokens_seen": 149159936 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009643716899374097, + "loss": 0.1224, + "theoretical_loss": 4.549337486841843, + "tokens_seen": 149422080 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009642914459958273, + "loss": 0.1231, + "theoretical_loss": 4.548301189252473, + "tokens_seen": 149684224 + }, + { + "epoch": 0.05, + "learning_rate": 0.000964211202054245, + "loss": 0.124, + "theoretical_loss": 4.54726721211211, + "tokens_seen": 149946368 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009641309581126625, + "loss": 0.1215, + "theoretical_loss": 4.546235546181743, + "tokens_seen": 150208512 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009640507141710802, + "loss": 0.1179, + "theoretical_loss": 4.545206182275189, + "tokens_seen": 150470656 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004590987227857113, + "objective/train/docs_used": 61752, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.4665567874908447, + "objective/train/original_loss": 2.466557025909424, + "objective/train/theoretical_loss": 4.5441791112587016, + "objective/train/tokens_used": 171192800, + "objective/train/value_avg": -0.01053619384765625, + "objective/train/value_loss": 0.0002645233762450516, + "objective/train/value_max": -0.00133514404296875, + "objective/train/value_min": -0.203369140625, + "objective/train/value_reward_corr": 0.45074871885232737, + "objective/train/value_std": 0.0080718994140625, + "objective/train/weight_avg": 1.004720687866211, + "objective/train/weighted_lm_loss": 2.478607177734375, + "objective/train/weights_max": 1.1166024208068848, + "objective/train/weights_min": 0.6107982993125916, + "theoretical_loss": 4.5441791112587016, + "tokens_seen": 150732800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009639704702294977, + "loss": 0.1199, + "theoretical_loss": 4.5441791112587016, + "tokens_seen": 150732800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009638902262879152, + "loss": 0.1258, + "theoretical_loss": 4.5431543240505725, + "tokens_seen": 150994944 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009638099823463329, + "loss": 0.123, + "theoretical_loss": 4.5421318116207585, + "tokens_seen": 151257088 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009637297384047504, + "loss": 0.1184, + "theoretical_loss": 4.541111564990485, + "tokens_seen": 151519232 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009636494944631681, + "loss": 0.1201, + "theoretical_loss": 4.540093575231879, + "tokens_seen": 151781376 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009635692505215856, + "loss": 0.1185, + "theoretical_loss": 4.539077833467582, + "tokens_seen": 152043520 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009634890065800032, + "loss": 0.12, + "theoretical_loss": 4.538064330870389, + "tokens_seen": 152305664 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009634087626384209, + "loss": 0.1218, + "theoretical_loss": 4.537053058662869, + "tokens_seen": 152567808 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009633285186968385, + "loss": 0.1205, + "theoretical_loss": 4.536044008117005, + "tokens_seen": 152829952 + }, + { + "epoch": 0.05, + "learning_rate": 0.000963248274755256, + "loss": 0.124, + "theoretical_loss": 4.535037170553833, + "tokens_seen": 153092096 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009631680308136736, + "loss": 0.1195, + "theoretical_loss": 4.534032537343078, + "tokens_seen": 153354240 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009630877868720912, + "loss": 0.1228, + "theoretical_loss": 4.533030099902803, + "tokens_seen": 153616384 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009630075429305087, + "loss": 0.1237, + "theoretical_loss": 4.53202984969905, + "tokens_seen": 153878528 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004563726019114256, + "objective/train/docs_used": 63027, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.6050822734832764, + "objective/train/original_loss": 2.6050822734832764, + "objective/train/theoretical_loss": 4.531530542157043, + "objective/train/tokens_used": 174469600, + "objective/train/value_avg": -0.00949859619140625, + "objective/train/value_loss": 0.0006905548507347703, + "objective/train/value_max": -0.0010986328125, + "objective/train/value_min": -0.29541015625, + "objective/train/value_reward_corr": 0.3721653551526731, + "objective/train/value_std": 0.00933074951171875, + "objective/train/weight_avg": 1.004849910736084, + "objective/train/weighted_lm_loss": 2.618398904800415, + "objective/train/weights_max": 1.2023754119873047, + "objective/train/weights_min": 0.36947569251060486, + "theoretical_loss": 4.531530542157043, + "tokens_seen": 154009600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009629272989889264, + "loss": 0.1251, + "theoretical_loss": 4.531031778245499, + "tokens_seen": 154140672 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009628470550473439, + "loss": 0.1225, + "theoretical_loss": 4.530035877103115, + "tokens_seen": 154402816 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009627668111057615, + "loss": 0.1218, + "theoretical_loss": 4.529042137879809, + "tokens_seen": 154664960 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009626865671641792, + "loss": 0.1192, + "theoretical_loss": 4.528050552230092, + "tokens_seen": 154927104 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009626063232225967, + "loss": 0.1212, + "theoretical_loss": 4.527061111854746, + "tokens_seen": 155189248 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009625260792810143, + "loss": 0.1221, + "theoretical_loss": 4.526073808500481, + "tokens_seen": 155451392 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009624458353394319, + "loss": 0.1189, + "theoretical_loss": 4.525088633959613, + "tokens_seen": 155713536 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009623655913978495, + "loss": 0.1184, + "theoretical_loss": 4.524105580069728, + "tokens_seen": 155975680 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009622853474562671, + "loss": 0.1185, + "theoretical_loss": 4.523124638713361, + "tokens_seen": 156237824 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009622051035146847, + "loss": 0.1208, + "theoretical_loss": 4.522145801817673, + "tokens_seen": 156499968 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009621248595731022, + "loss": 0.1158, + "theoretical_loss": 4.521169061354129, + "tokens_seen": 156762112 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009620446156315198, + "loss": 0.1177, + "theoretical_loss": 4.520194409338185, + "tokens_seen": 157024256 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004033830948174, + "objective/train/docs_used": 64097, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.5532474517822266, + "objective/train/original_loss": 2.5532474517822266, + "objective/train/theoretical_loss": 4.519221837828971, + "objective/train/tokens_used": 177746400, + "objective/train/value_avg": -0.01045989990234375, + "objective/train/value_loss": 0.00029761483892798424, + "objective/train/value_max": -0.0013408660888671875, + "objective/train/value_min": -0.1632080078125, + "objective/train/value_reward_corr": 0.47638880122386174, + "objective/train/value_std": 0.006755828857421875, + "objective/train/weight_avg": 1.0041792392730713, + "objective/train/weighted_lm_loss": 2.564786434173584, + "objective/train/weights_max": 1.0706846714019775, + "objective/train/weights_min": 0.7207310795783997, + "theoretical_loss": 4.519221837828971, + "tokens_seen": 157286400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009619643716899374, + "loss": 0.1201, + "theoretical_loss": 4.519221837828971, + "tokens_seen": 157286400 + }, + { + "epoch": 0.05, + "learning_rate": 0.000961884127748355, + "loss": 0.1197, + "theoretical_loss": 4.51825133892898, + "tokens_seen": 157548544 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009618038838067727, + "loss": 0.1175, + "theoretical_loss": 4.517282904783764, + "tokens_seen": 157810688 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009617236398651902, + "loss": 0.1204, + "theoretical_loss": 4.516316527581621, + "tokens_seen": 158072832 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009616433959236078, + "loss": 0.1225, + "theoretical_loss": 4.515352199553295, + "tokens_seen": 158334976 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009615631519820254, + "loss": 0.1203, + "theoretical_loss": 4.514389912971679, + "tokens_seen": 158597120 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009614829080404429, + "loss": 0.1203, + "theoretical_loss": 4.513429660151513, + "tokens_seen": 158859264 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009614026640988605, + "loss": 0.1171, + "theoretical_loss": 4.51247143344909, + "tokens_seen": 159121408 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009613224201572781, + "loss": 0.12, + "theoretical_loss": 4.511515225261961, + "tokens_seen": 159383552 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009612421762156957, + "loss": 0.1183, + "theoretical_loss": 4.5105610280286506, + "tokens_seen": 159645696 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009611619322741134, + "loss": 0.1196, + "theoretical_loss": 4.509608834228365, + "tokens_seen": 159907840 + }, + { + "epoch": 0.05, + "learning_rate": 0.000961081688332531, + "loss": 0.1155, + "theoretical_loss": 4.508658636380705, + "tokens_seen": 160169984 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009610014443909485, + "loss": 0.1206, + "theoretical_loss": 4.507710427045389, + "tokens_seen": 160432128 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0053785815834999084, + "objective/train/docs_used": 65316, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.3940212726593018, + "objective/train/original_loss": 2.3940210342407227, + "objective/train/theoretical_loss": 4.507237065755672, + "objective/train/tokens_used": 181023200, + "objective/train/value_avg": -0.00850677490234375, + "objective/train/value_loss": 0.00017240087618120015, + "objective/train/value_max": -0.0016107559204101562, + "objective/train/value_min": -0.1729736328125, + "objective/train/value_reward_corr": 0.16862680553399217, + "objective/train/value_std": 0.005374908447265625, + "objective/train/weight_avg": 1.005456805229187, + "objective/train/weighted_lm_loss": 2.4071598052978516, + "objective/train/weights_max": 1.0916688442230225, + "objective/train/weights_min": 0.370330810546875, + "theoretical_loss": 4.507237065755672, + "tokens_seen": 160563200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009609212004493661, + "loss": 0.1181, + "theoretical_loss": 4.50676419882197, + "tokens_seen": 160694272 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009608409565077837, + "loss": 0.1183, + "theoretical_loss": 4.505819944349556, + "tokens_seen": 160956416 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009607607125662012, + "loss": 0.1155, + "theoretical_loss": 4.504877656306535, + "tokens_seen": 161218560 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009606804686246189, + "loss": 0.1189, + "theoretical_loss": 4.503937327410306, + "tokens_seen": 161480704 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009606002246830364, + "loss": 0.1185, + "theoretical_loss": 4.502998950417004, + "tokens_seen": 161742848 + }, + { + "epoch": 0.05, + "learning_rate": 0.000960519980741454, + "loss": 0.1188, + "theoretical_loss": 4.502062518121232, + "tokens_seen": 162004992 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009604397367998717, + "loss": 0.1163, + "theoretical_loss": 4.501128023355796, + "tokens_seen": 162267136 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009603594928582893, + "loss": 0.1164, + "theoretical_loss": 4.500195458991443, + "tokens_seen": 162529280 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009602792489167068, + "loss": 0.1192, + "theoretical_loss": 4.499264817936593, + "tokens_seen": 162791424 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009601990049751244, + "loss": 0.1209, + "theoretical_loss": 4.498336093137089, + "tokens_seen": 163053568 + }, + { + "epoch": 0.05, + "learning_rate": 0.000960118761033542, + "loss": 0.1193, + "theoretical_loss": 4.49740927757593, + "tokens_seen": 163315712 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009600385170919595, + "loss": 0.1164, + "theoretical_loss": 4.496484364273021, + "tokens_seen": 163577856 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.00311545399017632, + "objective/train/docs_used": 66569, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.244389295578003, + "objective/train/original_loss": 2.244389533996582, + "objective/train/theoretical_loss": 4.495561346284925, + "objective/train/tokens_used": 184300000, + "objective/train/value_avg": -0.00873565673828125, + "objective/train/value_loss": 0.00021333472977858037, + "objective/train/value_max": -0.0010862350463867188, + "objective/train/value_min": -0.260009765625, + "objective/train/value_reward_corr": 0.3512147373222607, + "objective/train/value_std": 0.00701904296875, + "objective/train/weight_avg": 1.0032168626785278, + "objective/train/weighted_lm_loss": 2.2516353130340576, + "objective/train/weights_max": 1.1541095972061157, + "objective/train/weights_min": 0.3705597221851349, + "theoretical_loss": 4.495561346284925, + "tokens_seen": 163840000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009599582731503772, + "loss": 0.1161, + "theoretical_loss": 4.495561346284925, + "tokens_seen": 163840000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009598780292087947, + "loss": 0.1176, + "theoretical_loss": 4.494640216704598, + "tokens_seen": 164102144 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009597977852672124, + "loss": 0.1193, + "theoretical_loss": 4.493720968661158, + "tokens_seen": 164364288 + }, + { + "epoch": 0.05, + "learning_rate": 0.00095971754132563, + "loss": 0.116, + "theoretical_loss": 4.492803595319623, + "tokens_seen": 164626432 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009596372973840475, + "loss": 0.1149, + "theoretical_loss": 4.49188808988068, + "tokens_seen": 164888576 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009595570534424652, + "loss": 0.1118, + "theoretical_loss": 4.490974445580429, + "tokens_seen": 165150720 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009594768095008827, + "loss": 0.113, + "theoretical_loss": 4.490062655690153, + "tokens_seen": 165412864 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009593965655593003, + "loss": 0.113, + "theoretical_loss": 4.489152713516077, + "tokens_seen": 165675008 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009593163216177179, + "loss": 0.1102, + "theoretical_loss": 4.488244612399129, + "tokens_seen": 165937152 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009592360776761355, + "loss": 0.1119, + "theoretical_loss": 4.487338345714707, + "tokens_seen": 166199296 + }, + { + "epoch": 0.05, + "learning_rate": 0.000959155833734553, + "loss": 0.1138, + "theoretical_loss": 4.486433906872448, + "tokens_seen": 166461440 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009590755897929706, + "loss": 0.1152, + "theoretical_loss": 4.485531289315997, + "tokens_seen": 166723584 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009589953458513882, + "loss": 0.1099, + "theoretical_loss": 4.484630486522775, + "tokens_seen": 166985728 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.002676489297300577, + "objective/train/docs_used": 67770, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.380342483520508, + "objective/train/original_loss": 2.380342483520508, + "objective/train/theoretical_loss": 4.48418076363325, + "objective/train/tokens_used": 187576800, + "objective/train/value_avg": -0.01169586181640625, + "objective/train/value_loss": 0.0005142788286320865, + "objective/train/value_max": -0.0011920928955078125, + "objective/train/value_min": -0.332763671875, + "objective/train/value_reward_corr": 0.49718080147400245, + "objective/train/value_std": 0.01336669921875, + "objective/train/weight_avg": 1.0029011964797974, + "objective/train/weighted_lm_loss": 2.385775327682495, + "objective/train/weights_max": 1.2041622400283813, + "objective/train/weights_min": 0.39096081256866455, + "theoretical_loss": 4.48418076363325, + "tokens_seen": 167116800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009589151019098058, + "loss": 0.117, + "theoretical_loss": 4.483731492003757, + "tokens_seen": 167247872 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009588348579682235, + "loss": 0.113, + "theoretical_loss": 4.482834299303246, + "tokens_seen": 167510016 + }, + { + "epoch": 0.05, + "learning_rate": 0.000958754614026641, + "loss": 0.1132, + "theoretical_loss": 4.481938901998647, + "tokens_seen": 167772160 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009586743700850587, + "loss": 0.1124, + "theoretical_loss": 4.481045293700248, + "tokens_seen": 168034304 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009585941261434762, + "loss": 0.1135, + "theoretical_loss": 4.480153468051001, + "tokens_seen": 168296448 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009585138822018937, + "loss": 0.1152, + "theoretical_loss": 4.4792634187263065, + "tokens_seen": 168558592 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009584336382603114, + "loss": 0.1142, + "theoretical_loss": 4.4783751394337905, + "tokens_seen": 168820736 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009583533943187289, + "loss": 0.1093, + "theoretical_loss": 4.4774886239131, + "tokens_seen": 169082880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009582731503771465, + "loss": 0.116, + "theoretical_loss": 4.476603865935683, + "tokens_seen": 169345024 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009581929064355642, + "loss": 0.1161, + "theoretical_loss": 4.475720859304583, + "tokens_seen": 169607168 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009581126624939818, + "loss": 0.1115, + "theoretical_loss": 4.474839597854226, + "tokens_seen": 169869312 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009580324185523993, + "loss": 0.1149, + "theoretical_loss": 4.473960075450218, + "tokens_seen": 170131456 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.005700128152966499, + "objective/train/docs_used": 69022, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.486732006072998, + "objective/train/original_loss": 2.486732006072998, + "objective/train/theoretical_loss": 4.473082285989134, + "objective/train/tokens_used": 190853600, + "objective/train/value_avg": -0.0110015869140625, + "objective/train/value_loss": 0.0002670148969627917, + "objective/train/value_max": -0.0014438629150390625, + "objective/train/value_min": -0.30224609375, + "objective/train/value_reward_corr": 0.4285667679812899, + "objective/train/value_std": 0.01032257080078125, + "objective/train/weight_avg": 1.0058281421661377, + "objective/train/weighted_lm_loss": 2.500220775604248, + "objective/train/weights_max": 1.1895605325698853, + "objective/train/weights_min": 0.3700695335865021, + "theoretical_loss": 4.473082285989134, + "tokens_seen": 170393600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009579521746108169, + "loss": 0.118, + "theoretical_loss": 4.473082285989134, + "tokens_seen": 170393600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009578719306692345, + "loss": 0.1144, + "theoretical_loss": 4.472206223398325, + "tokens_seen": 170655744 + }, + { + "epoch": 0.05, + "learning_rate": 0.000957791686727652, + "loss": 0.1134, + "theoretical_loss": 4.471331881635698, + "tokens_seen": 170917888 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009577114427860697, + "loss": 0.113, + "theoretical_loss": 4.470459254689533, + "tokens_seen": 171180032 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009576311988444872, + "loss": 0.1129, + "theoretical_loss": 4.469588336578277, + "tokens_seen": 171442176 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009575509549029048, + "loss": 0.1111, + "theoretical_loss": 4.468719121350343, + "tokens_seen": 171704320 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009574707109613225, + "loss": 0.1102, + "theoretical_loss": 4.467851603083923, + "tokens_seen": 171966464 + }, + { + "epoch": 0.05, + "learning_rate": 0.00095739046701974, + "loss": 0.1133, + "theoretical_loss": 4.466985775886784, + "tokens_seen": 172228608 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009573102230781577, + "loss": 0.1121, + "theoretical_loss": 4.466121633896087, + "tokens_seen": 172490752 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009572299791365752, + "loss": 0.1152, + "theoretical_loss": 4.465259171278182, + "tokens_seen": 172752896 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009571497351949928, + "loss": 0.1114, + "theoretical_loss": 4.464398382228435, + "tokens_seen": 173015040 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009570694912534104, + "loss": 0.1146, + "theoretical_loss": 4.463539260971023, + "tokens_seen": 173277184 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956989247311828, + "loss": 0.1145, + "theoretical_loss": 4.462681801758762, + "tokens_seen": 173539328 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0046385833993554115, + "objective/train/docs_used": 70257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.40885853767395, + "objective/train/original_loss": 2.408858299255371, + "objective/train/theoretical_loss": 4.462253693631352, + "objective/train/tokens_used": 194130400, + "objective/train/value_avg": -0.0102081298828125, + "objective/train/value_loss": 0.00034067677916027606, + "objective/train/value_max": -0.0015306472778320312, + "objective/train/value_min": -0.171630859375, + "objective/train/value_reward_corr": 0.4651631801087514, + "objective/train/value_std": 0.008270263671875, + "objective/train/weight_avg": 1.0047951936721802, + "objective/train/weighted_lm_loss": 2.420976161956787, + "objective/train/weights_max": 1.1363425254821777, + "objective/train/weights_min": 0.5437347888946533, + "theoretical_loss": 4.462253693631352, + "tokens_seen": 173670400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009569090033702455, + "loss": 0.1156, + "theoretical_loss": 4.461825998872914, + "tokens_seen": 173801472 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009568287594286632, + "loss": 0.1096, + "theoretical_loss": 4.460971846623005, + "tokens_seen": 174063616 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009567485154870808, + "loss": 0.1121, + "theoretical_loss": 4.460119339346643, + "tokens_seen": 174325760 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009566682715454983, + "loss": 0.1099, + "theoretical_loss": 4.45926847140934, + "tokens_seen": 174587904 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956588027603916, + "loss": 0.1113, + "theoretical_loss": 4.45841923720433, + "tokens_seen": 174850048 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009565077836623335, + "loss": 0.1171, + "theoretical_loss": 4.4575716311523905, + "tokens_seen": 175112192 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009564275397207511, + "loss": 0.1131, + "theoretical_loss": 4.456725647701669, + "tokens_seen": 175374336 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009563472957791687, + "loss": 0.1123, + "theoretical_loss": 4.455881281327508, + "tokens_seen": 175636480 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009562670518375863, + "loss": 0.1149, + "theoretical_loss": 4.4550385265322685, + "tokens_seen": 175898624 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009561868078960038, + "loss": 0.1116, + "theoretical_loss": 4.45419737784516, + "tokens_seen": 176160768 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009561065639544214, + "loss": 0.1141, + "theoretical_loss": 4.45335782982207, + "tokens_seen": 176422912 + }, + { + "epoch": 0.05, + "learning_rate": 0.000956026320012839, + "loss": 0.1142, + "theoretical_loss": 4.452519877045393, + "tokens_seen": 176685056 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.004688139073550701, + "objective/train/docs_used": 71338, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.032062292098999, + "objective/train/original_loss": 2.032062292098999, + "objective/train/theoretical_loss": 4.451683514123864, + "objective/train/tokens_used": 197407200, + "objective/train/value_avg": -0.01114654541015625, + "objective/train/value_loss": 0.00021585206559393555, + "objective/train/value_max": -0.00131988525390625, + "objective/train/value_min": -0.267822265625, + "objective/train/value_reward_corr": 0.5101379035115723, + "objective/train/value_std": 0.008941650390625, + "objective/train/weight_avg": 1.004790186882019, + "objective/train/weighted_lm_loss": 2.041867971420288, + "objective/train/weights_max": 1.1229585409164429, + "objective/train/weights_min": 0.36935028433799744, + "theoretical_loss": 4.451683514123864, + "tokens_seen": 176947200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009559460760712567, + "loss": 0.1141, + "theoretical_loss": 4.451683514123864, + "tokens_seen": 176947200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009558658321296743, + "loss": 0.1138, + "theoretical_loss": 4.450848735692391, + "tokens_seen": 177209344 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009557855881880918, + "loss": 0.1133, + "theoretical_loss": 4.450015536411886, + "tokens_seen": 177471488 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009557053442465095, + "loss": 0.1106, + "theoretical_loss": 4.449183910969108, + "tokens_seen": 177733632 + }, + { + "epoch": 0.05, + "learning_rate": 0.000955625100304927, + "loss": 0.1089, + "theoretical_loss": 4.448353854076494, + "tokens_seen": 177995776 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009555448563633445, + "loss": 0.1118, + "theoretical_loss": 4.4475253604719995, + "tokens_seen": 178257920 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009554646124217622, + "loss": 0.1077, + "theoretical_loss": 4.446698424918937, + "tokens_seen": 178520064 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009553843684801797, + "loss": 0.1118, + "theoretical_loss": 4.44587304220582, + "tokens_seen": 178782208 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009553041245385973, + "loss": 0.1074, + "theoretical_loss": 4.4450492071462, + "tokens_seen": 179044352 + }, + { + "epoch": 0.05, + "learning_rate": 0.000955223880597015, + "loss": 0.1145, + "theoretical_loss": 4.444226914578513, + "tokens_seen": 179306496 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009551436366554326, + "loss": 0.1149, + "theoretical_loss": 4.4434061593659235, + "tokens_seen": 179568640 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009550633927138501, + "loss": 0.116, + "theoretical_loss": 4.442586936396171, + "tokens_seen": 179830784 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009549831487722677, + "loss": 0.1145, + "theoretical_loss": 4.441769240581412, + "tokens_seen": 180092928 + }, + { + "epoch": 0.05, + "objective/train/advantage_avg": 0.0043923812918365, + "objective/train/docs_used": 72550, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.181901216506958, + "objective/train/original_loss": 2.181901454925537, + "objective/train/theoretical_loss": 4.441360963773846, + "objective/train/tokens_used": 200684000, + "objective/train/value_avg": -0.010162353515625, + "objective/train/value_loss": 0.00025610229931771755, + "objective/train/value_max": -0.0012645721435546875, + "objective/train/value_min": -0.2271728515625, + "objective/train/value_reward_corr": 0.5822503996716603, + "objective/train/value_std": 0.00917816162109375, + "objective/train/weight_avg": 1.0045078992843628, + "objective/train/weighted_lm_loss": 2.191445827484131, + "objective/train/weights_max": 1.0695090293884277, + "objective/train/weights_min": 0.3694573640823364, + "theoretical_loss": 4.441360963773846, + "tokens_seen": 180224000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009549029048306853, + "loss": 0.1193, + "theoretical_loss": 4.440953066858077, + "tokens_seen": 180355072 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009548226608891028, + "loss": 0.1133, + "theoretical_loss": 4.4401384101867105, + "tokens_seen": 180617216 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009547424169475205, + "loss": 0.1127, + "theoretical_loss": 4.439325265551826, + "tokens_seen": 180879360 + }, + { + "epoch": 0.05, + "learning_rate": 0.000954662173005938, + "loss": 0.1135, + "theoretical_loss": 4.438513627961757, + "tokens_seen": 181141504 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009545819290643557, + "loss": 0.1131, + "theoretical_loss": 4.437703492448509, + "tokens_seen": 181403648 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009545016851227733, + "loss": 0.1149, + "theoretical_loss": 4.436894854067614, + "tokens_seen": 181665792 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009544214411811908, + "loss": 0.1132, + "theoretical_loss": 4.436087707897984, + "tokens_seen": 181927936 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009543411972396085, + "loss": 0.1131, + "theoretical_loss": 4.435282049041769, + "tokens_seen": 182190080 + }, + { + "epoch": 0.06, + "learning_rate": 0.000954260953298026, + "loss": 0.1115, + "theoretical_loss": 4.434477872624212, + "tokens_seen": 182452224 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009541807093564436, + "loss": 0.1139, + "theoretical_loss": 4.433675173793507, + "tokens_seen": 182714368 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009541004654148612, + "loss": 0.1125, + "theoretical_loss": 4.43287394772066, + "tokens_seen": 182976512 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009540202214732788, + "loss": 0.1156, + "theoretical_loss": 4.43207418959935, + "tokens_seen": 183238656 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": -0.0008087606402114034, + "objective/train/docs_used": 73723, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.2917051315307617, + "objective/train/original_loss": 2.2917051315307617, + "objective/train/theoretical_loss": 4.431275894645784, + "objective/train/tokens_used": 203960800, + "objective/train/value_avg": -0.01041412353515625, + "objective/train/value_loss": 0.0009768769377842546, + "objective/train/value_max": -0.0010118484497070312, + "objective/train/value_min": -0.2391357421875, + "objective/train/value_reward_corr": 0.8343898472090797, + "objective/train/value_std": 0.0133514404296875, + "objective/train/weight_avg": 0.9996550679206848, + "objective/train/weighted_lm_loss": 2.28629732131958, + "objective/train/weights_max": 1.0813897848129272, + "objective/train/weights_min": 0.3707435429096222, + "theoretical_loss": 4.431275894645784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009539399775316963, + "loss": 0.1081, + "theoretical_loss": 4.431275894645784, + "tokens_seen": 183500800 + }, + { + "epoch": 0.06, + "learning_rate": 0.000953859733590114, + "loss": 0.1118, + "theoretical_loss": 4.43047905809857, + "tokens_seen": 183762944 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009537794896485315, + "loss": 0.1102, + "theoretical_loss": 4.42968367521857, + "tokens_seen": 184025088 + }, + { + "epoch": 0.06, + "learning_rate": 0.000953699245706949, + "loss": 0.1121, + "theoretical_loss": 4.428889741288771, + "tokens_seen": 184287232 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009536190017653668, + "loss": 0.1135, + "theoretical_loss": 4.428097251614145, + "tokens_seen": 184549376 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009535387578237843, + "loss": 0.1115, + "theoretical_loss": 4.427306201521524, + "tokens_seen": 184811520 + }, + { + "epoch": 0.06, + "learning_rate": 0.000953458513882202, + "loss": 0.1107, + "theoretical_loss": 4.426516586359458, + "tokens_seen": 185073664 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009533782699406195, + "loss": 0.1126, + "theoretical_loss": 4.425728401498089, + "tokens_seen": 185335808 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009532980259990371, + "loss": 0.1136, + "theoretical_loss": 4.424941642329019, + "tokens_seen": 185597952 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009532177820574547, + "loss": 0.1118, + "theoretical_loss": 4.42415630426518, + "tokens_seen": 185860096 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009531375381158722, + "loss": 0.1099, + "theoretical_loss": 4.423372382740707, + "tokens_seen": 186122240 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009530572941742898, + "loss": 0.1075, + "theoretical_loss": 4.422589873210806, + "tokens_seen": 186384384 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009529770502327075, + "loss": 0.1079, + "theoretical_loss": 4.4218087711516345, + "tokens_seen": 186646528 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0038929590955376625, + "objective/train/docs_used": 74904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.182293653488159, + "objective/train/original_loss": 2.18229341506958, + "objective/train/theoretical_loss": 4.421418746515567, + "objective/train/tokens_used": 207237600, + "objective/train/value_avg": -0.01084136962890625, + "objective/train/value_loss": 0.0002643898769747466, + "objective/train/value_max": -0.0011692047119140625, + "objective/train/value_min": -0.2230224609375, + "objective/train/value_reward_corr": 0.4553354681669406, + "objective/train/value_std": 0.0082855224609375, + "objective/train/weight_avg": 1.0040152072906494, + "objective/train/weighted_lm_loss": 2.1902849674224854, + "objective/train/weights_max": 1.0839669704437256, + "objective/train/weights_min": 0.3749292194843292, + "theoretical_loss": 4.421418746515567, + "tokens_seen": 186777600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009528968062911251, + "loss": 0.1131, + "theoretical_loss": 4.421029072060167, + "tokens_seen": 186908672 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009528165623495426, + "loss": 0.1097, + "theoretical_loss": 4.420250771454078, + "tokens_seen": 187170816 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009527363184079603, + "loss": 0.1131, + "theoretical_loss": 4.419473864871613, + "tokens_seen": 187432960 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009526560744663778, + "loss": 0.1132, + "theoretical_loss": 4.4186983478714685, + "tokens_seen": 187695104 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009525758305247953, + "loss": 0.1127, + "theoretical_loss": 4.417924216032667, + "tokens_seen": 187957248 + }, + { + "epoch": 0.06, + "learning_rate": 0.000952495586583213, + "loss": 0.111, + "theoretical_loss": 4.417151464954437, + "tokens_seen": 188219392 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009524153426416305, + "loss": 0.1102, + "theoretical_loss": 4.416380090256095, + "tokens_seen": 188481536 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009523350987000481, + "loss": 0.1144, + "theoretical_loss": 4.415610087576923, + "tokens_seen": 188743680 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009522548547584658, + "loss": 0.1116, + "theoretical_loss": 4.414841452576049, + "tokens_seen": 189005824 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009521746108168834, + "loss": 0.1083, + "theoretical_loss": 4.414074180932333, + "tokens_seen": 189267968 + }, + { + "epoch": 0.06, + "learning_rate": 0.000952094366875301, + "loss": 0.1086, + "theoretical_loss": 4.413308268344249, + "tokens_seen": 189530112 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009520141229337185, + "loss": 0.1115, + "theoretical_loss": 4.412543710529766, + "tokens_seen": 189792256 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0025555319152772427, + "objective/train/docs_used": 76077, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.207807779312134, + "objective/train/original_loss": 2.207808017730713, + "objective/train/theoretical_loss": 4.411780503226238, + "objective/train/tokens_used": 210514400, + "objective/train/value_avg": -0.0090789794921875, + "objective/train/value_loss": 0.0002816997584886849, + "objective/train/value_max": -0.0007886886596679688, + "objective/train/value_min": -0.25732421875, + "objective/train/value_reward_corr": 0.4408398455844697, + "objective/train/value_std": 0.0070037841796875, + "objective/train/weight_avg": 1.0026865005493164, + "objective/train/weighted_lm_loss": 2.213961362838745, + "objective/train/weights_max": 1.1967458724975586, + "objective/train/weights_min": 0.38571086525917053, + "theoretical_loss": 4.411780503226238, + "tokens_seen": 190054400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009519338789921361, + "loss": 0.111, + "theoretical_loss": 4.411780503226238, + "tokens_seen": 190054400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009518536350505537, + "loss": 0.1131, + "theoretical_loss": 4.4110186421902835, + "tokens_seen": 190316544 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009517733911089713, + "loss": 0.1077, + "theoretical_loss": 4.4102581231976785, + "tokens_seen": 190578688 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009516931471673888, + "loss": 0.1121, + "theoretical_loss": 4.409498942043237, + "tokens_seen": 190840832 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009516129032258065, + "loss": 0.1111, + "theoretical_loss": 4.408741094540707, + "tokens_seen": 191102976 + }, + { + "epoch": 0.06, + "learning_rate": 0.000951532659284224, + "loss": 0.1087, + "theoretical_loss": 4.407984576522653, + "tokens_seen": 191365120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009514524153426416, + "loss": 0.111, + "theoretical_loss": 4.407229383840347, + "tokens_seen": 191627264 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009513721714010593, + "loss": 0.1098, + "theoretical_loss": 4.406475512363663, + "tokens_seen": 191889408 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009512919274594768, + "loss": 0.1062, + "theoretical_loss": 4.405722957980962, + "tokens_seen": 192151552 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009512116835178944, + "loss": 0.1097, + "theoretical_loss": 4.40497171659899, + "tokens_seen": 192413696 + }, + { + "epoch": 0.06, + "learning_rate": 0.000951131439576312, + "loss": 0.1064, + "theoretical_loss": 4.404221784142768, + "tokens_seen": 192675840 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009510511956347296, + "loss": 0.1104, + "theoretical_loss": 4.403473156555487, + "tokens_seen": 192937984 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009509709516931472, + "loss": 0.1077, + "theoretical_loss": 4.402725829798397, + "tokens_seen": 193200128 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0021290904842317104, + "objective/train/docs_used": 77266, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.261606454849243, + "objective/train/original_loss": 2.261606454849243, + "objective/train/theoretical_loss": 4.40235265297398, + "objective/train/tokens_used": 213791200, + "objective/train/value_avg": -0.00881195068359375, + "objective/train/value_loss": 0.00027175722061656415, + "objective/train/value_max": -0.0007319450378417969, + "objective/train/value_min": -0.278076171875, + "objective/train/value_reward_corr": 0.45116222074040196, + "objective/train/value_std": 0.00730133056640625, + "objective/train/weight_avg": 1.0022554397583008, + "objective/train/weighted_lm_loss": 2.266122341156006, + "objective/train/weights_max": 1.106561303138733, + "objective/train/weights_min": 0.36955323815345764, + "theoretical_loss": 4.40235265297398, + "tokens_seen": 193331200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009508907077515648, + "loss": 0.1074, + "theoretical_loss": 4.4019797998507135, + "tokens_seen": 193462272 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009508104638099823, + "loss": 0.1048, + "theoretical_loss": 4.401235062709502, + "tokens_seen": 193724416 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009507302198684, + "loss": 0.1085, + "theoretical_loss": 4.400491614389582, + "tokens_seen": 193986560 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009506499759268176, + "loss": 0.1047, + "theoretical_loss": 4.3997494509234185, + "tokens_seen": 194248704 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009505697319852351, + "loss": 0.108, + "theoretical_loss": 4.399008568361027, + "tokens_seen": 194510848 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009504894880436528, + "loss": 0.11, + "theoretical_loss": 4.398268962769867, + "tokens_seen": 194772992 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009504092441020703, + "loss": 0.1121, + "theoretical_loss": 4.397530630234744, + "tokens_seen": 195035136 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009503290001604879, + "loss": 0.1068, + "theoretical_loss": 4.396793566857708, + "tokens_seen": 195297280 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009502487562189055, + "loss": 0.1059, + "theoretical_loss": 4.396057768757957, + "tokens_seen": 195559424 + }, + { + "epoch": 0.06, + "learning_rate": 0.000950168512277323, + "loss": 0.1079, + "theoretical_loss": 4.395323232071737, + "tokens_seen": 195821568 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009500882683357406, + "loss": 0.1078, + "theoretical_loss": 4.394589952952247, + "tokens_seen": 196083712 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009500080243941583, + "loss": 0.1036, + "theoretical_loss": 4.393857927569534, + "tokens_seen": 196345856 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.005338339600712061, + "objective/train/docs_used": 78313, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9282069206237793, + "objective/train/original_loss": 1.9282069206237793, + "objective/train/theoretical_loss": 4.393127152110409, + "objective/train/tokens_used": 217068000, + "objective/train/value_avg": -0.01195526123046875, + "objective/train/value_loss": 0.0004946094704791903, + "objective/train/value_max": -0.0013303756713867188, + "objective/train/value_min": -0.52392578125, + "objective/train/value_reward_corr": 0.5750984043308986, + "objective/train/value_std": 0.014373779296875, + "objective/train/weight_avg": 1.0055615901947021, + "objective/train/weighted_lm_loss": 1.9375295639038086, + "objective/train/weights_max": 1.6414918899536133, + "objective/train/weights_min": 0.4095984101295471, + "theoretical_loss": 4.393127152110409, + "tokens_seen": 196608000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009499277804525759, + "loss": 0.1065, + "theoretical_loss": 4.393127152110409, + "tokens_seen": 196608000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009498475365109934, + "loss": 0.1074, + "theoretical_loss": 4.392397622778343, + "tokens_seen": 196870144 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009497672925694111, + "loss": 0.107, + "theoretical_loss": 4.391669335793372, + "tokens_seen": 197132288 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009496870486278286, + "loss": 0.1047, + "theoretical_loss": 4.39094228739201, + "tokens_seen": 197394432 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009496068046862462, + "loss": 0.1091, + "theoretical_loss": 4.390216473827143, + "tokens_seen": 197656576 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009495265607446638, + "loss": 0.103, + "theoretical_loss": 4.389491891367953, + "tokens_seen": 197918720 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009494463168030813, + "loss": 0.108, + "theoretical_loss": 4.388768536299808, + "tokens_seen": 198180864 + }, + { + "epoch": 0.06, + "learning_rate": 0.000949366072861499, + "loss": 0.1101, + "theoretical_loss": 4.388046404924184, + "tokens_seen": 198443008 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009492858289199166, + "loss": 0.1064, + "theoretical_loss": 4.387325493558566, + "tokens_seen": 198705152 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009492055849783342, + "loss": 0.1078, + "theoretical_loss": 4.386605798536362, + "tokens_seen": 198967296 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009491253410367518, + "loss": 0.1102, + "theoretical_loss": 4.385887316206812, + "tokens_seen": 199229440 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009490450970951693, + "loss": 0.1037, + "theoretical_loss": 4.385170042934896, + "tokens_seen": 199491584 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009489648531535869, + "loss": 0.1051, + "theoretical_loss": 4.384453975101251, + "tokens_seen": 199753728 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0038920550141483545, + "objective/train/docs_used": 79488, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9931830167770386, + "objective/train/original_loss": 1.993182897567749, + "objective/train/theoretical_loss": 4.384096392096988, + "objective/train/tokens_used": 220344800, + "objective/train/value_avg": -0.00962066650390625, + "objective/train/value_loss": 0.00014405451656784862, + "objective/train/value_max": -0.0009326934814453125, + "objective/train/value_min": -0.26123046875, + "objective/train/value_reward_corr": 0.6021631165702985, + "objective/train/value_std": 0.00872039794921875, + "objective/train/weight_avg": 1.0039596557617188, + "objective/train/weighted_lm_loss": 2.001870632171631, + "objective/train/weights_max": 1.085601806640625, + "objective/train/weights_min": 0.3811125159263611, + "theoretical_loss": 4.384096392096988, + "tokens_seen": 199884800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009488846092120045, + "loss": 0.1077, + "theoretical_loss": 4.38373910910208, + "tokens_seen": 200015872 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009488043652704221, + "loss": 0.1058, + "theoretical_loss": 4.383025441349063, + "tokens_seen": 200278016 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009487241213288396, + "loss": 0.1104, + "theoretical_loss": 4.382312968269276, + "tokens_seen": 200540160 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009486438773872573, + "loss": 0.1058, + "theoretical_loss": 4.381601686305098, + "tokens_seen": 200802304 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009485636334456749, + "loss": 0.1045, + "theoretical_loss": 4.38089159191413, + "tokens_seen": 201064448 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009484833895040924, + "loss": 0.1023, + "theoretical_loss": 4.380182681569111, + "tokens_seen": 201326592 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009484031455625101, + "loss": 0.1068, + "theoretical_loss": 4.379474951757829, + "tokens_seen": 201588736 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009483229016209276, + "loss": 0.107, + "theoretical_loss": 4.378768398983042, + "tokens_seen": 201850880 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009482426576793453, + "loss": 0.1039, + "theoretical_loss": 4.378063019762392, + "tokens_seen": 202113024 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009481624137377628, + "loss": 0.1046, + "theoretical_loss": 4.377358810628324, + "tokens_seen": 202375168 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009480821697961804, + "loss": 0.1048, + "theoretical_loss": 4.3766557681280025, + "tokens_seen": 202637312 + }, + { + "epoch": 0.06, + "learning_rate": 0.000948001925854598, + "loss": 0.1058, + "theoretical_loss": 4.375953888823233, + "tokens_seen": 202899456 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.001105781877413392, + "objective/train/docs_used": 80605, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.189178228378296, + "objective/train/original_loss": 2.189178228378296, + "objective/train/theoretical_loss": 4.375253169290376, + "objective/train/tokens_used": 223621600, + "objective/train/value_avg": -0.01064300537109375, + "objective/train/value_loss": 0.00040862776222638786, + "objective/train/value_max": -0.0009965896606445312, + "objective/train/value_min": -0.306640625, + "objective/train/value_reward_corr": 0.6113931009977059, + "objective/train/value_std": 0.01380157470703125, + "objective/train/weight_avg": 1.0012942552566528, + "objective/train/weighted_lm_loss": 2.192763328552246, + "objective/train/weights_max": 1.2806123495101929, + "objective/train/weights_min": 0.37096279859542847, + "theoretical_loss": 4.375253169290376, + "tokens_seen": 203161600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009479216819130155, + "loss": 0.105, + "theoretical_loss": 4.375253169290376, + "tokens_seen": 203161600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009478414379714331, + "loss": 0.1059, + "theoretical_loss": 4.374553606120274, + "tokens_seen": 203423744 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009477611940298508, + "loss": 0.1047, + "theoretical_loss": 4.373855195918162, + "tokens_seen": 203685888 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009476809500882684, + "loss": 0.1076, + "theoretical_loss": 4.3731579353036, + "tokens_seen": 203948032 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009476007061466859, + "loss": 0.104, + "theoretical_loss": 4.372461820910382, + "tokens_seen": 204210176 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009475204622051036, + "loss": 0.1065, + "theoretical_loss": 4.371766849386468, + "tokens_seen": 204472320 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009474402182635211, + "loss": 0.1068, + "theoretical_loss": 4.3710730173939005, + "tokens_seen": 204734464 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009473599743219387, + "loss": 0.1063, + "theoretical_loss": 4.370380321608731, + "tokens_seen": 204996608 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009472797303803563, + "loss": 0.108, + "theoretical_loss": 4.369688758720937, + "tokens_seen": 205258752 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009471994864387738, + "loss": 0.1043, + "theoretical_loss": 4.368998325434355, + "tokens_seen": 205520896 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009471192424971916, + "loss": 0.1023, + "theoretical_loss": 4.3683090184666, + "tokens_seen": 205783040 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009470389985556091, + "loss": 0.1021, + "theoretical_loss": 4.367620834548987, + "tokens_seen": 206045184 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009469587546140267, + "loss": 0.1042, + "theoretical_loss": 4.3669337704264635, + "tokens_seen": 206307328 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0008768333937041461, + "objective/train/docs_used": 81724, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.068549871444702, + "objective/train/original_loss": 2.068549871444702, + "objective/train/theoretical_loss": 4.366590657274865, + "objective/train/tokens_used": 226898400, + "objective/train/value_avg": -0.0103302001953125, + "objective/train/value_loss": 0.00021303060930222273, + "objective/train/value_max": -0.0006880760192871094, + "objective/train/value_min": -0.205078125, + "objective/train/value_reward_corr": 0.6560487348637545, + "objective/train/value_std": 0.01080322265625, + "objective/train/weight_avg": 1.0009812116622925, + "objective/train/weighted_lm_loss": 2.0716254711151123, + "objective/train/weights_max": 1.1215885877609253, + "objective/train/weights_min": 0.8218191862106323, + "theoretical_loss": 4.366590657274865, + "tokens_seen": 206438400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009468785106724443, + "loss": 0.1075, + "theoretical_loss": 4.366247822857533, + "tokens_seen": 206569472 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009467982667308619, + "loss": 0.1054, + "theoretical_loss": 4.365562988614176, + "tokens_seen": 206831616 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009467180227892794, + "loss": 0.1035, + "theoretical_loss": 4.364879264481787, + "tokens_seen": 207093760 + }, + { + "epoch": 0.06, + "learning_rate": 0.000946637778847697, + "loss": 0.1039, + "theoretical_loss": 4.364196647259092, + "tokens_seen": 207355904 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009465575349061146, + "loss": 0.1061, + "theoretical_loss": 4.363515133758084, + "tokens_seen": 207618048 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009464772909645321, + "loss": 0.1078, + "theoretical_loss": 4.3628347208039475, + "tokens_seen": 207880192 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009463970470229499, + "loss": 0.1021, + "theoretical_loss": 4.362155405234985, + "tokens_seen": 208142336 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009463168030813674, + "loss": 0.1093, + "theoretical_loss": 4.361477183902554, + "tokens_seen": 208404480 + }, + { + "epoch": 0.06, + "learning_rate": 0.000946236559139785, + "loss": 0.1055, + "theoretical_loss": 4.360800053670989, + "tokens_seen": 208666624 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009461563151982026, + "loss": 0.1034, + "theoretical_loss": 4.360124011417536, + "tokens_seen": 208928768 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009460760712566201, + "loss": 0.105, + "theoretical_loss": 4.359449054032282, + "tokens_seen": 209190912 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009459958273150377, + "loss": 0.1048, + "theoretical_loss": 4.358775178418089, + "tokens_seen": 209453056 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.001143173431046307, + "objective/train/docs_used": 82917, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0207271575927734, + "objective/train/original_loss": 2.0207271575927734, + "objective/train/theoretical_loss": 4.358102381490517, + "objective/train/tokens_used": 230175200, + "objective/train/value_avg": -0.009368896484375, + "objective/train/value_loss": 0.00035423377994447947, + "objective/train/value_max": -0.0006718635559082031, + "objective/train/value_min": -0.259033203125, + "objective/train/value_reward_corr": 0.4731410524064374, + "objective/train/value_std": 0.00994110107421875, + "objective/train/weight_avg": 1.001305103302002, + "objective/train/weighted_lm_loss": 2.0232884883880615, + "objective/train/weights_max": 1.2774591445922852, + "objective/train/weights_min": 0.3702460527420044, + "theoretical_loss": 4.358102381490517, + "tokens_seen": 209715200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009459155833734553, + "loss": 0.1056, + "theoretical_loss": 4.358102381490517, + "tokens_seen": 209715200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009458353394318729, + "loss": 0.1055, + "theoretical_loss": 4.3574306601777675, + "tokens_seen": 209977344 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009457550954902905, + "loss": 0.1016, + "theoretical_loss": 4.356760011420608, + "tokens_seen": 210239488 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009456748515487081, + "loss": 0.1089, + "theoretical_loss": 4.3560904321723095, + "tokens_seen": 210501632 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009455946076071257, + "loss": 0.106, + "theoretical_loss": 4.355421919398576, + "tokens_seen": 210763776 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009455143636655433, + "loss": 0.1057, + "theoretical_loss": 4.354754470077481, + "tokens_seen": 211025920 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009454341197239609, + "loss": 0.1032, + "theoretical_loss": 4.354088081199402, + "tokens_seen": 211288064 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009453538757823784, + "loss": 0.1041, + "theoretical_loss": 4.3534227497669535, + "tokens_seen": 211550208 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009452736318407961, + "loss": 0.1035, + "theoretical_loss": 4.352758472794923, + "tokens_seen": 211812352 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009451933878992136, + "loss": 0.1058, + "theoretical_loss": 4.352095247310208, + "tokens_seen": 212074496 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009451131439576312, + "loss": 0.1048, + "theoretical_loss": 4.351433070351748, + "tokens_seen": 212336640 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009450329000160488, + "loss": 0.1064, + "theoretical_loss": 4.350771938970466, + "tokens_seen": 212598784 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009449526560744663, + "loss": 0.1028, + "theoretical_loss": 4.350111850229202, + "tokens_seen": 212860928 + }, + { + "epoch": 0.06, + "objective/train/advantage_avg": 0.0043486496433615685, + "objective/train/docs_used": 84140, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9397648572921753, + "objective/train/original_loss": 1.9397649765014648, + "objective/train/theoretical_loss": 4.349782195933957, + "objective/train/tokens_used": 233452000, + "objective/train/value_avg": -0.01163482666015625, + "objective/train/value_loss": 0.00020028821018058807, + "objective/train/value_max": -0.0011072158813476562, + "objective/train/value_min": -0.1822509765625, + "objective/train/value_reward_corr": 0.6988513016032423, + "objective/train/value_std": 0.0160675048828125, + "objective/train/weight_avg": 1.0044445991516113, + "objective/train/weighted_lm_loss": 1.948439121246338, + "objective/train/weights_max": 1.1115705966949463, + "objective/train/weights_min": 0.3973115086555481, + "theoretical_loss": 4.349782195933957, + "tokens_seen": 212992000 + }, + { + "epoch": 0.06, + "learning_rate": 0.000944872412132884, + "loss": 0.1017, + "theoretical_loss": 4.34945280120265, + "tokens_seen": 213123072 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009447921681913016, + "loss": 0.103, + "theoretical_loss": 4.348794788977298, + "tokens_seen": 213385216 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009447119242497192, + "loss": 0.1039, + "theoretical_loss": 4.348137810651366, + "tokens_seen": 213647360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009446316803081368, + "loss": 0.102, + "theoretical_loss": 4.347481863334738, + "tokens_seen": 213909504 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009445514363665544, + "loss": 0.1045, + "theoretical_loss": 4.346826944148912, + "tokens_seen": 214171648 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009444711924249719, + "loss": 0.1049, + "theoretical_loss": 4.3461730502269305, + "tokens_seen": 214433792 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009443909484833896, + "loss": 0.1036, + "theoretical_loss": 4.345520178713323, + "tokens_seen": 214695936 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009443107045418071, + "loss": 0.1077, + "theoretical_loss": 4.344868326764045, + "tokens_seen": 214958080 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009442304606002246, + "loss": 0.1054, + "theoretical_loss": 4.344217491546422, + "tokens_seen": 215220224 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009441502166586424, + "loss": 0.1037, + "theoretical_loss": 4.343567670239084, + "tokens_seen": 215482368 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009440699727170599, + "loss": 0.1059, + "theoretical_loss": 4.342918860031914, + "tokens_seen": 215744512 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009439897287754775, + "loss": 0.1007, + "theoretical_loss": 4.342271058125983, + "tokens_seen": 216006656 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0013779407599940896, + "objective/train/docs_used": 85191, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.144592761993408, + "objective/train/original_loss": 2.1445932388305664, + "objective/train/theoretical_loss": 4.341624261733497, + "objective/train/tokens_used": 236728800, + "objective/train/value_avg": -0.01018524169921875, + "objective/train/value_loss": 0.0004004819202236831, + "objective/train/value_max": -0.0008134841918945312, + "objective/train/value_min": -0.294677734375, + "objective/train/value_reward_corr": 0.4947933347916151, + "objective/train/value_std": 0.01059722900390625, + "objective/train/weight_avg": 1.0015608072280884, + "objective/train/weighted_lm_loss": 2.14778208732605, + "objective/train/weights_max": 1.2878975868225098, + "objective/train/weights_min": 0.3711496591567993, + "theoretical_loss": 4.341624261733497, + "tokens_seen": 216268800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009439094848338951, + "loss": 0.1053, + "theoretical_loss": 4.341624261733497, + "tokens_seen": 216268800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009438292408923127, + "loss": 0.1064, + "theoretical_loss": 4.340978468077735, + "tokens_seen": 216530944 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009437489969507302, + "loss": 0.1038, + "theoretical_loss": 4.340333674392992, + "tokens_seen": 216793088 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009436687530091478, + "loss": 0.1046, + "theoretical_loss": 4.339689877924531, + "tokens_seen": 217055232 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009435885090675654, + "loss": 0.1038, + "theoretical_loss": 4.3390470759285105, + "tokens_seen": 217317376 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009435082651259829, + "loss": 0.1039, + "theoretical_loss": 4.338405265671941, + "tokens_seen": 217579520 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009434280211844006, + "loss": 0.0991, + "theoretical_loss": 4.337764444432625, + "tokens_seen": 217841664 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009433477772428182, + "loss": 0.1038, + "theoretical_loss": 4.337124609499101, + "tokens_seen": 218103808 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009432675333012359, + "loss": 0.1003, + "theoretical_loss": 4.336485758170589, + "tokens_seen": 218365952 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009431872893596534, + "loss": 0.106, + "theoretical_loss": 4.335847887756934, + "tokens_seen": 218628096 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009431070454180709, + "loss": 0.1021, + "theoretical_loss": 4.335210995578553, + "tokens_seen": 218890240 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009430268014764886, + "loss": 0.1041, + "theoretical_loss": 4.334575078966383, + "tokens_seen": 219152384 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009429465575349061, + "loss": 0.1049, + "theoretical_loss": 4.333940135261823, + "tokens_seen": 219414528 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.002295425161719322, + "objective/train/docs_used": 86440, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9799681901931763, + "objective/train/original_loss": 1.9799679517745972, + "objective/train/theoretical_loss": 4.3336230274219325, + "objective/train/tokens_used": 240005600, + "objective/train/value_avg": -0.00746917724609375, + "objective/train/value_loss": 0.00019181027892045677, + "objective/train/value_max": -0.0007948875427246094, + "objective/train/value_min": -0.2081298828125, + "objective/train/value_reward_corr": 0.306679385928593, + "objective/train/value_std": 0.00490570068359375, + "objective/train/weight_avg": 1.002386212348938, + "objective/train/weighted_lm_loss": 1.9838651418685913, + "objective/train/weights_max": 1.0839669704437256, + "objective/train/weights_min": 0.37081003189086914, + "theoretical_loss": 4.3336230274219325, + "tokens_seen": 219545600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009428663135933237, + "loss": 0.1019, + "theoretical_loss": 4.333306161816684, + "tokens_seen": 219676672 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009427860696517413, + "loss": 0.1057, + "theoretical_loss": 4.332673155993131, + "tokens_seen": 219938816 + }, + { + "epoch": 0.07, + "learning_rate": 0.000942705825710159, + "loss": 0.1016, + "theoretical_loss": 4.332041115163636, + "tokens_seen": 220200960 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009426255817685765, + "loss": 0.1049, + "theoretical_loss": 4.331410036710925, + "tokens_seen": 220463104 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009425453378269941, + "loss": 0.1027, + "theoretical_loss": 4.330779918027919, + "tokens_seen": 220725248 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009424650938854117, + "loss": 0.1047, + "theoretical_loss": 4.330150756517692, + "tokens_seen": 220987392 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009423848499438292, + "loss": 0.1025, + "theoretical_loss": 4.3295225495934115, + "tokens_seen": 221249536 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009423046060022469, + "loss": 0.0982, + "theoretical_loss": 4.328895294678292, + "tokens_seen": 221511680 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009422243620606644, + "loss": 0.103, + "theoretical_loss": 4.32826898920554, + "tokens_seen": 221773824 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009421441181190821, + "loss": 0.1057, + "theoretical_loss": 4.3276436306183115, + "tokens_seen": 222035968 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009420638741774996, + "loss": 0.1003, + "theoretical_loss": 4.327019216369651, + "tokens_seen": 222298112 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009419836302359171, + "loss": 0.1012, + "theoretical_loss": 4.32639574392245, + "tokens_seen": 222560256 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0020489406306296587, + "objective/train/docs_used": 87601, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8337969779968262, + "objective/train/original_loss": 1.8337969779968262, + "objective/train/theoretical_loss": 4.325773210749392, + "objective/train/tokens_used": 243282400, + "objective/train/value_avg": -0.0086669921875, + "objective/train/value_loss": 0.000297574297292158, + "objective/train/value_max": -0.0009889602661132812, + "objective/train/value_min": -0.6943359375, + "objective/train/value_reward_corr": 0.5675373911477959, + "objective/train/value_std": 0.0108795166015625, + "objective/train/weight_avg": 1.0021827220916748, + "objective/train/weighted_lm_loss": 1.838815450668335, + "objective/train/weights_max": 1.4390860795974731, + "objective/train/weights_min": 0.37089207768440247, + "theoretical_loss": 4.325773210749392, + "tokens_seen": 222822400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009419033862943349, + "loss": 0.0976, + "theoretical_loss": 4.325773210749392, + "tokens_seen": 222822400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009418231423527524, + "loss": 0.1022, + "theoretical_loss": 4.325151614332908, + "tokens_seen": 223084544 + }, + { + "epoch": 0.07, + "learning_rate": 0.00094174289841117, + "loss": 0.1034, + "theoretical_loss": 4.3245309521651265, + "tokens_seen": 223346688 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009416626544695876, + "loss": 0.103, + "theoretical_loss": 4.323911221747817, + "tokens_seen": 223608832 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009415824105280052, + "loss": 0.1048, + "theoretical_loss": 4.323292420592356, + "tokens_seen": 223870976 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009415021665864227, + "loss": 0.1024, + "theoretical_loss": 4.322674546219666, + "tokens_seen": 224133120 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009414219226448404, + "loss": 0.1028, + "theoretical_loss": 4.322057596160174, + "tokens_seen": 224395264 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009413416787032579, + "loss": 0.0994, + "theoretical_loss": 4.321441567953762, + "tokens_seen": 224657408 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009412614347616754, + "loss": 0.1017, + "theoretical_loss": 4.320826459149725, + "tokens_seen": 224919552 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009411811908200932, + "loss": 0.1075, + "theoretical_loss": 4.3202122673067125, + "tokens_seen": 225181696 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009411009468785107, + "loss": 0.1007, + "theoretical_loss": 4.319598989992695, + "tokens_seen": 225443840 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009410207029369283, + "loss": 0.1049, + "theoretical_loss": 4.318986624784908, + "tokens_seen": 225705984 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009409404589953459, + "loss": 0.1034, + "theoretical_loss": 4.318375169269813, + "tokens_seen": 225968128 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0023550805635750294, + "objective/train/docs_used": 88830, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9909827709197998, + "objective/train/original_loss": 1.9909827709197998, + "objective/train/theoretical_loss": 4.3180697818953035, + "objective/train/tokens_used": 246559200, + "objective/train/value_avg": -0.00814056396484375, + "objective/train/value_loss": 0.00033672992140054703, + "objective/train/value_max": -0.0005397796630859375, + "objective/train/value_min": -0.321533203125, + "objective/train/value_reward_corr": 0.5563445957193008, + "objective/train/value_std": 0.010406494140625, + "objective/train/weight_avg": 1.0025025606155396, + "objective/train/weighted_lm_loss": 1.9960408210754395, + "objective/train/weights_max": 1.258882761001587, + "objective/train/weights_min": 0.3709288537502289, + "theoretical_loss": 4.3180697818953035, + "tokens_seen": 226099200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009408602150537635, + "loss": 0.1014, + "theoretical_loss": 4.317764621043046, + "tokens_seen": 226230272 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009407799711121811, + "loss": 0.0995, + "theoretical_loss": 4.317154977709375, + "tokens_seen": 226492416 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009406997271705986, + "loss": 0.1001, + "theoretical_loss": 4.3165462368826555, + "tokens_seen": 226754560 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009406194832290162, + "loss": 0.0995, + "theoretical_loss": 4.315938396185782, + "tokens_seen": 227016704 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009405392392874338, + "loss": 0.1001, + "theoretical_loss": 4.315331453250648, + "tokens_seen": 227278848 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009404589953458514, + "loss": 0.0994, + "theoretical_loss": 4.314725405718099, + "tokens_seen": 227540992 + }, + { + "epoch": 0.07, + "learning_rate": 0.000940378751404269, + "loss": 0.0997, + "theoretical_loss": 4.314120251237887, + "tokens_seen": 227803136 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009402985074626867, + "loss": 0.103, + "theoretical_loss": 4.31351598746863, + "tokens_seen": 228065280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009402182635211042, + "loss": 0.1013, + "theoretical_loss": 4.312912612077767, + "tokens_seen": 228327424 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009401380195795217, + "loss": 0.1022, + "theoretical_loss": 4.312310122741512, + "tokens_seen": 228589568 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009400577756379394, + "loss": 0.1028, + "theoretical_loss": 4.311708517144817, + "tokens_seen": 228851712 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009399775316963569, + "loss": 0.1024, + "theoretical_loss": 4.311107792981323, + "tokens_seen": 229113856 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0026854872703552246, + "objective/train/docs_used": 90061, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.144618511199951, + "objective/train/original_loss": 2.144618511199951, + "objective/train/theoretical_loss": 4.310507947953321, + "objective/train/tokens_used": 249836000, + "objective/train/value_avg": -0.00982666015625, + "objective/train/value_loss": 0.00028081017080694437, + "objective/train/value_max": -0.0007295608520507812, + "objective/train/value_min": -0.66845703125, + "objective/train/value_reward_corr": 0.6199593894084428, + "objective/train/value_std": 0.01155853271484375, + "objective/train/weight_avg": 1.0028172731399536, + "objective/train/weighted_lm_loss": 2.1502115726470947, + "objective/train/weights_max": 1.3387657403945923, + "objective/train/weights_min": 0.37122613191604614, + "theoretical_loss": 4.310507947953321, + "tokens_seen": 229376000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009398972877547745, + "loss": 0.1031, + "theoretical_loss": 4.310507947953321, + "tokens_seen": 229376000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009398170438131921, + "loss": 0.1045, + "theoretical_loss": 4.309908979771709, + "tokens_seen": 229638144 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009397367998716097, + "loss": 0.1044, + "theoretical_loss": 4.3093108861559495, + "tokens_seen": 229900288 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009396565559300272, + "loss": 0.1022, + "theoretical_loss": 4.308713664834029, + "tokens_seen": 230162432 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009395763119884449, + "loss": 0.0958, + "theoretical_loss": 4.308117313542413, + "tokens_seen": 230424576 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009394960680468625, + "loss": 0.1022, + "theoretical_loss": 4.30752183002601, + "tokens_seen": 230686720 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009394158241052801, + "loss": 0.099, + "theoretical_loss": 4.3069272120381275, + "tokens_seen": 230948864 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009393355801636977, + "loss": 0.102, + "theoretical_loss": 4.30633345734043, + "tokens_seen": 231211008 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009392553362221152, + "loss": 0.1004, + "theoretical_loss": 4.3057405637029, + "tokens_seen": 231473152 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009391750922805329, + "loss": 0.1015, + "theoretical_loss": 4.305148528903798, + "tokens_seen": 231735296 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009390948483389504, + "loss": 0.1023, + "theoretical_loss": 4.304557350729623, + "tokens_seen": 231997440 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009390146043973679, + "loss": 0.1002, + "theoretical_loss": 4.303967026975072, + "tokens_seen": 232259584 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009389343604557857, + "loss": 0.1024, + "theoretical_loss": 4.303377555442998, + "tokens_seen": 232521728 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0007922460208646953, + "objective/train/docs_used": 91258, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9420995712280273, + "objective/train/original_loss": 1.9420994520187378, + "objective/train/theoretical_loss": 4.303083138576003, + "objective/train/tokens_used": 253112800, + "objective/train/value_avg": -0.006786346435546875, + "objective/train/value_loss": 0.0005959350382909179, + "objective/train/value_max": -0.0007352828979492188, + "objective/train/value_min": -0.293701171875, + "objective/train/value_reward_corr": 0.39993119929541304, + "objective/train/value_std": 0.007472991943359375, + "objective/train/weight_avg": 1.0010305643081665, + "objective/train/weighted_lm_loss": 1.9428025484085083, + "objective/train/weights_max": 1.215839147567749, + "objective/train/weights_min": 0.2238616794347763, + "theoretical_loss": 4.303083138576003, + "tokens_seen": 232652800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009388541165142032, + "loss": 0.098, + "theoretical_loss": 4.302788933944375, + "tokens_seen": 232783872 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009387738725726208, + "loss": 0.1014, + "theoretical_loss": 4.302201160298255, + "tokens_seen": 233046016 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009386936286310384, + "loss": 0.1019, + "theoretical_loss": 4.301614232331733, + "tokens_seen": 233308160 + }, + { + "epoch": 0.07, + "learning_rate": 0.000938613384689456, + "loss": 0.101, + "theoretical_loss": 4.301028147879904, + "tokens_seen": 233570304 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009385331407478735, + "loss": 0.0983, + "theoretical_loss": 4.300442904785831, + "tokens_seen": 233832448 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009384528968062911, + "loss": 0.1014, + "theoretical_loss": 4.299858500900495, + "tokens_seen": 234094592 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009383726528647087, + "loss": 0.1024, + "theoretical_loss": 4.2992749340827725, + "tokens_seen": 234356736 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009382924089231262, + "loss": 0.1028, + "theoretical_loss": 4.298692202199386, + "tokens_seen": 234618880 + }, + { + "epoch": 0.07, + "learning_rate": 0.000938212164981544, + "loss": 0.101, + "theoretical_loss": 4.298110303124871, + "tokens_seen": 234881024 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009381319210399615, + "loss": 0.1022, + "theoretical_loss": 4.29752923474154, + "tokens_seen": 235143168 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009380516770983792, + "loss": 0.1006, + "theoretical_loss": 4.29694899493944, + "tokens_seen": 235405312 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009379714331567967, + "loss": 0.1017, + "theoretical_loss": 4.2963695816163225, + "tokens_seen": 235667456 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.0031256088986992836, + "objective/train/docs_used": 92471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.102999448776245, + "objective/train/original_loss": 2.102999687194824, + "objective/train/theoretical_loss": 4.295790992677603, + "objective/train/tokens_used": 256389600, + "objective/train/value_avg": -0.01329803466796875, + "objective/train/value_loss": 0.0007132674218155444, + "objective/train/value_max": -0.0005130767822265625, + "objective/train/value_min": -0.8544921875, + "objective/train/value_reward_corr": 0.6135290805544735, + "objective/train/value_std": 0.0253448486328125, + "objective/train/weight_avg": 1.0034579038619995, + "objective/train/weighted_lm_loss": 2.107316493988037, + "objective/train/weights_max": 1.5163893699645996, + "objective/train/weights_min": 0.39436399936676025, + "theoretical_loss": 4.295790992677603, + "tokens_seen": 235929600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009378911892152143, + "loss": 0.0997, + "theoretical_loss": 4.295790992677603, + "tokens_seen": 235929600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009378109452736319, + "loss": 0.1009, + "theoretical_loss": 4.2952132260363225, + "tokens_seen": 236191744 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009377307013320494, + "loss": 0.0996, + "theoretical_loss": 4.294636279613117, + "tokens_seen": 236453888 + }, + { + "epoch": 0.07, + "learning_rate": 0.000937650457390467, + "loss": 0.1036, + "theoretical_loss": 4.294060151336178, + "tokens_seen": 236716032 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009375702134488846, + "loss": 0.1019, + "theoretical_loss": 4.293484839141217, + "tokens_seen": 236978176 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009374899695073022, + "loss": 0.1, + "theoretical_loss": 4.29291034097143, + "tokens_seen": 237240320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009374097255657198, + "loss": 0.097, + "theoretical_loss": 4.2923366547774595, + "tokens_seen": 237502464 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009373294816241375, + "loss": 0.1002, + "theoretical_loss": 4.2917637785173675, + "tokens_seen": 237764608 + }, + { + "epoch": 0.07, + "learning_rate": 0.000937249237682555, + "loss": 0.1004, + "theoretical_loss": 4.291191710156591, + "tokens_seen": 238026752 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009371689937409725, + "loss": 0.1003, + "theoretical_loss": 4.290620447667912, + "tokens_seen": 238288896 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009370887497993902, + "loss": 0.0979, + "theoretical_loss": 4.290049989031424, + "tokens_seen": 238551040 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009370085058578077, + "loss": 0.1033, + "theoretical_loss": 4.289480332234493, + "tokens_seen": 238813184 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009369282619162254, + "loss": 0.1012, + "theoretical_loss": 4.288911475271731, + "tokens_seen": 239075328 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.001886105164885521, + "objective/train/docs_used": 93570, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0680296421051025, + "objective/train/original_loss": 2.0680294036865234, + "objective/train/theoretical_loss": 4.28862734610345, + "objective/train/tokens_used": 259666400, + "objective/train/value_avg": -0.008026123046875, + "objective/train/value_loss": 0.0002583606983534992, + "objective/train/value_max": -0.0006852149963378906, + "objective/train/value_min": -0.2454833984375, + "objective/train/value_reward_corr": 0.48885293063683233, + "objective/train/value_std": 0.007061004638671875, + "objective/train/weight_avg": 1.0020033121109009, + "objective/train/weighted_lm_loss": 2.0723109245300293, + "objective/train/weights_max": 1.1007682085037231, + "objective/train/weights_min": 0.3832702934741974, + "theoretical_loss": 4.28862734610345, + "tokens_seen": 239206400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009368480179746429, + "loss": 0.1002, + "theoretical_loss": 4.288343416144952, + "tokens_seen": 239337472 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009367677740330605, + "loss": 0.1023, + "theoretical_loss": 4.287776152863146, + "tokens_seen": 239599616 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009366875300914782, + "loss": 0.1016, + "theoretical_loss": 4.287209683442444, + "tokens_seen": 239861760 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009366072861498957, + "loss": 0.1004, + "theoretical_loss": 4.286644005906081, + "tokens_seen": 240123904 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009365270422083133, + "loss": 0.101, + "theoretical_loss": 4.286079118284368, + "tokens_seen": 240386048 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009364467982667309, + "loss": 0.1034, + "theoretical_loss": 4.285515018614655, + "tokens_seen": 240648192 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009363665543251485, + "loss": 0.096, + "theoretical_loss": 4.2849517049412995, + "tokens_seen": 240910336 + }, + { + "epoch": 0.07, + "learning_rate": 0.000936286310383566, + "loss": 0.0974, + "theoretical_loss": 4.284389175315636, + "tokens_seen": 241172480 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009362060664419837, + "loss": 0.0995, + "theoretical_loss": 4.283827427795939, + "tokens_seen": 241434624 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009361258225004012, + "loss": 0.0987, + "theoretical_loss": 4.283266460447394, + "tokens_seen": 241696768 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009360455785588187, + "loss": 0.0967, + "theoretical_loss": 4.282706271342066, + "tokens_seen": 241958912 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009359653346172365, + "loss": 0.1028, + "theoretical_loss": 4.282146858558866, + "tokens_seen": 242221056 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.001827435800805688, + "objective/train/docs_used": 94878, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0017893314361572, + "objective/train/original_loss": 2.001789093017578, + "objective/train/theoretical_loss": 4.281588220183519, + "objective/train/tokens_used": 262943200, + "objective/train/value_avg": -0.01013946533203125, + "objective/train/value_loss": 0.0004723604361061007, + "objective/train/value_max": -0.0006642341613769531, + "objective/train/value_min": -0.4013671875, + "objective/train/value_reward_corr": 0.613649177640665, + "objective/train/value_std": 0.0154876708984375, + "objective/train/weight_avg": 1.0020326375961304, + "objective/train/weighted_lm_loss": 2.0051186084747314, + "objective/train/weights_max": 1.3551794290542603, + "objective/train/weights_min": 0.37200865149497986, + "theoretical_loss": 4.281588220183519, + "tokens_seen": 242483200 + }, + { + "epoch": 0.07, + "learning_rate": 0.000935885090675654, + "loss": 0.0977, + "theoretical_loss": 4.281588220183519, + "tokens_seen": 242483200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009358048467340716, + "loss": 0.0995, + "theoretical_loss": 4.281030354308533, + "tokens_seen": 242745344 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009357246027924892, + "loss": 0.0989, + "theoretical_loss": 4.280473259033169, + "tokens_seen": 243007488 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009356443588509068, + "loss": 0.0982, + "theoretical_loss": 4.27991693246341, + "tokens_seen": 243269632 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009355641149093244, + "loss": 0.0986, + "theoretical_loss": 4.279361372711923, + "tokens_seen": 243531776 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009354838709677419, + "loss": 0.1022, + "theoretical_loss": 4.278806577898042, + "tokens_seen": 243793920 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009354036270261595, + "loss": 0.0979, + "theoretical_loss": 4.278252546147724, + "tokens_seen": 244056064 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009353233830845771, + "loss": 0.0995, + "theoretical_loss": 4.277699275593523, + "tokens_seen": 244318208 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009352431391429948, + "loss": 0.0949, + "theoretical_loss": 4.277146764374566, + "tokens_seen": 244580352 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009351628952014123, + "loss": 0.0999, + "theoretical_loss": 4.276595010636514, + "tokens_seen": 244842496 + }, + { + "epoch": 0.07, + "learning_rate": 0.00093508265125983, + "loss": 0.0993, + "theoretical_loss": 4.276044012531534, + "tokens_seen": 245104640 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009350024073182475, + "loss": 0.1013, + "theoretical_loss": 4.275493768218274, + "tokens_seen": 245366784 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009349221633766651, + "loss": 0.0975, + "theoretical_loss": 4.274944275861828, + "tokens_seen": 245628928 + }, + { + "epoch": 0.07, + "objective/train/advantage_avg": 0.002574330661445856, + "objective/train/docs_used": 96014, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0550389289855957, + "objective/train/original_loss": 2.055039405822754, + "objective/train/theoretical_loss": 4.274669811095759, + "objective/train/tokens_used": 266220000, + "objective/train/value_avg": -0.01256561279296875, + "objective/train/value_loss": 0.0013382832985371351, + "objective/train/value_max": -0.0008039474487304688, + "objective/train/value_min": -0.810546875, + "objective/train/value_reward_corr": 0.6458940332158234, + "objective/train/value_std": 0.0258026123046875, + "objective/train/weight_avg": 1.003141164779663, + "objective/train/weighted_lm_loss": 2.06048846244812, + "objective/train/weights_max": 1.4646140336990356, + "objective/train/weights_min": 0.36990442872047424, + "theoretical_loss": 4.274669811095759, + "tokens_seen": 245760000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009348419194350827, + "loss": 0.098, + "theoretical_loss": 4.274395533633712, + "tokens_seen": 245891072 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009347616754935002, + "loss": 0.096, + "theoretical_loss": 4.273847539711825, + "tokens_seen": 246153216 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009346814315519178, + "loss": 0.1013, + "theoretical_loss": 4.273300292280435, + "tokens_seen": 246415360 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009346011876103354, + "loss": 0.0989, + "theoretical_loss": 4.272753789530134, + "tokens_seen": 246677504 + }, + { + "epoch": 0.07, + "learning_rate": 0.000934520943668753, + "loss": 0.096, + "theoretical_loss": 4.272208029657822, + "tokens_seen": 246939648 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009344406997271707, + "loss": 0.0971, + "theoretical_loss": 4.271663010866669, + "tokens_seen": 247201792 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009343604557855883, + "loss": 0.1003, + "theoretical_loss": 4.2711187313660925, + "tokens_seen": 247463936 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009342802118440058, + "loss": 0.0983, + "theoretical_loss": 4.270575189371727, + "tokens_seen": 247726080 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009341999679024234, + "loss": 0.0991, + "theoretical_loss": 4.270032383105398, + "tokens_seen": 247988224 + }, + { + "epoch": 0.08, + "learning_rate": 0.000934119723960841, + "loss": 0.0973, + "theoretical_loss": 4.269490310795089, + "tokens_seen": 248250368 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009340394800192585, + "loss": 0.1025, + "theoretical_loss": 4.268948970674917, + "tokens_seen": 248512512 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009339592360776762, + "loss": 0.1013, + "theoretical_loss": 4.268408360985109, + "tokens_seen": 248774656 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.00040733805508352816, + "objective/train/docs_used": 97104, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9560189247131348, + "objective/train/original_loss": 1.9560186862945557, + "objective/train/theoretical_loss": 4.267868479971966, + "objective/train/tokens_used": 269496800, + "objective/train/value_avg": -0.010101318359375, + "objective/train/value_loss": 0.0005951999919489026, + "objective/train/value_max": -0.0006189346313476562, + "objective/train/value_min": -0.81201171875, + "objective/train/value_reward_corr": 0.654036095652701, + "objective/train/value_std": 0.0165863037109375, + "objective/train/weight_avg": 1.0006780624389648, + "objective/train/weighted_lm_loss": 1.9563541412353516, + "objective/train/weights_max": 1.3073004484176636, + "objective/train/weights_min": 0.4099235534667969, + "theoretical_loss": 4.267868479971966, + "tokens_seen": 249036800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009338789921360937, + "loss": 0.0996, + "theoretical_loss": 4.267868479971966, + "tokens_seen": 249036800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009337987481945113, + "loss": 0.099, + "theoretical_loss": 4.267329325887841, + "tokens_seen": 249298944 + }, + { + "epoch": 0.08, + "learning_rate": 0.000933718504252929, + "loss": 0.0923, + "theoretical_loss": 4.266790896991109, + "tokens_seen": 249561088 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009336382603113465, + "loss": 0.1004, + "theoretical_loss": 4.266253191546146, + "tokens_seen": 249823232 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009335580163697641, + "loss": 0.0992, + "theoretical_loss": 4.265716207823292, + "tokens_seen": 250085376 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009334777724281817, + "loss": 0.0991, + "theoretical_loss": 4.2651799440988345, + "tokens_seen": 250347520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009333975284865993, + "loss": 0.0974, + "theoretical_loss": 4.2646443986549745, + "tokens_seen": 250609664 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009333172845450168, + "loss": 0.099, + "theoretical_loss": 4.264109569779803, + "tokens_seen": 250871808 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009332370406034345, + "loss": 0.1013, + "theoretical_loss": 4.263575455767277, + "tokens_seen": 251133952 + }, + { + "epoch": 0.08, + "learning_rate": 0.000933156796661852, + "loss": 0.0997, + "theoretical_loss": 4.263042054917186, + "tokens_seen": 251396096 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009330765527202696, + "loss": 0.0991, + "theoretical_loss": 4.262509365535134, + "tokens_seen": 251658240 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009329963087786873, + "loss": 0.0967, + "theoretical_loss": 4.261977385932512, + "tokens_seen": 251920384 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009329160648371048, + "loss": 0.0985, + "theoretical_loss": 4.261446114426466, + "tokens_seen": 252182528 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002480955794453621, + "objective/train/docs_used": 98257, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.974042534828186, + "objective/train/original_loss": 1.974042534828186, + "objective/train/theoretical_loss": 4.261180743685337, + "objective/train/tokens_used": 272773600, + "objective/train/value_avg": -0.00753021240234375, + "objective/train/value_loss": 0.00019367090135347098, + "objective/train/value_max": -0.0005908012390136719, + "objective/train/value_min": -0.181640625, + "objective/train/value_reward_corr": 0.6212227796200605, + "objective/train/value_std": 0.008819580078125, + "objective/train/weight_avg": 1.0025759935379028, + "objective/train/weighted_lm_loss": 1.9780480861663818, + "objective/train/weights_max": 1.16512930393219, + "objective/train/weights_min": 0.7835448384284973, + "theoretical_loss": 4.261180743685337, + "tokens_seen": 252313600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009328358208955225, + "loss": 0.0973, + "theoretical_loss": 4.260915549339879, + "tokens_seen": 252444672 + }, + { + "epoch": 0.08, + "learning_rate": 0.00093275557695394, + "loss": 0.097, + "theoretical_loss": 4.2603856890013425, + "tokens_seen": 252706816 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009326753330123576, + "loss": 0.0991, + "theoretical_loss": 4.25985653174513, + "tokens_seen": 252968960 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009325950890707752, + "loss": 0.0948, + "theoretical_loss": 4.259328075911173, + "tokens_seen": 253231104 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009325148451291927, + "loss": 0.0989, + "theoretical_loss": 4.258800319845038, + "tokens_seen": 253493248 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009324346011876103, + "loss": 0.0974, + "theoretical_loss": 4.258273261897896, + "tokens_seen": 253755392 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009323543572460279, + "loss": 0.097, + "theoretical_loss": 4.257746900426506, + "tokens_seen": 254017536 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009322741133044456, + "loss": 0.0975, + "theoretical_loss": 4.25722123379318, + "tokens_seen": 254279680 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009321938693628631, + "loss": 0.1009, + "theoretical_loss": 4.256696260365768, + "tokens_seen": 254541824 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009321136254212808, + "loss": 0.0996, + "theoretical_loss": 4.256171978517629, + "tokens_seen": 254803968 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009320333814796983, + "loss": 0.0952, + "theoretical_loss": 4.255648386627607, + "tokens_seen": 255066112 + }, + { + "epoch": 0.08, + "learning_rate": 0.000931953137538116, + "loss": 0.0987, + "theoretical_loss": 4.255125483080007, + "tokens_seen": 255328256 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0014403994427993894, + "objective/train/docs_used": 99403, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.000425100326538, + "objective/train/original_loss": 2.000425338745117, + "objective/train/theoretical_loss": 4.254603266264572, + "objective/train/tokens_used": 276050400, + "objective/train/value_avg": -0.00920867919921875, + "objective/train/value_loss": 0.0005131934303790331, + "objective/train/value_max": -0.0005726814270019531, + "objective/train/value_min": -0.29736328125, + "objective/train/value_reward_corr": 0.55161832441128, + "objective/train/value_std": 0.011138916015625, + "objective/train/weight_avg": 1.0016599893569946, + "objective/train/weighted_lm_loss": 2.0043511390686035, + "objective/train/weights_max": 1.3048828840255737, + "objective/train/weights_min": 0.3703209161758423, + "theoretical_loss": 4.254603266264572, + "tokens_seen": 255590400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009318728935965335, + "loss": 0.0987, + "theoretical_loss": 4.254603266264572, + "tokens_seen": 255590400 + }, + { + "epoch": 0.08, + "learning_rate": 0.000931792649654951, + "loss": 0.0957, + "theoretical_loss": 4.254081734576458, + "tokens_seen": 255852544 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009317124057133687, + "loss": 0.0968, + "theoretical_loss": 4.253560886416212, + "tokens_seen": 256114688 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009316321617717862, + "loss": 0.0972, + "theoretical_loss": 4.253040720189746, + "tokens_seen": 256376832 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009315519178302038, + "loss": 0.0973, + "theoretical_loss": 4.252521234308315, + "tokens_seen": 256638976 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009314716738886215, + "loss": 0.0967, + "theoretical_loss": 4.2520024271884935, + "tokens_seen": 256901120 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009313914299470391, + "loss": 0.0981, + "theoretical_loss": 4.251484297252151, + "tokens_seen": 257163264 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009313111860054566, + "loss": 0.1018, + "theoretical_loss": 4.250966842926434, + "tokens_seen": 257425408 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009312309420638742, + "loss": 0.0991, + "theoretical_loss": 4.250450062643734, + "tokens_seen": 257687552 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009311506981222918, + "loss": 0.0955, + "theoretical_loss": 4.249933954841672, + "tokens_seen": 257949696 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009310704541807093, + "loss": 0.1025, + "theoretical_loss": 4.2494185179630755, + "tokens_seen": 258211840 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930990210239127, + "loss": 0.0993, + "theoretical_loss": 4.24890375045595, + "tokens_seen": 258473984 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009309099662975445, + "loss": 0.0974, + "theoretical_loss": 4.248389650773463, + "tokens_seen": 258736128 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0018602707423269749, + "objective/train/docs_used": 100739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8192548751831055, + "objective/train/original_loss": 1.8192548751831055, + "objective/train/theoretical_loss": 4.24813285088448, + "objective/train/tokens_used": 279327200, + "objective/train/value_avg": -0.00965118408203125, + "objective/train/value_loss": 0.00019530275312718004, + "objective/train/value_max": -0.0006694793701171875, + "objective/train/value_min": -0.4150390625, + "objective/train/value_reward_corr": 0.596449167871979, + "objective/train/value_std": 0.01064300537109375, + "objective/train/weight_avg": 1.0019567012786865, + "objective/train/weighted_lm_loss": 1.8218411207199097, + "objective/train/weights_max": 1.1794391870498657, + "objective/train/weights_min": 0.8217251300811768, + "theoretical_loss": 4.24813285088448, + "tokens_seen": 258867200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009308297223559621, + "loss": 0.099, + "theoretical_loss": 4.24787621737392, + "tokens_seen": 258998272 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009307494784143798, + "loss": 0.0972, + "theoretical_loss": 4.247363448720739, + "tokens_seen": 259260416 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009306692344727973, + "loss": 0.0988, + "theoretical_loss": 4.246851343282432, + "tokens_seen": 259522560 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930588990531215, + "loss": 0.1011, + "theoretical_loss": 4.246339899532582, + "tokens_seen": 259784704 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009305087465896325, + "loss": 0.0971, + "theoretical_loss": 4.245829115949818, + "tokens_seen": 260046848 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009304285026480501, + "loss": 0.1012, + "theoretical_loss": 4.245318991017802, + "tokens_seen": 260308992 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009303482587064677, + "loss": 0.0982, + "theoretical_loss": 4.244809523225195, + "tokens_seen": 260571136 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009302680147648853, + "loss": 0.1, + "theoretical_loss": 4.244300711065646, + "tokens_seen": 260833280 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009301877708233028, + "loss": 0.0938, + "theoretical_loss": 4.243792553037767, + "tokens_seen": 261095424 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009301075268817204, + "loss": 0.0983, + "theoretical_loss": 4.243285047645106, + "tokens_seen": 261357568 + }, + { + "epoch": 0.08, + "learning_rate": 0.000930027282940138, + "loss": 0.1027, + "theoretical_loss": 4.242778193396136, + "tokens_seen": 261619712 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009299470389985556, + "loss": 0.0991, + "theoretical_loss": 4.242271988804228, + "tokens_seen": 261881856 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.003497189376503229, + "objective/train/docs_used": 101934, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0956976413726807, + "objective/train/original_loss": 2.0956978797912598, + "objective/train/theoretical_loss": 4.241766432387629, + "objective/train/tokens_used": 282604000, + "objective/train/value_avg": -0.0075836181640625, + "objective/train/value_loss": 0.00010112895688507706, + "objective/train/value_max": -0.00064849853515625, + "objective/train/value_min": -0.1431884765625, + "objective/train/value_reward_corr": 0.3128660984446543, + "objective/train/value_std": 0.005107879638671875, + "objective/train/weight_avg": 1.0035475492477417, + "objective/train/weighted_lm_loss": 2.104475498199463, + "objective/train/weights_max": 1.1146957874298096, + "objective/train/weights_min": 0.901778519153595, + "theoretical_loss": 4.241766432387629, + "tokens_seen": 262144000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009298667950569733, + "loss": 0.0955, + "theoretical_loss": 4.241766432387629, + "tokens_seen": 262144000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009297865511153908, + "loss": 0.0972, + "theoretical_loss": 4.241261522669445, + "tokens_seen": 262406144 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009297063071738084, + "loss": 0.0996, + "theoretical_loss": 4.240757258177617, + "tokens_seen": 262668288 + }, + { + "epoch": 0.08, + "learning_rate": 0.000929626063232226, + "loss": 0.0959, + "theoretical_loss": 4.240253637444903, + "tokens_seen": 262930432 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009295458192906435, + "loss": 0.0955, + "theoretical_loss": 4.239750659008854, + "tokens_seen": 263192576 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009294655753490611, + "loss": 0.0971, + "theoretical_loss": 4.2392483214117975, + "tokens_seen": 263454720 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009293853314074787, + "loss": 0.0999, + "theoretical_loss": 4.238746623200815, + "tokens_seen": 263716864 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009293050874658963, + "loss": 0.0985, + "theoretical_loss": 4.238245562927722, + "tokens_seen": 263979008 + }, + { + "epoch": 0.08, + "learning_rate": 0.000929224843524314, + "loss": 0.0991, + "theoretical_loss": 4.237745139149047, + "tokens_seen": 264241152 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009291445995827316, + "loss": 0.0964, + "theoretical_loss": 4.237245350426015, + "tokens_seen": 264503296 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009290643556411491, + "loss": 0.0942, + "theoretical_loss": 4.236746195324523, + "tokens_seen": 264765440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009289841116995667, + "loss": 0.1008, + "theoretical_loss": 4.2362476724151215, + "tokens_seen": 265027584 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009289038677579843, + "loss": 0.0977, + "theoretical_loss": 4.235749780272998, + "tokens_seen": 265289728 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0023133521899580956, + "objective/train/docs_used": 103026, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7841287851333618, + "objective/train/original_loss": 1.7841286659240723, + "objective/train/theoretical_loss": 4.235501070295674, + "objective/train/tokens_used": 285880800, + "objective/train/value_avg": -0.007236480712890625, + "objective/train/value_loss": 0.00020630829385481775, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.1910400390625, + "objective/train/value_reward_corr": 0.3257111556668698, + "objective/train/value_std": 0.00516510009765625, + "objective/train/weight_avg": 1.0024032592773438, + "objective/train/weighted_lm_loss": 1.7887521982192993, + "objective/train/weights_max": 1.092002034187317, + "objective/train/weights_min": 0.36931154131889343, + "theoretical_loss": 4.235501070295674, + "tokens_seen": 265420800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009288236238164018, + "loss": 0.1003, + "theoretical_loss": 4.235252517477956, + "tokens_seen": 265551872 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009287433798748195, + "loss": 0.0946, + "theoretical_loss": 4.23475588261439, + "tokens_seen": 265814016 + }, + { + "epoch": 0.08, + "learning_rate": 0.000928663135933237, + "loss": 0.0971, + "theoretical_loss": 4.234259874271275, + "tokens_seen": 266076160 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009285828919916546, + "loss": 0.0972, + "theoretical_loss": 4.23376449104214, + "tokens_seen": 266338304 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009285026480500723, + "loss": 0.1004, + "theoretical_loss": 4.233269731525055, + "tokens_seen": 266600448 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009284224041084899, + "loss": 0.0981, + "theoretical_loss": 4.232775594322605, + "tokens_seen": 266862592 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009283421601669074, + "loss": 0.0975, + "theoretical_loss": 4.232282078041876, + "tokens_seen": 267124736 + }, + { + "epoch": 0.08, + "learning_rate": 0.000928261916225325, + "loss": 0.0978, + "theoretical_loss": 4.231789181294436, + "tokens_seen": 267386880 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009281816722837426, + "loss": 0.1, + "theoretical_loss": 4.231296902696314, + "tokens_seen": 267649024 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009281014283421601, + "loss": 0.0954, + "theoretical_loss": 4.230805240867982, + "tokens_seen": 267911168 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009280211844005778, + "loss": 0.0972, + "theoretical_loss": 4.230314194434336, + "tokens_seen": 268173312 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009279409404589953, + "loss": 0.0976, + "theoretical_loss": 4.229823762024681, + "tokens_seen": 268435456 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.001721393782645464, + "objective/train/docs_used": 104173, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.79710054397583, + "objective/train/original_loss": 1.79710054397583, + "objective/train/theoretical_loss": 4.2293339422727065, + "objective/train/tokens_used": 289157600, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.00033259327756240964, + "objective/train/value_max": -0.0006589889526367188, + "objective/train/value_min": -0.2215576171875, + "objective/train/value_reward_corr": 0.6127857051291834, + "objective/train/value_std": 0.0121307373046875, + "objective/train/weight_avg": 1.0018748044967651, + "objective/train/weighted_lm_loss": 1.7999058961868286, + "objective/train/weights_max": 1.2480190992355347, + "objective/train/weights_min": 0.40334662795066833, + "theoretical_loss": 4.2293339422727065, + "tokens_seen": 268697600 + }, + { + "epoch": 0.08, + "learning_rate": 0.000927860696517413, + "loss": 0.0974, + "theoretical_loss": 4.2293339422727065, + "tokens_seen": 268697600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009277804525758306, + "loss": 0.0988, + "theoretical_loss": 4.228844733816474, + "tokens_seen": 268959744 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009277002086342481, + "loss": 0.0948, + "theoretical_loss": 4.228356135298394, + "tokens_seen": 269221888 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009276199646926658, + "loss": 0.0941, + "theoretical_loss": 4.227868145365211, + "tokens_seen": 269484032 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009275397207510833, + "loss": 0.0966, + "theoretical_loss": 4.227380762667987, + "tokens_seen": 269746176 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009274594768095009, + "loss": 0.0953, + "theoretical_loss": 4.226893985862076, + "tokens_seen": 270008320 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009273792328679185, + "loss": 0.099, + "theoretical_loss": 4.226407813607116, + "tokens_seen": 270270464 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009272989889263361, + "loss": 0.0969, + "theoretical_loss": 4.2259222445670055, + "tokens_seen": 270532608 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009272187449847536, + "loss": 0.0936, + "theoretical_loss": 4.225437277409885, + "tokens_seen": 270794752 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009271385010431712, + "loss": 0.0955, + "theoretical_loss": 4.224952910808122, + "tokens_seen": 271056896 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009270582571015889, + "loss": 0.0976, + "theoretical_loss": 4.224469143438294, + "tokens_seen": 271319040 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009269780131600064, + "loss": 0.1001, + "theoretical_loss": 4.223985973981171, + "tokens_seen": 271581184 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009268977692184241, + "loss": 0.0996, + "theoretical_loss": 4.223503401121693, + "tokens_seen": 271843328 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002592921955510974, + "objective/train/docs_used": 105443, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8855316638946533, + "objective/train/original_loss": 1.8855319023132324, + "objective/train/theoretical_loss": 4.223262338006254, + "objective/train/tokens_used": 292434400, + "objective/train/value_avg": -0.00720977783203125, + "objective/train/value_loss": 0.00012640572094824165, + "objective/train/value_max": -0.00036835670471191406, + "objective/train/value_min": -0.2186279296875, + "objective/train/value_reward_corr": 0.5305388442877321, + "objective/train/value_std": 0.00785064697265625, + "objective/train/weight_avg": 1.0026540756225586, + "objective/train/weighted_lm_loss": 1.8909047842025757, + "objective/train/weights_max": 1.1803033351898193, + "objective/train/weights_min": 0.4927148222923279, + "theoretical_loss": 4.223262338006254, + "tokens_seen": 271974400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009268175252768416, + "loss": 0.0944, + "theoretical_loss": 4.223021423548962, + "tokens_seen": 272105472 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009267372813352593, + "loss": 0.0964, + "theoretical_loss": 4.222540039956215, + "tokens_seen": 272367616 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009266570373936768, + "loss": 0.0953, + "theoretical_loss": 4.222059249040814, + "tokens_seen": 272629760 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009265767934520943, + "loss": 0.0991, + "theoretical_loss": 4.2215790495042285, + "tokens_seen": 272891904 + }, + { + "epoch": 0.08, + "learning_rate": 0.000926496549510512, + "loss": 0.0986, + "theoretical_loss": 4.221099440052014, + "tokens_seen": 273154048 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009264163055689295, + "loss": 0.0959, + "theoretical_loss": 4.220620419393799, + "tokens_seen": 273416192 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009263360616273471, + "loss": 0.0982, + "theoretical_loss": 4.220141986243268, + "tokens_seen": 273678336 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009262558176857648, + "loss": 0.0984, + "theoretical_loss": 4.219664139318145, + "tokens_seen": 273940480 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009261755737441824, + "loss": 0.0973, + "theoretical_loss": 4.219186877340174, + "tokens_seen": 274202624 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009260953298025999, + "loss": 0.0947, + "theoretical_loss": 4.218710199035108, + "tokens_seen": 274464768 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009260150858610175, + "loss": 0.0947, + "theoretical_loss": 4.218234103132686, + "tokens_seen": 274726912 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009259348419194351, + "loss": 0.0974, + "theoretical_loss": 4.217758588366623, + "tokens_seen": 274989056 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.002839514520019293, + "objective/train/docs_used": 106703, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8725626468658447, + "objective/train/original_loss": 1.8725626468658447, + "objective/train/theoretical_loss": 4.217283653474588, + "objective/train/tokens_used": 295711200, + "objective/train/value_avg": -0.01064300537109375, + "objective/train/value_loss": 0.00048408194561488926, + "objective/train/value_max": -0.0005173683166503906, + "objective/train/value_min": -0.8681640625, + "objective/train/value_reward_corr": 0.614751588044035, + "objective/train/value_std": 0.016937255859375, + "objective/train/weight_avg": 1.003056287765503, + "objective/train/weighted_lm_loss": 1.8782776594161987, + "objective/train/weights_max": 1.6007287502288818, + "objective/train/weights_min": 0.3700864911079407, + "theoretical_loss": 4.217283653474588, + "tokens_seen": 275251200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009258545979778526, + "loss": 0.094, + "theoretical_loss": 4.217283653474588, + "tokens_seen": 275251200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009257743540362703, + "loss": 0.0967, + "theoretical_loss": 4.216809297198195, + "tokens_seen": 275513344 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009256941100946878, + "loss": 0.0972, + "theoretical_loss": 4.21633551828298, + "tokens_seen": 275775488 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009256138661531054, + "loss": 0.0984, + "theoretical_loss": 4.215862315478388, + "tokens_seen": 276037632 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009255336222115231, + "loss": 0.0994, + "theoretical_loss": 4.2153896875377574, + "tokens_seen": 276299776 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009254533782699407, + "loss": 0.0979, + "theoretical_loss": 4.214917633218304, + "tokens_seen": 276561920 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009253731343283583, + "loss": 0.098, + "theoretical_loss": 4.214446151281106, + "tokens_seen": 276824064 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009252928903867758, + "loss": 0.0947, + "theoretical_loss": 4.213975240491084, + "tokens_seen": 277086208 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009252126464451934, + "loss": 0.0965, + "theoretical_loss": 4.213504899616995, + "tokens_seen": 277348352 + }, + { + "epoch": 0.08, + "learning_rate": 0.000925132402503611, + "loss": 0.0967, + "theoretical_loss": 4.213035127431402, + "tokens_seen": 277610496 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009250521585620286, + "loss": 0.0971, + "theoretical_loss": 4.212565922710677, + "tokens_seen": 277872640 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009249719146204461, + "loss": 0.0967, + "theoretical_loss": 4.21209728423497, + "tokens_seen": 278134784 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009248916706788639, + "loss": 0.0989, + "theoretical_loss": 4.2116292107882, + "tokens_seen": 278396928 + }, + { + "epoch": 0.08, + "objective/train/advantage_avg": 0.0019138733623549342, + "objective/train/docs_used": 107929, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.952666163444519, + "objective/train/original_loss": 1.9526660442352295, + "objective/train/theoretical_loss": 4.211395385571668, + "objective/train/tokens_used": 298988000, + "objective/train/value_avg": -0.00894927978515625, + "objective/train/value_loss": 0.0007483014487661421, + "objective/train/value_max": -0.00033664703369140625, + "objective/train/value_min": -0.8779296875, + "objective/train/value_reward_corr": 0.5763799754051704, + "objective/train/value_std": 0.0159759521484375, + "objective/train/weight_avg": 1.002224087715149, + "objective/train/weighted_lm_loss": 1.956479549407959, + "objective/train/weights_max": 1.5199860334396362, + "objective/train/weights_min": 0.37053996324539185, + "theoretical_loss": 4.211395385571668, + "tokens_seen": 278528000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009248114267372814, + "loss": 0.0977, + "theoretical_loss": 4.211161701158042, + "tokens_seen": 278659072 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009247311827956989, + "loss": 0.0988, + "theoretical_loss": 4.2106947541359085, + "tokens_seen": 278921216 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009246509388541166, + "loss": 0.0962, + "theoretical_loss": 4.210228368516935, + "tokens_seen": 279183360 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009245706949125341, + "loss": 0.1018, + "theoretical_loss": 4.209762543099966, + "tokens_seen": 279445504 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009244904509709517, + "loss": 0.0973, + "theoretical_loss": 4.209297276687541, + "tokens_seen": 279707648 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009244102070293693, + "loss": 0.1001, + "theoretical_loss": 4.2088325680858745, + "tokens_seen": 279969792 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009243299630877869, + "loss": 0.0956, + "theoretical_loss": 4.208368416104849, + "tokens_seen": 280231936 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009242497191462045, + "loss": 0.0991, + "theoretical_loss": 4.207904819557995, + "tokens_seen": 280494080 + }, + { + "epoch": 0.09, + "learning_rate": 0.000924169475204622, + "loss": 0.0967, + "theoretical_loss": 4.207441777262477, + "tokens_seen": 280756224 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009240892312630397, + "loss": 0.0966, + "theoretical_loss": 4.206979288039081, + "tokens_seen": 281018368 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009240089873214573, + "loss": 0.0953, + "theoretical_loss": 4.206517350712199, + "tokens_seen": 281280512 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009239287433798749, + "loss": 0.0974, + "theoretical_loss": 4.206055964109813, + "tokens_seen": 281542656 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.002816372085362673, + "objective/train/docs_used": 109222, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8340928554534912, + "objective/train/original_loss": 1.8340927362442017, + "objective/train/theoretical_loss": 4.205595127063485, + "objective/train/tokens_used": 302264800, + "objective/train/value_avg": -0.00853729248046875, + "objective/train/value_loss": 0.0001849338150350377, + "objective/train/value_max": -0.0005483627319335938, + "objective/train/value_min": -0.266357421875, + "objective/train/value_reward_corr": 0.5370670101322965, + "objective/train/value_std": 0.0100250244140625, + "objective/train/weight_avg": 1.0029041767120361, + "objective/train/weighted_lm_loss": 1.8380697965621948, + "objective/train/weights_max": 1.208823800086975, + "objective/train/weights_min": 0.3825574517250061, + "theoretical_loss": 4.205595127063485, + "tokens_seen": 281804800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009238484994382924, + "loss": 0.0958, + "theoretical_loss": 4.205595127063485, + "tokens_seen": 281804800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009237682554967101, + "loss": 0.0947, + "theoretical_loss": 4.205134838408337, + "tokens_seen": 282066944 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009236880115551276, + "loss": 0.0966, + "theoretical_loss": 4.20467509698304, + "tokens_seen": 282329088 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009236077676135451, + "loss": 0.0962, + "theoretical_loss": 4.204215901629803, + "tokens_seen": 282591232 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009235275236719628, + "loss": 0.0969, + "theoretical_loss": 4.203757251194353, + "tokens_seen": 282853376 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009234472797303803, + "loss": 0.096, + "theoretical_loss": 4.203299144525923, + "tokens_seen": 283115520 + }, + { + "epoch": 0.09, + "learning_rate": 0.000923367035788798, + "loss": 0.0966, + "theoretical_loss": 4.202841580477241, + "tokens_seen": 283377664 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009232867918472156, + "loss": 0.0995, + "theoretical_loss": 4.202384557904513, + "tokens_seen": 283639808 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009232065479056332, + "loss": 0.096, + "theoretical_loss": 4.201928075667411, + "tokens_seen": 283901952 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009231263039640507, + "loss": 0.0955, + "theoretical_loss": 4.201472132629057, + "tokens_seen": 284164096 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009230460600224683, + "loss": 0.1011, + "theoretical_loss": 4.201016727656012, + "tokens_seen": 284426240 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009229658160808859, + "loss": 0.0984, + "theoretical_loss": 4.2005618596182615, + "tokens_seen": 284688384 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009228855721393035, + "loss": 0.0986, + "theoretical_loss": 4.200107527389202, + "tokens_seen": 284950528 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.00423348369076848, + "objective/train/docs_used": 110428, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.866652250289917, + "objective/train/original_loss": 1.866652488708496, + "objective/train/theoretical_loss": 4.1998805618517965, + "objective/train/tokens_used": 305541600, + "objective/train/value_avg": -0.00862884521484375, + "objective/train/value_loss": 0.00014239439042285085, + "objective/train/value_max": -0.0005507469177246094, + "objective/train/value_min": -0.1759033203125, + "objective/train/value_reward_corr": 0.4883459042398012, + "objective/train/value_std": 0.0081787109375, + "objective/train/weight_avg": 1.0043009519577026, + "objective/train/weighted_lm_loss": 1.8749480247497559, + "objective/train/weights_max": 1.1676921844482422, + "objective/train/weights_min": 0.3836681544780731, + "theoretical_loss": 4.1998805618517965, + "tokens_seen": 285081600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009228053281977211, + "loss": 0.0967, + "theoretical_loss": 4.199653729845626, + "tokens_seen": 285212672 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009227250842561386, + "loss": 0.0968, + "theoretical_loss": 4.199200465867714, + "tokens_seen": 285474816 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009226448403145564, + "loss": 0.0961, + "theoretical_loss": 4.198747734339013, + "tokens_seen": 285736960 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009225645963729739, + "loss": 0.0945, + "theoretical_loss": 4.198295534146429, + "tokens_seen": 285999104 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009224843524313914, + "loss": 0.0952, + "theoretical_loss": 4.197843864180214, + "tokens_seen": 286261248 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009224041084898091, + "loss": 0.0955, + "theoretical_loss": 4.197392723333951, + "tokens_seen": 286523392 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009223238645482266, + "loss": 0.098, + "theoretical_loss": 4.196942110504538, + "tokens_seen": 286785536 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009222436206066442, + "loss": 0.0955, + "theoretical_loss": 4.196492024592183, + "tokens_seen": 287047680 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009221633766650618, + "loss": 0.0951, + "theoretical_loss": 4.196042464500382, + "tokens_seen": 287309824 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009220831327234794, + "loss": 0.0948, + "theoretical_loss": 4.195593429135916, + "tokens_seen": 287571968 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009220028887818969, + "loss": 0.1016, + "theoretical_loss": 4.195144917408828, + "tokens_seen": 287834112 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009219226448403146, + "loss": 0.095, + "theoretical_loss": 4.194696928232417, + "tokens_seen": 288096256 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.003713709767907858, + "objective/train/docs_used": 111591, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8504618406295776, + "objective/train/original_loss": 1.850461721420288, + "objective/train/theoretical_loss": 4.194249460523222, + "objective/train/tokens_used": 308818400, + "objective/train/value_avg": -0.0093841552734375, + "objective/train/value_loss": 0.0001626669109100476, + "objective/train/value_max": -0.00070953369140625, + "objective/train/value_min": -0.247802734375, + "objective/train/value_reward_corr": 0.5734128643584034, + "objective/train/value_std": 0.01049041748046875, + "objective/train/weight_avg": 1.0037906169891357, + "objective/train/weighted_lm_loss": 1.857253909111023, + "objective/train/weights_max": 1.1863479614257812, + "objective/train/weights_min": 0.3687554597854614, + "theoretical_loss": 4.194249460523222, + "tokens_seen": 288358400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009218424008987322, + "loss": 0.0936, + "theoretical_loss": 4.194249460523222, + "tokens_seen": 288358400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009217621569571497, + "loss": 0.0967, + "theoretical_loss": 4.193802513201015, + "tokens_seen": 288620544 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009216819130155674, + "loss": 0.0998, + "theoretical_loss": 4.193356085188778, + "tokens_seen": 288882688 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009216016690739849, + "loss": 0.0987, + "theoretical_loss": 4.1929101754127025, + "tokens_seen": 289144832 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009215214251324026, + "loss": 0.0962, + "theoretical_loss": 4.192464782802167, + "tokens_seen": 289406976 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009214411811908201, + "loss": 0.096, + "theoretical_loss": 4.192019906289733, + "tokens_seen": 289669120 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009213609372492377, + "loss": 0.099, + "theoretical_loss": 4.1915755448111245, + "tokens_seen": 289931264 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009212806933076553, + "loss": 0.0974, + "theoretical_loss": 4.191131697305222, + "tokens_seen": 290193408 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009212004493660728, + "loss": 0.0959, + "theoretical_loss": 4.1906883627140505, + "tokens_seen": 290455552 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009211202054244905, + "loss": 0.0929, + "theoretical_loss": 4.19024553998276, + "tokens_seen": 290717696 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009210399614829081, + "loss": 0.0956, + "theoretical_loss": 4.189803228059623, + "tokens_seen": 290979840 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009209597175413257, + "loss": 0.096, + "theoretical_loss": 4.189361425896016, + "tokens_seen": 291241984 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009208794735997432, + "loss": 0.0929, + "theoretical_loss": 4.188920132446411, + "tokens_seen": 291504128 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0008069810573942959, + "objective/train/docs_used": 112844, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8364742994308472, + "objective/train/original_loss": 1.8364744186401367, + "objective/train/theoretical_loss": 4.188699676163473, + "objective/train/tokens_used": 312095200, + "objective/train/value_avg": -0.008056640625, + "objective/train/value_loss": 0.00030392000917345285, + "objective/train/value_max": -0.0004012584686279297, + "objective/train/value_min": -0.474609375, + "objective/train/value_reward_corr": 0.44846135234212026, + "objective/train/value_std": 0.00936126708984375, + "objective/train/weight_avg": 1.0009410381317139, + "objective/train/weighted_lm_loss": 1.8375290632247925, + "objective/train/weights_max": 1.5633509159088135, + "objective/train/weights_min": 0.3711496591567993, + "theoretical_loss": 4.188699676163473, + "tokens_seen": 291635200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009207992296581609, + "loss": 0.0937, + "theoretical_loss": 4.188479346668359, + "tokens_seen": 291766272 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009207189857165784, + "loss": 0.0943, + "theoretical_loss": 4.188039067522484, + "tokens_seen": 292028416 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009206387417749959, + "loss": 0.0952, + "theoretical_loss": 4.18759929397247, + "tokens_seen": 292290560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009205584978334136, + "loss": 0.0959, + "theoretical_loss": 4.187160024985044, + "tokens_seen": 292552704 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009204782538918311, + "loss": 0.0975, + "theoretical_loss": 4.1867212595299685, + "tokens_seen": 292814848 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009203980099502489, + "loss": 0.0961, + "theoretical_loss": 4.186282996580034, + "tokens_seen": 293076992 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009203177660086664, + "loss": 0.0984, + "theoretical_loss": 4.185845235111037, + "tokens_seen": 293339136 + }, + { + "epoch": 0.09, + "learning_rate": 0.000920237522067084, + "loss": 0.0962, + "theoretical_loss": 4.185407974101779, + "tokens_seen": 293601280 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009201572781255016, + "loss": 0.095, + "theoretical_loss": 4.184971212534048, + "tokens_seen": 293863424 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009200770341839191, + "loss": 0.096, + "theoretical_loss": 4.184534949392611, + "tokens_seen": 294125568 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009199967902423367, + "loss": 0.0951, + "theoretical_loss": 4.184099183665199, + "tokens_seen": 294387712 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009199165463007543, + "loss": 0.0985, + "theoretical_loss": 4.1836639143425, + "tokens_seen": 294649856 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.002410832792520523, + "objective/train/docs_used": 114046, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9235732555389404, + "objective/train/original_loss": 1.92357337474823, + "objective/train/theoretical_loss": 4.1832291404181445, + "objective/train/tokens_used": 315372000, + "objective/train/value_avg": -0.00838470458984375, + "objective/train/value_loss": 0.00013771136582363397, + "objective/train/value_max": -0.0008459091186523438, + "objective/train/value_min": -0.28466796875, + "objective/train/value_reward_corr": 0.5819581613531781, + "objective/train/value_std": 0.0088043212890625, + "objective/train/weight_avg": 1.0024785995483398, + "objective/train/weighted_lm_loss": 1.928615689277649, + "objective/train/weights_max": 1.112996220588684, + "objective/train/weights_min": 0.6144814491271973, + "theoretical_loss": 4.1832291404181445, + "tokens_seen": 294912000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009198363023591719, + "loss": 0.0955, + "theoretical_loss": 4.1832291404181445, + "tokens_seen": 294912000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009197560584175894, + "loss": 0.0958, + "theoretical_loss": 4.182794860888696, + "tokens_seen": 295174144 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009196758144760072, + "loss": 0.0974, + "theoretical_loss": 4.18236107475364, + "tokens_seen": 295436288 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009195955705344247, + "loss": 0.0974, + "theoretical_loss": 4.18192778101537, + "tokens_seen": 295698432 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009195153265928422, + "loss": 0.0943, + "theoretical_loss": 4.181494978679181, + "tokens_seen": 295960576 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009194350826512599, + "loss": 0.0952, + "theoretical_loss": 4.181062666753256, + "tokens_seen": 296222720 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009193548387096774, + "loss": 0.0945, + "theoretical_loss": 4.180630844248653, + "tokens_seen": 296484864 + }, + { + "epoch": 0.09, + "learning_rate": 0.000919274594768095, + "loss": 0.0977, + "theoretical_loss": 4.180199510179299, + "tokens_seen": 296747008 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009191943508265126, + "loss": 0.0951, + "theoretical_loss": 4.179768663561975, + "tokens_seen": 297009152 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009191141068849302, + "loss": 0.0925, + "theoretical_loss": 4.1793383034163085, + "tokens_seen": 297271296 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009190338629433478, + "loss": 0.0942, + "theoretical_loss": 4.178908428764759, + "tokens_seen": 297533440 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009189536190017654, + "loss": 0.0948, + "theoretical_loss": 4.17847903863261, + "tokens_seen": 297795584 + }, + { + "epoch": 0.09, + "learning_rate": 0.000918873375060183, + "loss": 0.0953, + "theoretical_loss": 4.178050132047958, + "tokens_seen": 298057728 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0007046199752949178, + "objective/train/docs_used": 115247, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8377935886383057, + "objective/train/original_loss": 1.8377935886383057, + "objective/train/theoretical_loss": 4.1778358597829905, + "objective/train/tokens_used": 318648800, + "objective/train/value_avg": -0.0088348388671875, + "objective/train/value_loss": 0.0002799044887069613, + "objective/train/value_max": -0.00047469139099121094, + "objective/train/value_min": -0.80615234375, + "objective/train/value_reward_corr": 0.4904552822474981, + "objective/train/value_std": 0.01067352294921875, + "objective/train/weight_avg": 1.0008306503295898, + "objective/train/weighted_lm_loss": 1.8391979932785034, + "objective/train/weights_max": 1.235874056816101, + "objective/train/weights_min": 0.37098827958106995, + "theoretical_loss": 4.1778358597829905, + "tokens_seen": 298188800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009187931311186006, + "loss": 0.0941, + "theoretical_loss": 4.177621708041703, + "tokens_seen": 298319872 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009187128871770182, + "loss": 0.0969, + "theoretical_loss": 4.177193765647534, + "tokens_seen": 298582016 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009186326432354357, + "loss": 0.0977, + "theoretical_loss": 4.176766303901922, + "tokens_seen": 298844160 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009185523992938534, + "loss": 0.0959, + "theoretical_loss": 4.17633932184411, + "tokens_seen": 299106304 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009184721553522709, + "loss": 0.0955, + "theoretical_loss": 4.1759128185161005, + "tokens_seen": 299368448 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009183919114106885, + "loss": 0.0921, + "theoretical_loss": 4.175486792962646, + "tokens_seen": 299630592 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009183116674691061, + "loss": 0.0956, + "theoretical_loss": 4.175061244231237, + "tokens_seen": 299892736 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009182314235275236, + "loss": 0.0972, + "theoretical_loss": 4.174636171372097, + "tokens_seen": 300154880 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009181511795859412, + "loss": 0.0917, + "theoretical_loss": 4.174211573438166, + "tokens_seen": 300417024 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009180709356443589, + "loss": 0.0939, + "theoretical_loss": 4.173787449485094, + "tokens_seen": 300679168 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009179906917027765, + "loss": 0.098, + "theoretical_loss": 4.17336379857123, + "tokens_seen": 300941312 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009179104477611941, + "loss": 0.0945, + "theoretical_loss": 4.172940619757611, + "tokens_seen": 301203456 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0009184295777231455, + "objective/train/docs_used": 116418, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7909634113311768, + "objective/train/original_loss": 1.7909634113311768, + "objective/train/theoretical_loss": 4.172517912107954, + "objective/train/tokens_used": 321925600, + "objective/train/value_avg": -0.0111846923828125, + "objective/train/value_loss": 0.00032702440512366593, + "objective/train/value_max": -0.0006213188171386719, + "objective/train/value_min": -0.42529296875, + "objective/train/value_reward_corr": 0.6038215415930122, + "objective/train/value_std": 0.013702392578125, + "objective/train/weight_avg": 1.0010701417922974, + "objective/train/weighted_lm_loss": 1.792219877243042, + "objective/train/weights_max": 1.2377305030822754, + "objective/train/weights_min": 0.3794703483581543, + "theoretical_loss": 4.172517912107954, + "tokens_seen": 301465600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009178302038196117, + "loss": 0.0937, + "theoretical_loss": 4.172517912107954, + "tokens_seen": 301465600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009177499598780292, + "loss": 0.0927, + "theoretical_loss": 4.172095674688645, + "tokens_seen": 301727744 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009176697159364468, + "loss": 0.0956, + "theoretical_loss": 4.171673906568729, + "tokens_seen": 301989888 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009175894719948644, + "loss": 0.0972, + "theoretical_loss": 4.171252606819899, + "tokens_seen": 302252032 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009175092280532819, + "loss": 0.0951, + "theoretical_loss": 4.170831774516489, + "tokens_seen": 302514176 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009174289841116997, + "loss": 0.0955, + "theoretical_loss": 4.170411408735461, + "tokens_seen": 302776320 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009173487401701172, + "loss": 0.0954, + "theoretical_loss": 4.169991508556398, + "tokens_seen": 303038464 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009172684962285348, + "loss": 0.0944, + "theoretical_loss": 4.169572073061493, + "tokens_seen": 303300608 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009171882522869524, + "loss": 0.0975, + "theoretical_loss": 4.16915310133554, + "tokens_seen": 303562752 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009171080083453699, + "loss": 0.0963, + "theoretical_loss": 4.1687345924659205, + "tokens_seen": 303824896 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009170277644037875, + "loss": 0.094, + "theoretical_loss": 4.168316545542602, + "tokens_seen": 304087040 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009169475204622051, + "loss": 0.0935, + "theoretical_loss": 4.167898959658121, + "tokens_seen": 304349184 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009168672765206227, + "loss": 0.0902, + "theoretical_loss": 4.167481833907576, + "tokens_seen": 304611328 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0005431807949207723, + "objective/train/docs_used": 117554, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7858117818832397, + "objective/train/original_loss": 1.7858116626739502, + "objective/train/theoretical_loss": 4.167273443300447, + "objective/train/tokens_used": 325202400, + "objective/train/value_avg": -0.0095367431640625, + "objective/train/value_loss": 0.00023923047410789877, + "objective/train/value_max": -0.0003447532653808594, + "objective/train/value_min": -0.414794921875, + "objective/train/value_reward_corr": 0.6826350512891199, + "objective/train/value_std": 0.01375579833984375, + "objective/train/weight_avg": 1.0006574392318726, + "objective/train/weighted_lm_loss": 1.7867764234542847, + "objective/train/weights_max": 1.3765175342559814, + "objective/train/weights_min": 0.3716852366924286, + "theoretical_loss": 4.167273443300447, + "tokens_seen": 304742400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009167870325790402, + "loss": 0.0982, + "theoretical_loss": 4.16706516738862, + "tokens_seen": 304873472 + }, + { + "epoch": 0.09, + "learning_rate": 0.000916706788637458, + "loss": 0.0941, + "theoretical_loss": 4.166648959201449, + "tokens_seen": 305135616 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009166265446958755, + "loss": 0.0978, + "theoretical_loss": 4.166233208448794, + "tokens_seen": 305397760 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009165463007542931, + "loss": 0.0968, + "theoretical_loss": 4.165817914235908, + "tokens_seen": 305659904 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009164660568127107, + "loss": 0.0952, + "theoretical_loss": 4.165403075670562, + "tokens_seen": 305922048 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009163858128711282, + "loss": 0.0942, + "theoretical_loss": 4.164988691863032, + "tokens_seen": 306184192 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009163055689295459, + "loss": 0.0944, + "theoretical_loss": 4.164574761926092, + "tokens_seen": 306446336 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009162253249879634, + "loss": 0.0949, + "theoretical_loss": 4.164161284975005, + "tokens_seen": 306708480 + }, + { + "epoch": 0.09, + "learning_rate": 0.000916145081046381, + "loss": 0.0941, + "theoretical_loss": 4.1637482601275115, + "tokens_seen": 306970624 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009160648371047986, + "loss": 0.0983, + "theoretical_loss": 4.163335686503822, + "tokens_seen": 307232768 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009159845931632162, + "loss": 0.0961, + "theoretical_loss": 4.162923563226607, + "tokens_seen": 307494912 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009159043492216338, + "loss": 0.0935, + "theoretical_loss": 4.1625118894209905, + "tokens_seen": 307757056 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.0024794533383101225, + "objective/train/docs_used": 118729, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.930225133895874, + "objective/train/original_loss": 1.930225133895874, + "objective/train/theoretical_loss": 4.16210066421454, + "objective/train/tokens_used": 328479200, + "objective/train/value_avg": -0.01091766357421875, + "objective/train/value_loss": 0.0003562370839063078, + "objective/train/value_max": -0.0003101825714111328, + "objective/train/value_min": -0.7587890625, + "objective/train/value_reward_corr": 0.711995979515493, + "objective/train/value_std": 0.02008056640625, + "objective/train/weight_avg": 1.002644658088684, + "objective/train/weighted_lm_loss": 1.9346448183059692, + "objective/train/weights_max": 1.8252228498458862, + "objective/train/weights_min": 0.36886805295944214, + "theoretical_loss": 4.16210066421454, + "tokens_seen": 308019200 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009158241052800514, + "loss": 0.0936, + "theoretical_loss": 4.16210066421454, + "tokens_seen": 308019200 + }, + { + "epoch": 0.09, + "learning_rate": 0.000915743861338469, + "loss": 0.0949, + "theoretical_loss": 4.161689886737255, + "tokens_seen": 308281344 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009156636173968865, + "loss": 0.0969, + "theoretical_loss": 4.161279556121562, + "tokens_seen": 308543488 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009155833734553042, + "loss": 0.0955, + "theoretical_loss": 4.160869671502302, + "tokens_seen": 308805632 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009155031295137217, + "loss": 0.0963, + "theoretical_loss": 4.160460232016725, + "tokens_seen": 309067776 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009154228855721394, + "loss": 0.0969, + "theoretical_loss": 4.16005123680448, + "tokens_seen": 309329920 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009153426416305569, + "loss": 0.0937, + "theoretical_loss": 4.159642685007606, + "tokens_seen": 309592064 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009152623976889744, + "loss": 0.0927, + "theoretical_loss": 4.1592345757705225, + "tokens_seen": 309854208 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009151821537473922, + "loss": 0.0921, + "theoretical_loss": 4.158826908240022, + "tokens_seen": 310116352 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009151019098058097, + "loss": 0.0931, + "theoretical_loss": 4.158419681565265, + "tokens_seen": 310378496 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009150216658642273, + "loss": 0.0949, + "theoretical_loss": 4.1580128948977615, + "tokens_seen": 310640640 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009149414219226449, + "loss": 0.094, + "theoretical_loss": 4.157606547391374, + "tokens_seen": 310902784 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009148611779810625, + "loss": 0.0976, + "theoretical_loss": 4.157200638202301, + "tokens_seen": 311164928 + }, + { + "epoch": 0.09, + "objective/train/advantage_avg": 0.002907957648858428, + "objective/train/docs_used": 120076, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0078868865966797, + "objective/train/original_loss": 2.0078868865966797, + "objective/train/theoretical_loss": 4.156997847713709, + "objective/train/tokens_used": 331756000, + "objective/train/value_avg": -0.010467529296875, + "objective/train/value_loss": 0.0005483797285705805, + "objective/train/value_max": -0.0007014274597167969, + "objective/train/value_min": -0.6416015625, + "objective/train/value_reward_corr": 0.563955012185076, + "objective/train/value_std": 0.0168914794921875, + "objective/train/weight_avg": 1.0031453371047974, + "objective/train/weighted_lm_loss": 2.013608694076538, + "objective/train/weights_max": 1.2697027921676636, + "objective/train/weights_min": 0.37099963426589966, + "theoretical_loss": 4.156997847713709, + "tokens_seen": 311296000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00091478093403948, + "loss": 0.0907, + "theoretical_loss": 4.156795166489074, + "tokens_seen": 311427072 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009147006900978976, + "loss": 0.0952, + "theoretical_loss": 4.156390131412543, + "tokens_seen": 311689216 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009146204461563152, + "loss": 0.0937, + "theoretical_loss": 4.155985532135875, + "tokens_seen": 311951360 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009145402022147327, + "loss": 0.097, + "theoretical_loss": 4.1555813678245395, + "tokens_seen": 312213504 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009144599582731505, + "loss": 0.0941, + "theoretical_loss": 4.155177637646306, + "tokens_seen": 312475648 + }, + { + "epoch": 0.09, + "learning_rate": 0.000914379714331568, + "loss": 0.0916, + "theoretical_loss": 4.154774340771228, + "tokens_seen": 312737792 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009142994703899856, + "loss": 0.0953, + "theoretical_loss": 4.154371476371646, + "tokens_seen": 312999936 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009142192264484032, + "loss": 0.0935, + "theoretical_loss": 4.153969043622169, + "tokens_seen": 313262080 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009141389825068207, + "loss": 0.0967, + "theoretical_loss": 4.15356704169967, + "tokens_seen": 313524224 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009140587385652384, + "loss": 0.0973, + "theoretical_loss": 4.153165469783279, + "tokens_seen": 313786368 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009139784946236559, + "loss": 0.0972, + "theoretical_loss": 4.152764327054376, + "tokens_seen": 314048512 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009138982506820735, + "loss": 0.095, + "theoretical_loss": 4.152363612696579, + "tokens_seen": 314310656 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0016880643088370562, + "objective/train/docs_used": 121169, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8933781385421753, + "objective/train/original_loss": 1.8933782577514648, + "objective/train/theoretical_loss": 4.151963325895737, + "objective/train/tokens_used": 335032800, + "objective/train/value_avg": -0.00879669189453125, + "objective/train/value_loss": 0.0004417987947817892, + "objective/train/value_max": -0.0004973411560058594, + "objective/train/value_min": -0.265380859375, + "objective/train/value_reward_corr": 0.4943290748479078, + "objective/train/value_std": 0.01016998291015625, + "objective/train/weight_avg": 1.0018750429153442, + "objective/train/weighted_lm_loss": 1.8967622518539429, + "objective/train/weights_max": 1.139946699142456, + "objective/train/weights_min": 0.36912843585014343, + "theoretical_loss": 4.151963325895737, + "tokens_seen": 314572800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009138180067404911, + "loss": 0.0935, + "theoretical_loss": 4.151963325895737, + "tokens_seen": 314572800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009137377627989088, + "loss": 0.0948, + "theoretical_loss": 4.151563465839927, + "tokens_seen": 314834944 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009136575188573263, + "loss": 0.0952, + "theoretical_loss": 4.151164031719437, + "tokens_seen": 315097088 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009135772749157439, + "loss": 0.0959, + "theoretical_loss": 4.15076502272677, + "tokens_seen": 315359232 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009134970309741615, + "loss": 0.0937, + "theoretical_loss": 4.150366438056622, + "tokens_seen": 315621376 + }, + { + "epoch": 0.1, + "learning_rate": 0.000913416787032579, + "loss": 0.0926, + "theoretical_loss": 4.149968276905888, + "tokens_seen": 315883520 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009133365430909967, + "loss": 0.099, + "theoretical_loss": 4.149570538473644, + "tokens_seen": 316145664 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009132562991494142, + "loss": 0.0913, + "theoretical_loss": 4.149173221961146, + "tokens_seen": 316407808 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009131760552078318, + "loss": 0.094, + "theoretical_loss": 4.1487763265718165, + "tokens_seen": 316669952 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009130958112662494, + "loss": 0.0944, + "theoretical_loss": 4.148379851511241, + "tokens_seen": 316932096 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009130155673246669, + "loss": 0.0942, + "theoretical_loss": 4.147983795987161, + "tokens_seen": 317194240 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009129353233830846, + "loss": 0.0937, + "theoretical_loss": 4.14758815920946, + "tokens_seen": 317456384 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009128550794415022, + "loss": 0.0916, + "theoretical_loss": 4.147192940390165, + "tokens_seen": 317718528 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.00349609088152647, + "objective/train/docs_used": 122440, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.709967017173767, + "objective/train/original_loss": 1.7099668979644775, + "objective/train/theoretical_loss": 4.146995487469262, + "objective/train/tokens_used": 338309600, + "objective/train/value_avg": -0.00899505615234375, + "objective/train/value_loss": 0.00012839387636631727, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.255615234375, + "objective/train/value_reward_corr": 0.6779735039057588, + "objective/train/value_std": 0.0102081298828125, + "objective/train/weight_avg": 1.0035591125488281, + "objective/train/weighted_lm_loss": 1.7162126302719116, + "objective/train/weights_max": 1.1629977226257324, + "objective/train/weights_min": 0.6108145713806152, + "theoretical_loss": 4.146995487469262, + "tokens_seen": 317849600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009127748354999198, + "loss": 0.0971, + "theoretical_loss": 4.146798138743433, + "tokens_seen": 317980672 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009126945915583374, + "loss": 0.0904, + "theoretical_loss": 4.146403753485544, + "tokens_seen": 318242816 + }, + { + "epoch": 0.1, + "learning_rate": 0.000912614347616755, + "loss": 0.093, + "theoretical_loss": 4.146009783834892, + "tokens_seen": 318504960 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009125341036751725, + "loss": 0.0953, + "theoretical_loss": 4.145616229011987, + "tokens_seen": 318767104 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009124538597335902, + "loss": 0.0957, + "theoretical_loss": 4.145223088239432, + "tokens_seen": 319029248 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009123736157920077, + "loss": 0.0923, + "theoretical_loss": 4.14483036074193, + "tokens_seen": 319291392 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009122933718504252, + "loss": 0.0962, + "theoretical_loss": 4.14443804574627, + "tokens_seen": 319553536 + }, + { + "epoch": 0.1, + "learning_rate": 0.000912213127908843, + "loss": 0.0927, + "theoretical_loss": 4.144046142481317, + "tokens_seen": 319815680 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009121328839672605, + "loss": 0.0968, + "theoretical_loss": 4.143654650178012, + "tokens_seen": 320077824 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009120526400256781, + "loss": 0.097, + "theoretical_loss": 4.143263568069358, + "tokens_seen": 320339968 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009119723960840957, + "loss": 0.0928, + "theoretical_loss": 4.142872895390417, + "tokens_seen": 320602112 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009118921521425133, + "loss": 0.0939, + "theoretical_loss": 4.142482631378303, + "tokens_seen": 320864256 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0006320319953374565, + "objective/train/docs_used": 123582, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8734241724014282, + "objective/train/original_loss": 1.8734240531921387, + "objective/train/theoretical_loss": 4.142092775272169, + "objective/train/tokens_used": 341586400, + "objective/train/value_avg": -0.01175689697265625, + "objective/train/value_loss": 0.00034092742134816945, + "objective/train/value_max": -0.0005526542663574219, + "objective/train/value_min": -0.2489013671875, + "objective/train/value_reward_corr": 0.649642412628804, + "objective/train/value_std": 0.01458740234375, + "objective/train/weight_avg": 1.0007905960083008, + "objective/train/weighted_lm_loss": 1.8751580715179443, + "objective/train/weights_max": 1.2165815830230713, + "objective/train/weights_min": 0.37836602330207825, + "theoretical_loss": 4.142092775272169, + "tokens_seen": 321126400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009118119082009308, + "loss": 0.0918, + "theoretical_loss": 4.142092775272169, + "tokens_seen": 321126400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009117316642593484, + "loss": 0.0898, + "theoretical_loss": 4.141703326313209, + "tokens_seen": 321388544 + }, + { + "epoch": 0.1, + "learning_rate": 0.000911651420317766, + "loss": 0.0933, + "theoretical_loss": 4.141314283744643, + "tokens_seen": 321650688 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009115711763761835, + "loss": 0.093, + "theoretical_loss": 4.140925646811714, + "tokens_seen": 321912832 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009114909324346013, + "loss": 0.0948, + "theoretical_loss": 4.1405374147616785, + "tokens_seen": 322174976 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009114106884930188, + "loss": 0.0929, + "theoretical_loss": 4.140149586843803, + "tokens_seen": 322437120 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009113304445514365, + "loss": 0.094, + "theoretical_loss": 4.139762162309355, + "tokens_seen": 322699264 + }, + { + "epoch": 0.1, + "learning_rate": 0.000911250200609854, + "loss": 0.0934, + "theoretical_loss": 4.139375140411592, + "tokens_seen": 322961408 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009111699566682715, + "loss": 0.0913, + "theoretical_loss": 4.138988520405764, + "tokens_seen": 323223552 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009110897127266892, + "loss": 0.0922, + "theoretical_loss": 4.138602301549097, + "tokens_seen": 323485696 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009110094687851067, + "loss": 0.0922, + "theoretical_loss": 4.138216483100795, + "tokens_seen": 323747840 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009109292248435243, + "loss": 0.0901, + "theoretical_loss": 4.137831064322021, + "tokens_seen": 324009984 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009108489809019419, + "loss": 0.0906, + "theoretical_loss": 4.1374460444759045, + "tokens_seen": 324272128 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0036898814141750336, + "objective/train/docs_used": 124782, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.784644365310669, + "objective/train/original_loss": 1.784644603729248, + "objective/train/theoretical_loss": 4.137253683922854, + "objective/train/tokens_used": 344863200, + "objective/train/value_avg": -0.00811767578125, + "objective/train/value_loss": 0.0003189310082234442, + "objective/train/value_max": -0.0005593299865722656, + "objective/train/value_min": -0.67138671875, + "objective/train/value_reward_corr": 0.6118297905242213, + "objective/train/value_std": 0.0111846923828125, + "objective/train/weight_avg": 1.0038195848464966, + "objective/train/weighted_lm_loss": 1.790905475616455, + "objective/train/weights_max": 1.1770083904266357, + "objective/train/weights_min": 0.23969075083732605, + "theoretical_loss": 4.137253683922854, + "tokens_seen": 324403200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009107687369603596, + "loss": 0.0923, + "theoretical_loss": 4.137061422827525, + "tokens_seen": 324534272 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009106884930187771, + "loss": 0.0939, + "theoretical_loss": 4.136677198643908, + "tokens_seen": 324796416 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009106082490771947, + "loss": 0.0912, + "theoretical_loss": 4.13629337119402, + "tokens_seen": 325058560 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009105280051356123, + "loss": 0.0923, + "theoretical_loss": 4.135909939748757, + "tokens_seen": 325320704 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009104477611940298, + "loss": 0.0918, + "theoretical_loss": 4.135526903580946, + "tokens_seen": 325582848 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009103675172524475, + "loss": 0.0938, + "theoretical_loss": 4.135144261965327, + "tokens_seen": 325844992 + }, + { + "epoch": 0.1, + "learning_rate": 0.000910287273310865, + "loss": 0.0923, + "theoretical_loss": 4.134762014178559, + "tokens_seen": 326107136 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009102070293692827, + "loss": 0.0925, + "theoretical_loss": 4.134380159499204, + "tokens_seen": 326369280 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009101267854277002, + "loss": 0.0931, + "theoretical_loss": 4.1339986972077245, + "tokens_seen": 326631424 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009100465414861177, + "loss": 0.0885, + "theoretical_loss": 4.133617626586475, + "tokens_seen": 326893568 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009099662975445355, + "loss": 0.0916, + "theoretical_loss": 4.133236946919698, + "tokens_seen": 327155712 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909886053602953, + "loss": 0.0972, + "theoretical_loss": 4.132856657493516, + "tokens_seen": 327417856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7246169962973871, + "debugging/entropy-1-grams": 5.13190351820591, + "debugging/length": 472.8, + "debugging/num_segments": 10, + "debugging/raw_token_scores_avg": 0.008663635700941086, + "debugging/raw_token_scores_std": 0.023182114586234093, + "debugging/score": 0.0033104506818385365, + "debugging/score_std": 0.0017105448100298692, + "epoch": 0.1, + "objective/train/advantage_avg": 0.0008121287100948393, + "objective/train/docs_used": 126013, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9080229997634888, + "objective/train/original_loss": 1.9080228805541992, + "objective/train/theoretical_loss": 4.132476757595925, + "objective/train/tokens_used": 348140000, + "objective/train/value_avg": -0.0094757080078125, + "objective/train/value_loss": 0.0002959860139526427, + "objective/train/value_max": -0.0003554821014404297, + "objective/train/value_min": -0.2890625, + "objective/train/value_reward_corr": 0.6876529624313351, + "objective/train/value_std": 0.01248931884765625, + "objective/train/weight_avg": 1.000952959060669, + "objective/train/weighted_lm_loss": 1.9104235172271729, + "objective/train/weights_max": 1.1095370054244995, + "objective/train/weights_min": 0.376959890127182, + "theoretical_loss": 4.132476757595925, + "tokens_seen": 327680000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009098058096613706, + "loss": 0.0954, + "theoretical_loss": 4.132476757595925, + "tokens_seen": 327680000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009097255657197882, + "loss": 0.0948, + "theoretical_loss": 4.132097246516788, + "tokens_seen": 327942144 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009096453217782058, + "loss": 0.0945, + "theoretical_loss": 4.131718123547829, + "tokens_seen": 328204288 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009095650778366233, + "loss": 0.0931, + "theoretical_loss": 4.131339387982628, + "tokens_seen": 328466432 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909484833895041, + "loss": 0.0913, + "theoretical_loss": 4.1309610391166105, + "tokens_seen": 328728576 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009094045899534585, + "loss": 0.0902, + "theoretical_loss": 4.1305830762470475, + "tokens_seen": 328990720 + }, + { + "epoch": 0.1, + "learning_rate": 0.000909324346011876, + "loss": 0.093, + "theoretical_loss": 4.1302054986730425, + "tokens_seen": 329252864 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009092441020702938, + "loss": 0.0922, + "theoretical_loss": 4.129828305695531, + "tokens_seen": 329515008 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009091638581287113, + "loss": 0.0919, + "theoretical_loss": 4.129451496617269, + "tokens_seen": 329777152 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009090836141871289, + "loss": 0.0938, + "theoretical_loss": 4.129075070742831, + "tokens_seen": 330039296 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009090033702455465, + "loss": 0.093, + "theoretical_loss": 4.128699027378604, + "tokens_seen": 330301440 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009089231263039641, + "loss": 0.0926, + "theoretical_loss": 4.128323365832777, + "tokens_seen": 330563584 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009088428823623817, + "loss": 0.0938, + "theoretical_loss": 4.127948085415338, + "tokens_seen": 330825728 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.003476484213024378, + "objective/train/docs_used": 127130, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9096784591674805, + "objective/train/original_loss": 1.90967857837677, + "objective/train/theoretical_loss": 4.127760587914626, + "objective/train/tokens_used": 351416800, + "objective/train/value_avg": -0.009552001953125, + "objective/train/value_loss": 0.00040583781083114445, + "objective/train/value_max": -0.0003006458282470703, + "objective/train/value_min": -0.32568359375, + "objective/train/value_reward_corr": 0.46149172405686373, + "objective/train/value_std": 0.01270294189453125, + "objective/train/weight_avg": 1.0036547183990479, + "objective/train/weighted_lm_loss": 1.9170717000961304, + "objective/train/weights_max": 1.1853569746017456, + "objective/train/weights_min": 0.36846548318862915, + "theoretical_loss": 4.127760587914626, + "tokens_seen": 330956800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009087626384207992, + "loss": 0.0939, + "theoretical_loss": 4.127573185438068, + "tokens_seen": 331087872 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009086823944792168, + "loss": 0.0963, + "theoretical_loss": 4.127198665214536, + "tokens_seen": 331350016 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009086021505376344, + "loss": 0.0901, + "theoretical_loss": 4.126824524060088, + "tokens_seen": 331612160 + }, + { + "epoch": 0.1, + "learning_rate": 0.000908521906596052, + "loss": 0.093, + "theoretical_loss": 4.126450761291847, + "tokens_seen": 331874304 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009084416626544696, + "loss": 0.0952, + "theoretical_loss": 4.126077376228702, + "tokens_seen": 332136448 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009083614187128873, + "loss": 0.0878, + "theoretical_loss": 4.1257043681913075, + "tokens_seen": 332398592 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009082811747713048, + "loss": 0.092, + "theoretical_loss": 4.125331736502073, + "tokens_seen": 332660736 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009082009308297223, + "loss": 0.0937, + "theoretical_loss": 4.124959480485156, + "tokens_seen": 332922880 + }, + { + "epoch": 0.1, + "learning_rate": 0.00090812068688814, + "loss": 0.0899, + "theoretical_loss": 4.124587599466462, + "tokens_seen": 333185024 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009080404429465575, + "loss": 0.0936, + "theoretical_loss": 4.124216092773635, + "tokens_seen": 333447168 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009079601990049751, + "loss": 0.0898, + "theoretical_loss": 4.123844959736049, + "tokens_seen": 333709312 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009078799550633927, + "loss": 0.091, + "theoretical_loss": 4.123474199684807, + "tokens_seen": 333971456 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0011445061536505818, + "objective/train/docs_used": 128373, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 2.0021450519561768, + "objective/train/original_loss": 2.0021448135375977, + "objective/train/theoretical_loss": 4.123103811952736, + "objective/train/tokens_used": 354693600, + "objective/train/value_avg": -0.010498046875, + "objective/train/value_loss": 0.0002171178930439055, + "objective/train/value_max": -0.0004494190216064453, + "objective/train/value_min": -0.269287109375, + "objective/train/value_reward_corr": 0.6008675056499843, + "objective/train/value_std": 0.011932373046875, + "objective/train/weight_avg": 1.0012482404708862, + "objective/train/weighted_lm_loss": 2.003540515899658, + "objective/train/weights_max": 1.1421287059783936, + "objective/train/weights_min": 0.3697309195995331, + "theoretical_loss": 4.123103811952736, + "tokens_seen": 334233600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009077997111218103, + "loss": 0.0955, + "theoretical_loss": 4.123103811952736, + "tokens_seen": 334233600 + }, + { + "epoch": 0.1, + "learning_rate": 0.000907719467180228, + "loss": 0.0893, + "theoretical_loss": 4.122733795874372, + "tokens_seen": 334495744 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009076392232386455, + "loss": 0.0945, + "theoretical_loss": 4.122364150785966, + "tokens_seen": 334757888 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009075589792970631, + "loss": 0.0945, + "theoretical_loss": 4.1219948760254725, + "tokens_seen": 335020032 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009074787353554807, + "loss": 0.0932, + "theoretical_loss": 4.121625970932542, + "tokens_seen": 335282176 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009073984914138983, + "loss": 0.093, + "theoretical_loss": 4.121257434848519, + "tokens_seen": 335544320 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009073182474723158, + "loss": 0.0925, + "theoretical_loss": 4.120889267116435, + "tokens_seen": 335806464 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009072380035307335, + "loss": 0.0924, + "theoretical_loss": 4.1205214670810015, + "tokens_seen": 336068608 + }, + { + "epoch": 0.1, + "learning_rate": 0.000907157759589151, + "loss": 0.0926, + "theoretical_loss": 4.120154034088609, + "tokens_seen": 336330752 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009070775156475685, + "loss": 0.0924, + "theoretical_loss": 4.119786967487314, + "tokens_seen": 336592896 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009069972717059863, + "loss": 0.0936, + "theoretical_loss": 4.11942026662684, + "tokens_seen": 336855040 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009069170277644038, + "loss": 0.0898, + "theoretical_loss": 4.11905393085857, + "tokens_seen": 337117184 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009068367838228214, + "loss": 0.093, + "theoretical_loss": 4.118687959535539, + "tokens_seen": 337379328 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0014116659294813871, + "objective/train/docs_used": 129675, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7438528537750244, + "objective/train/original_loss": 1.7438528537750244, + "objective/train/theoretical_loss": 4.118505110339262, + "objective/train/tokens_used": 357970400, + "objective/train/value_avg": -0.0096435546875, + "objective/train/value_loss": 0.00027853285428136587, + "objective/train/value_max": -0.00034737586975097656, + "objective/train/value_min": -0.552734375, + "objective/train/value_reward_corr": 0.6218103415544912, + "objective/train/value_std": 0.0139312744140625, + "objective/train/weight_avg": 1.001541256904602, + "objective/train/weighted_lm_loss": 1.745504379272461, + "objective/train/weights_max": 1.441696286201477, + "objective/train/weights_min": 0.36845633387565613, + "theoretical_loss": 4.118505110339262, + "tokens_seen": 337510400 + }, + { + "epoch": 0.1, + "learning_rate": 0.000906756539881239, + "loss": 0.091, + "theoretical_loss": 4.118322352012429, + "tokens_seen": 337641472 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009066762959396566, + "loss": 0.0905, + "theoretical_loss": 4.117957107645569, + "tokens_seen": 337903616 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009065960519980741, + "loss": 0.0931, + "theoretical_loss": 4.1175922257929205, + "tokens_seen": 338165760 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009065158080564918, + "loss": 0.0947, + "theoretical_loss": 4.117227705814078, + "tokens_seen": 338427904 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009064355641149093, + "loss": 0.0977, + "theoretical_loss": 4.116863547070264, + "tokens_seen": 338690048 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009063553201733269, + "loss": 0.0932, + "theoretical_loss": 4.116499748924319, + "tokens_seen": 338952192 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009062750762317446, + "loss": 0.0935, + "theoretical_loss": 4.116136310740702, + "tokens_seen": 339214336 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009061948322901621, + "loss": 0.0875, + "theoretical_loss": 4.115773231885479, + "tokens_seen": 339476480 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009061145883485798, + "loss": 0.0908, + "theoretical_loss": 4.115410511726323, + "tokens_seen": 339738624 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009060343444069973, + "loss": 0.0922, + "theoretical_loss": 4.115048149632507, + "tokens_seen": 340000768 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009059541004654149, + "loss": 0.0936, + "theoretical_loss": 4.114686144974897, + "tokens_seen": 340262912 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009058738565238325, + "loss": 0.0925, + "theoretical_loss": 4.114324497125947, + "tokens_seen": 340525056 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.0013680076226592064, + "objective/train/docs_used": 130755, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8868423700332642, + "objective/train/original_loss": 1.8868422508239746, + "objective/train/theoretical_loss": 4.113963205459697, + "objective/train/tokens_used": 361247200, + "objective/train/value_avg": -0.007740020751953125, + "objective/train/value_loss": 0.00031318547553382814, + "objective/train/value_max": -0.0003654956817626953, + "objective/train/value_min": -0.4619140625, + "objective/train/value_reward_corr": 0.5194909400023118, + "objective/train/value_std": 0.0107574462890625, + "objective/train/weight_avg": 1.0015056133270264, + "objective/train/weighted_lm_loss": 1.8895421028137207, + "objective/train/weights_max": 1.373528242111206, + "objective/train/weights_min": 0.38126373291015625, + "theoretical_loss": 4.113963205459697, + "tokens_seen": 340787200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00090579361258225, + "loss": 0.0913, + "theoretical_loss": 4.113963205459697, + "tokens_seen": 340787200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009057133686406676, + "loss": 0.0915, + "theoretical_loss": 4.113602269351765, + "tokens_seen": 341049344 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009056331246990852, + "loss": 0.0906, + "theoretical_loss": 4.113241688179341, + "tokens_seen": 341311488 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009055528807575029, + "loss": 0.0934, + "theoretical_loss": 4.1128814613211855, + "tokens_seen": 341573632 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009054726368159204, + "loss": 0.0902, + "theoretical_loss": 4.1125215881576205, + "tokens_seen": 341835776 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009053923928743381, + "loss": 0.0901, + "theoretical_loss": 4.112162068070525, + "tokens_seen": 342097920 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009053121489327556, + "loss": 0.0916, + "theoretical_loss": 4.111802900443333, + "tokens_seen": 342360064 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009052319049911731, + "loss": 0.0895, + "theoretical_loss": 4.111444084661026, + "tokens_seen": 342622208 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009051516610495908, + "loss": 0.0891, + "theoretical_loss": 4.111085620110127, + "tokens_seen": 342884352 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009050714171080083, + "loss": 0.0918, + "theoretical_loss": 4.110727506178697, + "tokens_seen": 343146496 + }, + { + "epoch": 0.1, + "learning_rate": 0.000904991173166426, + "loss": 0.0935, + "theoretical_loss": 4.110369742256329, + "tokens_seen": 343408640 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009049109292248435, + "loss": 0.0902, + "theoretical_loss": 4.110012327734145, + "tokens_seen": 343670784 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009048306852832611, + "loss": 0.0916, + "theoretical_loss": 4.1096552620047895, + "tokens_seen": 343932928 + }, + { + "epoch": 0.1, + "objective/train/advantage_avg": 0.00047141360118985176, + "objective/train/docs_used": 131936, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.86632239818573, + "objective/train/original_loss": 1.8663225173950195, + "objective/train/theoretical_loss": 4.109476859748038, + "objective/train/tokens_used": 364524000, + "objective/train/value_avg": -0.00713348388671875, + "objective/train/value_loss": 0.00014549301704391837, + "objective/train/value_max": -0.0002472400665283203, + "objective/train/value_min": -0.466064453125, + "objective/train/value_reward_corr": 0.7043933872865291, + "objective/train/value_std": 0.01116180419921875, + "objective/train/weight_avg": 1.0005429983139038, + "objective/train/weighted_lm_loss": 1.8668056726455688, + "objective/train/weights_max": 1.1419428586959839, + "objective/train/weights_min": 0.7390621304512024, + "theoretical_loss": 4.109476859748038, + "tokens_seen": 344064000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009047504413416788, + "loss": 0.0924, + "theoretical_loss": 4.109298544462423, + "tokens_seen": 344195072 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009046701974000963, + "loss": 0.087, + "theoretical_loss": 4.108942174502721, + "tokens_seen": 344457216 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009045899534585139, + "loss": 0.0912, + "theoretical_loss": 4.108586151522863, + "tokens_seen": 344719360 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009045097095169315, + "loss": 0.0936, + "theoretical_loss": 4.1082304749215375, + "tokens_seen": 344981504 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009044294655753491, + "loss": 0.0912, + "theoretical_loss": 4.107875144098925, + "tokens_seen": 345243648 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009043492216337666, + "loss": 0.0954, + "theoretical_loss": 4.107520158456703, + "tokens_seen": 345505792 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009042689776921843, + "loss": 0.0906, + "theoretical_loss": 4.107165517398034, + "tokens_seen": 345767936 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009041887337506018, + "loss": 0.093, + "theoretical_loss": 4.106811220327568, + "tokens_seen": 346030080 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009041084898090193, + "loss": 0.0911, + "theoretical_loss": 4.10645726665143, + "tokens_seen": 346292224 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009040282458674371, + "loss": 0.0936, + "theoretical_loss": 4.10610365577722, + "tokens_seen": 346554368 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009039480019258546, + "loss": 0.0919, + "theoretical_loss": 4.105750387114009, + "tokens_seen": 346816512 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009038677579842723, + "loss": 0.0909, + "theoretical_loss": 4.105397460072329, + "tokens_seen": 347078656 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0021935408003628254, + "objective/train/docs_used": 133174, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.799157738685608, + "objective/train/original_loss": 1.7991578578948975, + "objective/train/theoretical_loss": 4.105044874064174, + "objective/train/tokens_used": 367800800, + "objective/train/value_avg": -0.00794219970703125, + "objective/train/value_loss": 0.00020322235650382936, + "objective/train/value_max": -0.0003459453582763672, + "objective/train/value_min": -0.2293701171875, + "objective/train/value_reward_corr": 0.5725621260668602, + "objective/train/value_std": 0.0089569091796875, + "objective/train/weight_avg": 1.0022897720336914, + "objective/train/weighted_lm_loss": 1.801876425743103, + "objective/train/weights_max": 1.1363343000411987, + "objective/train/weights_min": 0.3731314241886139, + "theoretical_loss": 4.105044874064174, + "tokens_seen": 347340800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009037875140426898, + "loss": 0.0874, + "theoretical_loss": 4.105044874064174, + "tokens_seen": 347340800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009037072701011074, + "loss": 0.0912, + "theoretical_loss": 4.104692628502993, + "tokens_seen": 347602944 + }, + { + "epoch": 0.11, + "learning_rate": 0.000903627026159525, + "loss": 0.0899, + "theoretical_loss": 4.104340722803683, + "tokens_seen": 347865088 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009035467822179425, + "loss": 0.0907, + "theoretical_loss": 4.103989156382589, + "tokens_seen": 348127232 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009034665382763601, + "loss": 0.0876, + "theoretical_loss": 4.103637928657495, + "tokens_seen": 348389376 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009033862943347777, + "loss": 0.0955, + "theoretical_loss": 4.103287039047622, + "tokens_seen": 348651520 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009033060503931954, + "loss": 0.0904, + "theoretical_loss": 4.102936486973624, + "tokens_seen": 348913664 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009032258064516129, + "loss": 0.09, + "theoretical_loss": 4.102586271857579, + "tokens_seen": 349175808 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009031455625100306, + "loss": 0.0886, + "theoretical_loss": 4.102236393122989, + "tokens_seen": 349437952 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009030653185684481, + "loss": 0.092, + "theoretical_loss": 4.101886850194775, + "tokens_seen": 349700096 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009029850746268657, + "loss": 0.0886, + "theoretical_loss": 4.10153764249927, + "tokens_seen": 349962240 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009029048306852833, + "loss": 0.0889, + "theoretical_loss": 4.1011887694642155, + "tokens_seen": 350224384 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009028245867437008, + "loss": 0.09, + "theoretical_loss": 4.100840230518759, + "tokens_seen": 350486528 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": -0.0007634600042365491, + "objective/train/docs_used": 134382, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8593584299087524, + "objective/train/original_loss": 1.859358549118042, + "objective/train/theoretical_loss": 4.10066608615162, + "objective/train/tokens_used": 371077600, + "objective/train/value_avg": -0.00992584228515625, + "objective/train/value_loss": 0.00040058817830868065, + "objective/train/value_max": -0.000415802001953125, + "objective/train/value_min": -0.1810302734375, + "objective/train/value_reward_corr": 0.653844216492223, + "objective/train/value_std": 0.01154327392578125, + "objective/train/weight_avg": 0.9994103908538818, + "objective/train/weighted_lm_loss": 1.8588926792144775, + "objective/train/weights_max": 1.1080909967422485, + "objective/train/weights_min": 0.22362525761127472, + "theoretical_loss": 4.10066608615162, + "tokens_seen": 350617600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009027443428021184, + "loss": 0.0923, + "theoretical_loss": 4.100492025093445, + "tokens_seen": 350748672 + }, + { + "epoch": 0.11, + "learning_rate": 0.000902664098860536, + "loss": 0.0924, + "theoretical_loss": 4.100144152620215, + "tokens_seen": 351010816 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009025838549189537, + "loss": 0.0908, + "theoretical_loss": 4.099796612532403, + "tokens_seen": 351272960 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009025036109773713, + "loss": 0.09, + "theoretical_loss": 4.0994494042647265, + "tokens_seen": 351535104 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009024233670357889, + "loss": 0.0908, + "theoretical_loss": 4.099102527253285, + "tokens_seen": 351797248 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009023431230942064, + "loss": 0.0926, + "theoretical_loss": 4.098755980935557, + "tokens_seen": 352059392 + }, + { + "epoch": 0.11, + "learning_rate": 0.000902262879152624, + "loss": 0.0895, + "theoretical_loss": 4.0984097647503965, + "tokens_seen": 352321536 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009021826352110416, + "loss": 0.0906, + "theoretical_loss": 4.09806387813802, + "tokens_seen": 352583680 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009021023912694591, + "loss": 0.0878, + "theoretical_loss": 4.0977183205400145, + "tokens_seen": 352845824 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009020221473278768, + "loss": 0.0901, + "theoretical_loss": 4.097373091399324, + "tokens_seen": 353107968 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009019419033862943, + "loss": 0.0898, + "theoretical_loss": 4.097028190160249, + "tokens_seen": 353370112 + }, + { + "epoch": 0.11, + "learning_rate": 0.000901861659444712, + "loss": 0.0912, + "theoretical_loss": 4.096683616268442, + "tokens_seen": 353632256 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0033219335600733757, + "objective/train/docs_used": 135614, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5801607370376587, + "objective/train/original_loss": 1.5801606178283691, + "objective/train/theoretical_loss": 4.096339369170902, + "objective/train/tokens_used": 374354400, + "objective/train/value_avg": -0.01206207275390625, + "objective/train/value_loss": 0.00040383703890256584, + "objective/train/value_max": -0.0004992485046386719, + "objective/train/value_min": -0.5283203125, + "objective/train/value_reward_corr": 0.6677181799151066, + "objective/train/value_std": 0.0159149169921875, + "objective/train/weight_avg": 1.003501296043396, + "objective/train/weighted_lm_loss": 1.5844190120697021, + "objective/train/weights_max": 1.4516695737838745, + "objective/train/weights_min": 0.37713250517845154, + "theoretical_loss": 4.096339369170902, + "tokens_seen": 353894400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009017814155031296, + "loss": 0.0904, + "theoretical_loss": 4.096339369170902, + "tokens_seen": 353894400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009017011715615471, + "loss": 0.0911, + "theoretical_loss": 4.095995448315972, + "tokens_seen": 354156544 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009016209276199647, + "loss": 0.0903, + "theoretical_loss": 4.095651853153331, + "tokens_seen": 354418688 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009015406836783823, + "loss": 0.0916, + "theoretical_loss": 4.095308583133997, + "tokens_seen": 354680832 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009014604397367999, + "loss": 0.0917, + "theoretical_loss": 4.094965637710314, + "tokens_seen": 354942976 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009013801957952175, + "loss": 0.0907, + "theoretical_loss": 4.094623016335954, + "tokens_seen": 355205120 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009012999518536351, + "loss": 0.0904, + "theoretical_loss": 4.094280718465911, + "tokens_seen": 355467264 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009012197079120526, + "loss": 0.0904, + "theoretical_loss": 4.093938743556496, + "tokens_seen": 355729408 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009011394639704702, + "loss": 0.0891, + "theoretical_loss": 4.093597091065333, + "tokens_seen": 355991552 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009010592200288879, + "loss": 0.0892, + "theoretical_loss": 4.093255760451357, + "tokens_seen": 356253696 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009009789760873054, + "loss": 0.0887, + "theoretical_loss": 4.092914751174808, + "tokens_seen": 356515840 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009008987321457231, + "loss": 0.0903, + "theoretical_loss": 4.092574062697225, + "tokens_seen": 356777984 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009008184882041406, + "loss": 0.0885, + "theoretical_loss": 4.092233694481447, + "tokens_seen": 357040128 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0004918540944345295, + "objective/train/docs_used": 136331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9003441333770752, + "objective/train/original_loss": 1.9003441333770752, + "objective/train/theoretical_loss": 4.092063630304224, + "objective/train/tokens_used": 377631200, + "objective/train/value_avg": -0.007537841796875, + "objective/train/value_loss": 0.00020429695723578334, + "objective/train/value_max": -0.0003597736358642578, + "objective/train/value_min": -0.2430419921875, + "objective/train/value_reward_corr": 0.5656587517444805, + "objective/train/value_std": 0.00870513916015625, + "objective/train/weight_avg": 1.0005881786346436, + "objective/train/weighted_lm_loss": 1.9004318714141846, + "objective/train/weights_max": 1.1491543054580688, + "objective/train/weights_min": 0.3702276945114136, + "theoretical_loss": 4.092063630304224, + "tokens_seen": 357171200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009007382442625582, + "loss": 0.0933, + "theoretical_loss": 4.091893645991604, + "tokens_seen": 357302272 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009006580003209758, + "loss": 0.0918, + "theoretical_loss": 4.091553916693115, + "tokens_seen": 357564416 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009005777563793933, + "loss": 0.0863, + "theoretical_loss": 4.091214506052687, + "tokens_seen": 357826560 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009004975124378109, + "loss": 0.0846, + "theoretical_loss": 4.090875413538302, + "tokens_seen": 358088704 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009004172684962285, + "loss": 0.0925, + "theoretical_loss": 4.090536638619224, + "tokens_seen": 358350848 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009003370245546462, + "loss": 0.0879, + "theoretical_loss": 4.090198180765989, + "tokens_seen": 358612992 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009002567806130637, + "loss": 0.0895, + "theoretical_loss": 4.0898600394504, + "tokens_seen": 358875136 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009001765366714814, + "loss": 0.0891, + "theoretical_loss": 4.089522214145525, + "tokens_seen": 359137280 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009000962927298989, + "loss": 0.0917, + "theoretical_loss": 4.089184704325695, + "tokens_seen": 359399424 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009000160487883166, + "loss": 0.0891, + "theoretical_loss": 4.088847509466497, + "tokens_seen": 359661568 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008999358048467341, + "loss": 0.0889, + "theoretical_loss": 4.088510629044771, + "tokens_seen": 359923712 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008998555609051516, + "loss": 0.0889, + "theoretical_loss": 4.088174062538605, + "tokens_seen": 360185856 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0018875771202147007, + "objective/train/docs_used": 137426, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.9378498792648315, + "objective/train/original_loss": 1.937849998474121, + "objective/train/theoretical_loss": 4.087837809427334, + "objective/train/tokens_used": 380908000, + "objective/train/value_avg": -0.01007843017578125, + "objective/train/value_loss": 0.00019329431233927608, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.277099609375, + "objective/train/value_reward_corr": 0.6944034668612225, + "objective/train/value_std": 0.01515960693359375, + "objective/train/weight_avg": 1.0019793510437012, + "objective/train/weighted_lm_loss": 1.9406569004058838, + "objective/train/weights_max": 1.115104079246521, + "objective/train/weights_min": 0.36884480714797974, + "theoretical_loss": 4.087837809427334, + "tokens_seen": 360448000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008997753169635693, + "loss": 0.0936, + "theoretical_loss": 4.087837809427334, + "tokens_seen": 360448000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008996950730219868, + "loss": 0.0886, + "theoretical_loss": 4.087501869191536, + "tokens_seen": 360710144 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008996148290804045, + "loss": 0.0882, + "theoretical_loss": 4.087166241313023, + "tokens_seen": 360972288 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008995345851388221, + "loss": 0.093, + "theoretical_loss": 4.086830925274842, + "tokens_seen": 361234432 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008994543411972397, + "loss": 0.0892, + "theoretical_loss": 4.08649592056127, + "tokens_seen": 361496576 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008993740972556572, + "loss": 0.093, + "theoretical_loss": 4.086161226657811, + "tokens_seen": 361758720 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008992938533140748, + "loss": 0.0863, + "theoretical_loss": 4.085826843051191, + "tokens_seen": 362020864 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008992136093724924, + "loss": 0.092, + "theoretical_loss": 4.0854927692293534, + "tokens_seen": 362283008 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008991333654309099, + "loss": 0.0896, + "theoretical_loss": 4.085159004681458, + "tokens_seen": 362545152 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008990531214893276, + "loss": 0.0908, + "theoretical_loss": 4.084825548897873, + "tokens_seen": 362807296 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008989728775477451, + "loss": 0.0933, + "theoretical_loss": 4.084492401370177, + "tokens_seen": 363069440 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008988926336061629, + "loss": 0.0915, + "theoretical_loss": 4.08415956159115, + "tokens_seen": 363331584 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008988123896645804, + "loss": 0.0896, + "theoretical_loss": 4.083827029054773, + "tokens_seen": 363593728 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.000944749335758388, + "objective/train/docs_used": 138686, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8428866863250732, + "objective/train/original_loss": 1.8428866863250732, + "objective/train/theoretical_loss": 4.083660877844774, + "objective/train/tokens_used": 384184800, + "objective/train/value_avg": -0.007354736328125, + "objective/train/value_loss": 0.0001762463798513636, + "objective/train/value_max": -0.00036406517028808594, + "objective/train/value_min": -0.16845703125, + "objective/train/value_reward_corr": 0.5633609780519697, + "objective/train/value_std": 0.008056640625, + "objective/train/weight_avg": 1.0010242462158203, + "objective/train/weighted_lm_loss": 1.8447622060775757, + "objective/train/weights_max": 1.0969452857971191, + "objective/train/weights_min": 0.3732965886592865, + "theoretical_loss": 4.083660877844774, + "tokens_seen": 363724800 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008987321457229979, + "loss": 0.0914, + "theoretical_loss": 4.0834948032562215, + "tokens_seen": 363855872 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008986519017814156, + "loss": 0.0914, + "theoretical_loss": 4.083162883691864, + "tokens_seen": 364118016 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008985716578398331, + "loss": 0.0915, + "theoretical_loss": 4.082831269859261, + "tokens_seen": 364380160 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008984914138982507, + "loss": 0.0904, + "theoretical_loss": 4.0824999612571515, + "tokens_seen": 364642304 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008984111699566683, + "loss": 0.0894, + "theoretical_loss": 4.082168957385462, + "tokens_seen": 364904448 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008983309260150859, + "loss": 0.092, + "theoretical_loss": 4.081838257745293, + "tokens_seen": 365166592 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008982506820735034, + "loss": 0.0877, + "theoretical_loss": 4.081507861838922, + "tokens_seen": 365428736 + }, + { + "epoch": 0.11, + "learning_rate": 0.000898170438131921, + "loss": 0.0904, + "theoretical_loss": 4.081177769169795, + "tokens_seen": 365690880 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008980901941903387, + "loss": 0.0874, + "theoretical_loss": 4.080847979242526, + "tokens_seen": 365953024 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008980099502487562, + "loss": 0.0904, + "theoretical_loss": 4.0805184915628905, + "tokens_seen": 366215168 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008979297063071739, + "loss": 0.0896, + "theoretical_loss": 4.080189305637827, + "tokens_seen": 366477312 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008978494623655914, + "loss": 0.0898, + "theoretical_loss": 4.079860420975429, + "tokens_seen": 366739456 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0033440319821238518, + "objective/train/docs_used": 139849, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8659143447875977, + "objective/train/original_loss": 1.8659144639968872, + "objective/train/theoretical_loss": 4.07953183708494, + "objective/train/tokens_used": 387461600, + "objective/train/value_avg": -0.00817108154296875, + "objective/train/value_loss": 0.0002236411819467321, + "objective/train/value_max": -0.00025916099548339844, + "objective/train/value_min": -0.228515625, + "objective/train/value_reward_corr": 0.38717246449564546, + "objective/train/value_std": 0.00867462158203125, + "objective/train/weight_avg": 1.0034469366073608, + "objective/train/weighted_lm_loss": 1.8702045679092407, + "objective/train/weights_max": 1.2567331790924072, + "objective/train/weights_min": 0.3701472282409668, + "theoretical_loss": 4.07953183708494, + "tokens_seen": 367001600 + }, + { + "epoch": 0.11, + "learning_rate": 0.000897769218424009, + "loss": 0.0949, + "theoretical_loss": 4.07953183708494, + "tokens_seen": 367001600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008976889744824266, + "loss": 0.0914, + "theoretical_loss": 4.079203553476759, + "tokens_seen": 367263744 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008976087305408441, + "loss": 0.0919, + "theoretical_loss": 4.078875569662424, + "tokens_seen": 367525888 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008975284865992618, + "loss": 0.0886, + "theoretical_loss": 4.07854788515462, + "tokens_seen": 367788032 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008974482426576793, + "loss": 0.0896, + "theoretical_loss": 4.078220499467168, + "tokens_seen": 368050176 + }, + { + "epoch": 0.11, + "learning_rate": 0.000897367998716097, + "loss": 0.09, + "theoretical_loss": 4.077893412115025, + "tokens_seen": 368312320 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008972877547745146, + "loss": 0.0878, + "theoretical_loss": 4.077566622614281, + "tokens_seen": 368574464 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008972075108329322, + "loss": 0.0894, + "theoretical_loss": 4.077240130482153, + "tokens_seen": 368836608 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008971272668913497, + "loss": 0.0914, + "theoretical_loss": 4.076913935236982, + "tokens_seen": 369098752 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008970470229497674, + "loss": 0.0894, + "theoretical_loss": 4.076588036398235, + "tokens_seen": 369360896 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008969667790081849, + "loss": 0.0891, + "theoretical_loss": 4.076262433486491, + "tokens_seen": 369623040 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008968865350666024, + "loss": 0.0913, + "theoretical_loss": 4.075937126023448, + "tokens_seen": 369885184 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008968062911250201, + "loss": 0.0888, + "theoretical_loss": 4.075612113531915, + "tokens_seen": 370147328 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0013215008657425642, + "objective/train/docs_used": 141078, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.754763126373291, + "objective/train/original_loss": 1.754763126373291, + "objective/train/theoretical_loss": 4.0754497177516456, + "objective/train/tokens_used": 390738400, + "objective/train/value_avg": -0.0116424560546875, + "objective/train/value_loss": 0.00028973660664632916, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.5981599021798076, + "objective/train/value_std": 0.013458251953125, + "objective/train/weight_avg": 1.001459002494812, + "objective/train/weighted_lm_loss": 1.7561613321304321, + "objective/train/weights_max": 1.1360948085784912, + "objective/train/weights_min": 0.38701966404914856, + "theoretical_loss": 4.0754497177516456, + "tokens_seen": 370278400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008967260471834376, + "loss": 0.0888, + "theoretical_loss": 4.075287395535807, + "tokens_seen": 370409472 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008966458032418553, + "loss": 0.0897, + "theoretical_loss": 4.074962971560145, + "tokens_seen": 370671616 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008965655593002729, + "loss": 0.0885, + "theoretical_loss": 4.074638841131052, + "tokens_seen": 370933760 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008964853153586905, + "loss": 0.0905, + "theoretical_loss": 4.074315003775746, + "tokens_seen": 371195904 + }, + { + "epoch": 0.11, + "learning_rate": 0.000896405071417108, + "loss": 0.0917, + "theoretical_loss": 4.073991459022544, + "tokens_seen": 371458048 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008963248274755256, + "loss": 0.0871, + "theoretical_loss": 4.073668206400851, + "tokens_seen": 371720192 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008962445835339432, + "loss": 0.0954, + "theoretical_loss": 4.0733452454411605, + "tokens_seen": 371982336 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008961643395923608, + "loss": 0.0904, + "theoretical_loss": 4.0730225756750515, + "tokens_seen": 372244480 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008960840956507784, + "loss": 0.0879, + "theoretical_loss": 4.072700196635185, + "tokens_seen": 372506624 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008960038517091959, + "loss": 0.0906, + "theoretical_loss": 4.072378107855299, + "tokens_seen": 372768768 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008959236077676137, + "loss": 0.0866, + "theoretical_loss": 4.0720563088702075, + "tokens_seen": 373030912 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008958433638260312, + "loss": 0.089, + "theoretical_loss": 4.071734799215796, + "tokens_seen": 373293056 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0027087300550192595, + "objective/train/docs_used": 142333, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8659402132034302, + "objective/train/original_loss": 1.8659402132034302, + "objective/train/theoretical_loss": 4.071413578429017, + "objective/train/tokens_used": 394015200, + "objective/train/value_avg": -0.01194000244140625, + "objective/train/value_loss": 0.0003572187852114439, + "objective/train/value_max": -0.0005660057067871094, + "objective/train/value_min": -0.54833984375, + "objective/train/value_reward_corr": 0.6665229811822021, + "objective/train/value_std": 0.0186767578125, + "objective/train/weight_avg": 1.002875804901123, + "objective/train/weighted_lm_loss": 1.8703423738479614, + "objective/train/weights_max": 1.7303780317306519, + "objective/train/weights_min": 0.39453253149986267, + "theoretical_loss": 4.071413578429017, + "tokens_seen": 373555200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008957631198844487, + "loss": 0.088, + "theoretical_loss": 4.071413578429017, + "tokens_seen": 373555200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008956828759428664, + "loss": 0.0903, + "theoretical_loss": 4.071092646047892, + "tokens_seen": 373817344 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008956026320012839, + "loss": 0.0885, + "theoretical_loss": 4.0707720016115, + "tokens_seen": 374079488 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008955223880597015, + "loss": 0.0874, + "theoretical_loss": 4.0704516446599825, + "tokens_seen": 374341632 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008954421441181191, + "loss": 0.092, + "theoretical_loss": 4.070131574734536, + "tokens_seen": 374603776 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008953619001765367, + "loss": 0.0897, + "theoretical_loss": 4.069811791377409, + "tokens_seen": 374865920 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008952816562349542, + "loss": 0.0887, + "theoretical_loss": 4.0694922941319, + "tokens_seen": 375128064 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008952014122933718, + "loss": 0.0914, + "theoretical_loss": 4.069173082542351, + "tokens_seen": 375390208 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008951211683517895, + "loss": 0.0939, + "theoretical_loss": 4.068854156154154, + "tokens_seen": 375652352 + }, + { + "epoch": 0.11, + "learning_rate": 0.000895040924410207, + "loss": 0.0905, + "theoretical_loss": 4.068535514513734, + "tokens_seen": 375914496 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008949606804686247, + "loss": 0.0918, + "theoretical_loss": 4.068217157168556, + "tokens_seen": 376176640 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008948804365270422, + "loss": 0.0897, + "theoretical_loss": 4.06789908366712, + "tokens_seen": 376438784 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008948001925854599, + "loss": 0.0918, + "theoretical_loss": 4.067581293558955, + "tokens_seen": 376700928 + }, + { + "epoch": 0.11, + "objective/train/advantage_avg": 0.0015242223162204027, + "objective/train/docs_used": 143470, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8477563858032227, + "objective/train/original_loss": 1.8477565050125122, + "objective/train/theoretical_loss": 4.067422504636857, + "objective/train/tokens_used": 397292000, + "objective/train/value_avg": -0.00897216796875, + "objective/train/value_loss": 0.00023800358758307993, + "objective/train/value_max": -0.00040459632873535156, + "objective/train/value_min": -0.291748046875, + "objective/train/value_reward_corr": 0.5821468077761229, + "objective/train/value_std": 0.0121917724609375, + "objective/train/weight_avg": 1.0016366243362427, + "objective/train/weighted_lm_loss": 1.8498717546463013, + "objective/train/weights_max": 1.1463937759399414, + "objective/train/weights_min": 0.3839375674724579, + "theoretical_loss": 4.067422504636857, + "tokens_seen": 376832000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008947199486438774, + "loss": 0.0918, + "theoretical_loss": 4.0672637863946175, + "tokens_seen": 376963072 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008946397047022949, + "loss": 0.0885, + "theoretical_loss": 4.0669465617256915, + "tokens_seen": 377225216 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008945594607607126, + "loss": 0.0878, + "theoretical_loss": 4.06662961910478, + "tokens_seen": 377487360 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008944792168191301, + "loss": 0.0873, + "theoretical_loss": 4.066312958085503, + "tokens_seen": 377749504 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008943989728775478, + "loss": 0.088, + "theoretical_loss": 4.065996578222502, + "tokens_seen": 378011648 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008943187289359654, + "loss": 0.0904, + "theoretical_loss": 4.065680479071426, + "tokens_seen": 378273792 + }, + { + "epoch": 0.11, + "learning_rate": 0.000894238484994383, + "loss": 0.0875, + "theoretical_loss": 4.0653646601889335, + "tokens_seen": 378535936 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008941582410528005, + "loss": 0.0891, + "theoretical_loss": 4.065049121132693, + "tokens_seen": 378798080 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008940779971112181, + "loss": 0.0881, + "theoretical_loss": 4.0647338614613755, + "tokens_seen": 379060224 + }, + { + "epoch": 0.11, + "learning_rate": 0.0008939977531696357, + "loss": 0.0862, + "theoretical_loss": 4.0644188807346495, + "tokens_seen": 379322368 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008939175092280532, + "loss": 0.0906, + "theoretical_loss": 4.064104178513186, + "tokens_seen": 379584512 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008938372652864709, + "loss": 0.089, + "theoretical_loss": 4.0637897543586465, + "tokens_seen": 379846656 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.002633684780448675, + "objective/train/docs_used": 144698, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.705493688583374, + "objective/train/original_loss": 1.705493688583374, + "objective/train/theoretical_loss": 4.063475607833687, + "objective/train/tokens_used": 400568800, + "objective/train/value_avg": -0.00835418701171875, + "objective/train/value_loss": 0.0002564003807492554, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.32568359375, + "objective/train/value_reward_corr": 0.6303237316396728, + "objective/train/value_std": 0.01141357421875, + "objective/train/weight_avg": 1.002751350402832, + "objective/train/weighted_lm_loss": 1.7094148397445679, + "objective/train/weights_max": 1.104806661605835, + "objective/train/weights_min": 0.3750951290130615, + "theoretical_loss": 4.063475607833687, + "tokens_seen": 380108800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008937570213448884, + "loss": 0.087, + "theoretical_loss": 4.063475607833687, + "tokens_seen": 380108800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008936767774033062, + "loss": 0.0893, + "theoretical_loss": 4.063161738501951, + "tokens_seen": 380370944 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008935965334617237, + "loss": 0.0892, + "theoretical_loss": 4.0628481459280685, + "tokens_seen": 380633088 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008935162895201413, + "loss": 0.0872, + "theoretical_loss": 4.062534829677653, + "tokens_seen": 380895232 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008934360455785589, + "loss": 0.0894, + "theoretical_loss": 4.062221789317297, + "tokens_seen": 381157376 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008933558016369764, + "loss": 0.0874, + "theoretical_loss": 4.061909024414572, + "tokens_seen": 381419520 + }, + { + "epoch": 0.12, + "learning_rate": 0.000893275557695394, + "loss": 0.088, + "theoretical_loss": 4.061596534538021, + "tokens_seen": 381681664 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008931953137538116, + "loss": 0.0913, + "theoretical_loss": 4.061284319257162, + "tokens_seen": 381943808 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008931150698122292, + "loss": 0.0907, + "theoretical_loss": 4.060972378142479, + "tokens_seen": 382205952 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008930348258706467, + "loss": 0.0902, + "theoretical_loss": 4.060660710765423, + "tokens_seen": 382468096 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008929545819290645, + "loss": 0.0914, + "theoretical_loss": 4.060349316698408, + "tokens_seen": 382730240 + }, + { + "epoch": 0.12, + "learning_rate": 0.000892874337987482, + "loss": 0.0879, + "theoretical_loss": 4.060038195514808, + "tokens_seen": 382992384 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008927940940458995, + "loss": 0.0868, + "theoretical_loss": 4.059727346788955, + "tokens_seen": 383254528 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0013565245317295194, + "objective/train/docs_used": 145827, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7567694187164307, + "objective/train/original_loss": 1.7567695379257202, + "objective/train/theoretical_loss": 4.059572024464923, + "objective/train/tokens_used": 403845600, + "objective/train/value_avg": -0.0077972412109375, + "objective/train/value_loss": 0.00035242707235738635, + "objective/train/value_max": -0.00033283233642578125, + "objective/train/value_min": -0.4189453125, + "objective/train/value_reward_corr": 0.5299108861611858, + "objective/train/value_std": 0.01129150390625, + "objective/train/weight_avg": 1.0015063285827637, + "objective/train/weighted_lm_loss": 1.7594711780548096, + "objective/train/weights_max": 1.2279499769210815, + "objective/train/weights_min": 0.36844298243522644, + "theoretical_loss": 4.059572024464923, + "tokens_seen": 383385600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008927138501043172, + "loss": 0.0874, + "theoretical_loss": 4.059416770096134, + "tokens_seen": 383516672 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008926336061627347, + "loss": 0.0888, + "theoretical_loss": 4.059106465012583, + "tokens_seen": 383778816 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008925533622211523, + "loss": 0.0881, + "theoretical_loss": 4.058796431115489, + "tokens_seen": 384040960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008924731182795699, + "loss": 0.0897, + "theoretical_loss": 4.058486667982986, + "tokens_seen": 384303104 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008923928743379875, + "loss": 0.0877, + "theoretical_loss": 4.058177175194148, + "tokens_seen": 384565248 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008923126303964051, + "loss": 0.0886, + "theoretical_loss": 4.057867952328994, + "tokens_seen": 384827392 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008922323864548226, + "loss": 0.0901, + "theoretical_loss": 4.057558998968479, + "tokens_seen": 385089536 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008921521425132403, + "loss": 0.0907, + "theoretical_loss": 4.0572503146944925, + "tokens_seen": 385351680 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008920718985716579, + "loss": 0.0887, + "theoretical_loss": 4.056941899089858, + "tokens_seen": 385613824 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008919916546300755, + "loss": 0.0885, + "theoretical_loss": 4.056633751738328, + "tokens_seen": 385875968 + }, + { + "epoch": 0.12, + "learning_rate": 0.000891911410688493, + "loss": 0.0879, + "theoretical_loss": 4.0563258722245825, + "tokens_seen": 386138112 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008918311667469107, + "loss": 0.0875, + "theoretical_loss": 4.056018260134226, + "tokens_seen": 386400256 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0018099743174389005, + "objective/train/docs_used": 146981, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.903164267539978, + "objective/train/original_loss": 1.9031643867492676, + "objective/train/theoretical_loss": 4.055710915053783, + "objective/train/tokens_used": 407122400, + "objective/train/value_avg": -0.0086517333984375, + "objective/train/value_loss": 0.0001866398670244962, + "objective/train/value_max": -0.0004425048828125, + "objective/train/value_min": -0.2122802734375, + "objective/train/value_reward_corr": 0.5988158937852228, + "objective/train/value_std": 0.01146697998046875, + "objective/train/weight_avg": 1.0019017457962036, + "objective/train/weighted_lm_loss": 1.9061700105667114, + "objective/train/weights_max": 1.1358146667480469, + "objective/train/weights_min": 0.6089951395988464, + "theoretical_loss": 4.055710915053783, + "tokens_seen": 386662400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008917509228053282, + "loss": 0.0884, + "theoretical_loss": 4.055710915053783, + "tokens_seen": 386662400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008916706788637457, + "loss": 0.0871, + "theoretical_loss": 4.055403836570701, + "tokens_seen": 386924544 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008915904349221634, + "loss": 0.0915, + "theoretical_loss": 4.05509702427334, + "tokens_seen": 387186688 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008915101909805809, + "loss": 0.0866, + "theoretical_loss": 4.054790477750974, + "tokens_seen": 387448832 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008914299470389986, + "loss": 0.0881, + "theoretical_loss": 4.054484196593791, + "tokens_seen": 387710976 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008913497030974162, + "loss": 0.0891, + "theoretical_loss": 4.054178180392885, + "tokens_seen": 387973120 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008912694591558338, + "loss": 0.0835, + "theoretical_loss": 4.053872428740256, + "tokens_seen": 388235264 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008911892152142514, + "loss": 0.0894, + "theoretical_loss": 4.053566941228809, + "tokens_seen": 388497408 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008911089712726689, + "loss": 0.0895, + "theoretical_loss": 4.053261717452346, + "tokens_seen": 388759552 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008910287273310865, + "loss": 0.0865, + "theoretical_loss": 4.052956757005573, + "tokens_seen": 389021696 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008909484833895041, + "loss": 0.0864, + "theoretical_loss": 4.0526520594840845, + "tokens_seen": 389283840 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008908682394479217, + "loss": 0.0906, + "theoretical_loss": 4.052347624484373, + "tokens_seen": 389545984 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008907879955063392, + "loss": 0.0874, + "theoretical_loss": 4.052043451603818, + "tokens_seen": 389808128 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.002918938407674432, + "objective/train/docs_used": 148229, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8769011497497559, + "objective/train/original_loss": 1.8769011497497559, + "objective/train/theoretical_loss": 4.051891463332648, + "objective/train/tokens_used": 410399200, + "objective/train/value_avg": -0.00745391845703125, + "objective/train/value_loss": 0.00021266264957375824, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.27587890625, + "objective/train/value_reward_corr": 0.5017984739541499, + "objective/train/value_std": 0.0098876953125, + "objective/train/weight_avg": 1.003010869026184, + "objective/train/weighted_lm_loss": 1.8814412355422974, + "objective/train/weights_max": 1.1254287958145142, + "objective/train/weights_min": 0.380566269159317, + "theoretical_loss": 4.051891463332648, + "tokens_seen": 389939200 + }, + { + "epoch": 0.12, + "learning_rate": 0.000890707751564757, + "loss": 0.0897, + "theoretical_loss": 4.051739540440688, + "tokens_seen": 390070272 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008906275076231745, + "loss": 0.0865, + "theoretical_loss": 4.0514358905941386, + "tokens_seen": 390332416 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008905472636815921, + "loss": 0.0881, + "theoretical_loss": 4.051132501664204, + "tokens_seen": 390594560 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008904670197400097, + "loss": 0.0898, + "theoretical_loss": 4.050829373251803, + "tokens_seen": 390856704 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008903867757984272, + "loss": 0.0905, + "theoretical_loss": 4.050526504958727, + "tokens_seen": 391118848 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008903065318568448, + "loss": 0.0853, + "theoretical_loss": 4.050223896387647, + "tokens_seen": 391380992 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008902262879152624, + "loss": 0.0879, + "theoretical_loss": 4.0499215471421035, + "tokens_seen": 391643136 + }, + { + "epoch": 0.12, + "learning_rate": 0.00089014604397368, + "loss": 0.0888, + "theoretical_loss": 4.049619456826508, + "tokens_seen": 391905280 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008900658000320975, + "loss": 0.0875, + "theoretical_loss": 4.0493176250461405, + "tokens_seen": 392167424 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008899855560905153, + "loss": 0.0879, + "theoretical_loss": 4.049016051407145, + "tokens_seen": 392429568 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008899053121489328, + "loss": 0.087, + "theoretical_loss": 4.048714735516527, + "tokens_seen": 392691712 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008898250682073504, + "loss": 0.0909, + "theoretical_loss": 4.048413676982155, + "tokens_seen": 392953856 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0008797993650659919, + "objective/train/docs_used": 149327, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8922388553619385, + "objective/train/original_loss": 1.8922390937805176, + "objective/train/theoretical_loss": 4.048112875412752, + "objective/train/tokens_used": 413676000, + "objective/train/value_avg": -0.0099639892578125, + "objective/train/value_loss": 0.00043581624049693346, + "objective/train/value_max": -0.0004076957702636719, + "objective/train/value_min": -0.87744140625, + "objective/train/value_reward_corr": 0.6595999360248977, + "objective/train/value_std": 0.01849365234375, + "objective/train/weight_avg": 1.001081109046936, + "objective/train/weighted_lm_loss": 1.8936957120895386, + "objective/train/weights_max": 1.6510648727416992, + "objective/train/weights_min": 0.36863452196121216, + "theoretical_loss": 4.048112875412752, + "tokens_seen": 393216000 + }, + { + "epoch": 0.12, + "learning_rate": 0.000889744824265768, + "loss": 0.0877, + "theoretical_loss": 4.048112875412752, + "tokens_seen": 393216000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008896645803241855, + "loss": 0.0895, + "theoretical_loss": 4.0478123304179, + "tokens_seen": 393478144 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008895843363826032, + "loss": 0.0908, + "theoretical_loss": 4.047512041608029, + "tokens_seen": 393740288 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008895040924410207, + "loss": 0.0891, + "theoretical_loss": 4.047212008594424, + "tokens_seen": 394002432 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008894238484994383, + "loss": 0.0877, + "theoretical_loss": 4.046912230989214, + "tokens_seen": 394264576 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008893436045578559, + "loss": 0.0898, + "theoretical_loss": 4.0466127084053785, + "tokens_seen": 394526720 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008892633606162734, + "loss": 0.0882, + "theoretical_loss": 4.046313440456733, + "tokens_seen": 394788864 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008891831166746911, + "loss": 0.0893, + "theoretical_loss": 4.0460144267579405, + "tokens_seen": 395051008 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008891028727331087, + "loss": 0.0901, + "theoretical_loss": 4.045715666924499, + "tokens_seen": 395313152 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008890226287915263, + "loss": 0.0885, + "theoretical_loss": 4.045417160572743, + "tokens_seen": 395575296 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008889423848499438, + "loss": 0.0904, + "theoretical_loss": 4.045118907319839, + "tokens_seen": 395837440 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008888621409083615, + "loss": 0.0868, + "theoretical_loss": 4.04482090678379, + "tokens_seen": 396099584 + }, + { + "epoch": 0.12, + "learning_rate": 0.000888781896966779, + "loss": 0.0911, + "theoretical_loss": 4.044523158583421, + "tokens_seen": 396361728 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0021504259202629328, + "objective/train/docs_used": 150450, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7327624559402466, + "objective/train/original_loss": 1.732762336730957, + "objective/train/theoretical_loss": 4.044374378990227, + "objective/train/tokens_used": 416952800, + "objective/train/value_avg": -0.00965118408203125, + "objective/train/value_loss": 0.00036191148683428764, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.8271484375, + "objective/train/value_reward_corr": 0.6954772035577824, + "objective/train/value_std": 0.01971435546875, + "objective/train/weight_avg": 1.0023201704025269, + "objective/train/weighted_lm_loss": 1.7352375984191895, + "objective/train/weights_max": 1.7499204874038696, + "objective/train/weights_min": 0.37855079770088196, + "theoretical_loss": 4.044374378990227, + "tokens_seen": 396492800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008887016530251965, + "loss": 0.0896, + "theoretical_loss": 4.044225662338388, + "tokens_seen": 396623872 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008886214090836142, + "loss": 0.0886, + "theoretical_loss": 4.04392841766917, + "tokens_seen": 396886016 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008885411651420317, + "loss": 0.0865, + "theoretical_loss": 4.043631424197067, + "tokens_seen": 397148160 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008884609212004495, + "loss": 0.09, + "theoretical_loss": 4.0433346815442, + "tokens_seen": 397410304 + }, + { + "epoch": 0.12, + "learning_rate": 0.000888380677258867, + "loss": 0.0919, + "theoretical_loss": 4.043038189333508, + "tokens_seen": 397672448 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008883004333172846, + "loss": 0.0924, + "theoretical_loss": 4.042741947188741, + "tokens_seen": 397934592 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008882201893757022, + "loss": 0.0876, + "theoretical_loss": 4.042445954734466, + "tokens_seen": 398196736 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008881399454341197, + "loss": 0.0926, + "theoretical_loss": 4.0421502115960575, + "tokens_seen": 398458880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008880597014925373, + "loss": 0.0855, + "theoretical_loss": 4.0418547173997, + "tokens_seen": 398721024 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008879794575509549, + "loss": 0.0885, + "theoretical_loss": 4.041559471772382, + "tokens_seen": 398983168 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008878992136093725, + "loss": 0.0909, + "theoretical_loss": 4.041264474341896, + "tokens_seen": 399245312 + }, + { + "epoch": 0.12, + "learning_rate": 0.00088781896966779, + "loss": 0.0891, + "theoretical_loss": 4.040969724736838, + "tokens_seen": 399507456 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.001554798916913569, + "objective/train/docs_used": 151634, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7531509399414062, + "objective/train/original_loss": 1.7531508207321167, + "objective/train/theoretical_loss": 4.040675222586599, + "objective/train/tokens_used": 420229600, + "objective/train/value_avg": -0.009552001953125, + "objective/train/value_loss": 0.00028178023057989776, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.87255859375, + "objective/train/value_reward_corr": 0.7821445899209426, + "objective/train/value_std": 0.0219268798828125, + "objective/train/weight_avg": 1.0016918182373047, + "objective/train/weighted_lm_loss": 1.755144476890564, + "objective/train/weights_max": 1.6336557865142822, + "objective/train/weights_min": 0.541962206363678, + "theoretical_loss": 4.040675222586599, + "tokens_seen": 399769600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008877387257262078, + "loss": 0.0863, + "theoretical_loss": 4.040675222586599, + "tokens_seen": 399769600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008876584817846253, + "loss": 0.0854, + "theoretical_loss": 4.04038096752137, + "tokens_seen": 400031744 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008875782378430429, + "loss": 0.0886, + "theoretical_loss": 4.040086959172136, + "tokens_seen": 400293888 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008874979939014605, + "loss": 0.085, + "theoretical_loss": 4.039793197170672, + "tokens_seen": 400556032 + }, + { + "epoch": 0.12, + "learning_rate": 0.000887417749959878, + "loss": 0.0898, + "theoretical_loss": 4.039499681149547, + "tokens_seen": 400818176 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008873375060182957, + "loss": 0.0889, + "theoretical_loss": 4.039206410742114, + "tokens_seen": 401080320 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008872572620767132, + "loss": 0.0868, + "theoretical_loss": 4.038913385582515, + "tokens_seen": 401342464 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008871770181351308, + "loss": 0.0871, + "theoretical_loss": 4.038620605305673, + "tokens_seen": 401604608 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008870967741935484, + "loss": 0.0847, + "theoretical_loss": 4.038328069547293, + "tokens_seen": 401866752 + }, + { + "epoch": 0.12, + "learning_rate": 0.000887016530251966, + "loss": 0.0879, + "theoretical_loss": 4.03803577794386, + "tokens_seen": 402128896 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008869362863103836, + "loss": 0.0887, + "theoretical_loss": 4.037743730132635, + "tokens_seen": 402391040 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008868560423688012, + "loss": 0.0855, + "theoretical_loss": 4.037451925751654, + "tokens_seen": 402653184 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008867757984272188, + "loss": 0.088, + "theoretical_loss": 4.0371603644397265, + "tokens_seen": 402915328 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0015503467293456197, + "objective/train/docs_used": 152845, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6842749118804932, + "objective/train/original_loss": 1.6842749118804932, + "objective/train/theoretical_loss": 4.037014674821996, + "objective/train/tokens_used": 423506400, + "objective/train/value_avg": -0.013092041015625, + "objective/train/value_loss": 0.0004618663515429944, + "objective/train/value_max": -0.00034737586975097656, + "objective/train/value_min": -0.5283203125, + "objective/train/value_reward_corr": 0.5689597227558094, + "objective/train/value_std": 0.016571044921875, + "objective/train/weight_avg": 1.0017578601837158, + "objective/train/weighted_lm_loss": 1.6876652240753174, + "objective/train/weights_max": 1.291870355606079, + "objective/train/weights_min": 0.36923545598983765, + "theoretical_loss": 4.037014674821996, + "tokens_seen": 403046400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008866955544856363, + "loss": 0.0901, + "theoretical_loss": 4.03686904583643, + "tokens_seen": 403177472 + }, + { + "epoch": 0.12, + "learning_rate": 0.000886615310544054, + "loss": 0.0872, + "theoretical_loss": 4.036577969582114, + "tokens_seen": 403439616 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008865350666024715, + "loss": 0.0862, + "theoretical_loss": 4.03628713531789, + "tokens_seen": 403701760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008864548226608891, + "loss": 0.0896, + "theoretical_loss": 4.035996542685638, + "tokens_seen": 403963904 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008863745787193067, + "loss": 0.0864, + "theoretical_loss": 4.0357061913279955, + "tokens_seen": 404226048 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008862943347777242, + "loss": 0.0873, + "theoretical_loss": 4.035416080888364, + "tokens_seen": 404488192 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008862140908361419, + "loss": 0.0896, + "theoretical_loss": 4.035126211010899, + "tokens_seen": 404750336 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008861338468945595, + "loss": 0.0856, + "theoretical_loss": 4.034836581340515, + "tokens_seen": 405012480 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008860536029529771, + "loss": 0.0872, + "theoretical_loss": 4.034547191522877, + "tokens_seen": 405274624 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008859733590113947, + "loss": 0.0932, + "theoretical_loss": 4.034258041204404, + "tokens_seen": 405536768 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008858931150698123, + "loss": 0.0875, + "theoretical_loss": 4.033969130032263, + "tokens_seen": 405798912 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008858128711282298, + "loss": 0.0861, + "theoretical_loss": 4.033680457654368, + "tokens_seen": 406061056 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": 0.0022317736875265837, + "objective/train/docs_used": 154148, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6351722478866577, + "objective/train/original_loss": 1.6351724863052368, + "objective/train/theoretical_loss": 4.0333920237193785, + "objective/train/tokens_used": 426783200, + "objective/train/value_avg": -0.00635528564453125, + "objective/train/value_loss": 0.00012135026918258518, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.303955078125, + "objective/train/value_reward_corr": 0.5124807247227108, + "objective/train/value_std": 0.0070343017578125, + "objective/train/weight_avg": 1.00228750705719, + "objective/train/weighted_lm_loss": 1.6392003297805786, + "objective/train/weights_max": 1.1095505952835083, + "objective/train/weights_min": 0.37000179290771484, + "theoretical_loss": 4.0333920237193785, + "tokens_seen": 406323200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008857326271866474, + "loss": 0.0859, + "theoretical_loss": 4.0333920237193785, + "tokens_seen": 406323200 + }, + { + "epoch": 0.12, + "learning_rate": 0.000885652383245065, + "loss": 0.0823, + "theoretical_loss": 4.0331038278766975, + "tokens_seen": 406585344 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008855721393034825, + "loss": 0.0895, + "theoretical_loss": 4.032815869776471, + "tokens_seen": 406847488 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008854918953619003, + "loss": 0.089, + "theoretical_loss": 4.032528149069579, + "tokens_seen": 407109632 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008854116514203178, + "loss": 0.0891, + "theoretical_loss": 4.0322406654076435, + "tokens_seen": 407371776 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008853314074787354, + "loss": 0.0893, + "theoretical_loss": 4.0319534184430195, + "tokens_seen": 407633920 + }, + { + "epoch": 0.12, + "learning_rate": 0.000885251163537153, + "loss": 0.0867, + "theoretical_loss": 4.031666407828795, + "tokens_seen": 407896064 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008851709195955705, + "loss": 0.0867, + "theoretical_loss": 4.03137963321879, + "tokens_seen": 408158208 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008850906756539881, + "loss": 0.089, + "theoretical_loss": 4.0310930942675505, + "tokens_seen": 408420352 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008850104317124057, + "loss": 0.0878, + "theoretical_loss": 4.030806790630353, + "tokens_seen": 408682496 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008849301877708233, + "loss": 0.0856, + "theoretical_loss": 4.030520721963199, + "tokens_seen": 408944640 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008848499438292408, + "loss": 0.0855, + "theoretical_loss": 4.030234887922808, + "tokens_seen": 409206784 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008847696998876586, + "loss": 0.0877, + "theoretical_loss": 4.029949288166627, + "tokens_seen": 409468928 + }, + { + "epoch": 0.12, + "objective/train/advantage_avg": -0.0004230485938023776, + "objective/train/docs_used": 155224, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8668181896209717, + "objective/train/original_loss": 1.8668180704116821, + "objective/train/theoretical_loss": 4.029806576038263, + "objective/train/tokens_used": 430060000, + "objective/train/value_avg": -0.00667572021484375, + "objective/train/value_loss": 0.00026823318330571055, + "objective/train/value_max": -0.0002512931823730469, + "objective/train/value_min": -0.2183837890625, + "objective/train/value_reward_corr": 0.5231429303787112, + "objective/train/value_std": 0.0097808837890625, + "objective/train/weight_avg": 0.9997006058692932, + "objective/train/weighted_lm_loss": 1.864938735961914, + "objective/train/weights_max": 1.1423146724700928, + "objective/train/weights_min": 0.36977463960647583, + "theoretical_loss": 4.029806576038263, + "tokens_seen": 409600000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008846894559460761, + "loss": 0.0827, + "theoretical_loss": 4.0296639223528175, + "tokens_seen": 409731072 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008846092120044937, + "loss": 0.0863, + "theoretical_loss": 4.029378790140261, + "tokens_seen": 409993216 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008845289680629113, + "loss": 0.0867, + "theoretical_loss": 4.029093891188552, + "tokens_seen": 410255360 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008844487241213288, + "loss": 0.0881, + "theoretical_loss": 4.028809225158, + "tokens_seen": 410517504 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008843684801797465, + "loss": 0.0862, + "theoretical_loss": 4.028524791709621, + "tokens_seen": 410779648 + }, + { + "epoch": 0.12, + "learning_rate": 0.000884288236238164, + "loss": 0.0867, + "theoretical_loss": 4.028240590505148, + "tokens_seen": 411041792 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008842079922965816, + "loss": 0.0864, + "theoretical_loss": 4.027956621207015, + "tokens_seen": 411303936 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008841277483549992, + "loss": 0.0861, + "theoretical_loss": 4.027672883478364, + "tokens_seen": 411566080 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008840475044134169, + "loss": 0.0897, + "theoretical_loss": 4.027389376983041, + "tokens_seen": 411828224 + }, + { + "epoch": 0.12, + "learning_rate": 0.0008839672604718344, + "loss": 0.0898, + "theoretical_loss": 4.02710610138559, + "tokens_seen": 412090368 + }, + { + "epoch": 0.12, + "learning_rate": 0.000883887016530252, + "loss": 0.0868, + "theoretical_loss": 4.02682305635126, + "tokens_seen": 412352512 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008838067725886696, + "loss": 0.0879, + "theoretical_loss": 4.026540241545994, + "tokens_seen": 412614656 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0008909243042580783, + "objective/train/docs_used": 156351, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.63358473777771, + "objective/train/original_loss": 1.633584976196289, + "objective/train/theoretical_loss": 4.026257656636431, + "objective/train/tokens_used": 433336800, + "objective/train/value_avg": -0.00994110107421875, + "objective/train/value_loss": 0.0007588982116430998, + "objective/train/value_max": -0.00039196014404296875, + "objective/train/value_min": -0.9423828125, + "objective/train/value_reward_corr": 0.6866913112731529, + "objective/train/value_std": 0.0245361328125, + "objective/train/weight_avg": 1.0012253522872925, + "objective/train/weighted_lm_loss": 1.6334694623947144, + "objective/train/weights_max": 2.1991829872131348, + "objective/train/weights_min": 0.3683927655220032, + "theoretical_loss": 4.026257656636431, + "tokens_seen": 412876800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008837265286470871, + "loss": 0.086, + "theoretical_loss": 4.026257656636431, + "tokens_seen": 412876800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008836462847055048, + "loss": 0.0873, + "theoretical_loss": 4.025975301289906, + "tokens_seen": 413138944 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008835660407639223, + "loss": 0.0868, + "theoretical_loss": 4.025693175174443, + "tokens_seen": 413401088 + }, + { + "epoch": 0.13, + "learning_rate": 0.00088348579682234, + "loss": 0.084, + "theoretical_loss": 4.02541127795876, + "tokens_seen": 413663232 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008834055528807575, + "loss": 0.0864, + "theoretical_loss": 4.02512960931226, + "tokens_seen": 413925376 + }, + { + "epoch": 0.13, + "learning_rate": 0.000883325308939175, + "loss": 0.0834, + "theoretical_loss": 4.024848168905035, + "tokens_seen": 414187520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008832450649975928, + "loss": 0.0868, + "theoretical_loss": 4.02456695640786, + "tokens_seen": 414449664 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008831648210560103, + "loss": 0.0859, + "theoretical_loss": 4.0242859714921915, + "tokens_seen": 414711808 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008830845771144279, + "loss": 0.0889, + "theoretical_loss": 4.024005213830171, + "tokens_seen": 414973952 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008830043331728455, + "loss": 0.0838, + "theoretical_loss": 4.023724683094615, + "tokens_seen": 415236096 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008829240892312631, + "loss": 0.0836, + "theoretical_loss": 4.023444378959019, + "tokens_seen": 415498240 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008828438452896806, + "loss": 0.0853, + "theoretical_loss": 4.023164301097555, + "tokens_seen": 415760384 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008827636013480982, + "loss": 0.0857, + "theoretical_loss": 4.0228844491850655, + "tokens_seen": 416022528 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.002180003095418215, + "objective/train/docs_used": 157543, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5781835317611694, + "objective/train/original_loss": 1.5781837701797485, + "objective/train/theoretical_loss": 4.022744607858259, + "objective/train/tokens_used": 436613600, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00037475841236300766, + "objective/train/value_max": -0.0002397298812866211, + "objective/train/value_min": -0.37060546875, + "objective/train/value_reward_corr": 0.5816138183738614, + "objective/train/value_std": 0.0136260986328125, + "objective/train/weight_avg": 1.0023459196090698, + "objective/train/weighted_lm_loss": 1.5822744369506836, + "objective/train/weights_max": 1.4486113786697388, + "objective/train/weights_min": 0.37067708373069763, + "theoretical_loss": 4.022744607858259, + "tokens_seen": 416153600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008826833574065158, + "loss": 0.0879, + "theoretical_loss": 4.022604822897068, + "tokens_seen": 416284672 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008826031134649333, + "loss": 0.0876, + "theoretical_loss": 4.0223254219097475, + "tokens_seen": 416546816 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008825228695233511, + "loss": 0.0878, + "theoretical_loss": 4.022046245899958, + "tokens_seen": 416808960 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008824426255817686, + "loss": 0.0861, + "theoretical_loss": 4.021767294545221, + "tokens_seen": 417071104 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008823623816401862, + "loss": 0.0877, + "theoretical_loss": 4.021488567523721, + "tokens_seen": 417333248 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008822821376986038, + "loss": 0.0875, + "theoretical_loss": 4.021210064514305, + "tokens_seen": 417595392 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008822018937570213, + "loss": 0.0835, + "theoretical_loss": 4.020931785196484, + "tokens_seen": 417857536 + }, + { + "epoch": 0.13, + "learning_rate": 0.000882121649815439, + "loss": 0.0852, + "theoretical_loss": 4.020653729250424, + "tokens_seen": 418119680 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008820414058738565, + "loss": 0.0851, + "theoretical_loss": 4.020375896356951, + "tokens_seen": 418381824 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008819611619322741, + "loss": 0.0909, + "theoretical_loss": 4.0200982861975465, + "tokens_seen": 418643968 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008818809179906917, + "loss": 0.0883, + "theoretical_loss": 4.019820898454345, + "tokens_seen": 418906112 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008818006740491094, + "loss": 0.0843, + "theoretical_loss": 4.019543732810134, + "tokens_seen": 419168256 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0019043140346184373, + "objective/train/docs_used": 158751, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.881985068321228, + "objective/train/original_loss": 1.881984829902649, + "objective/train/theoretical_loss": 4.019266788948352, + "objective/train/tokens_used": 439890400, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00036503959563560784, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.80029296875, + "objective/train/value_reward_corr": 0.6310464755810599, + "objective/train/value_std": 0.01422882080078125, + "objective/train/weight_avg": 1.0020649433135986, + "objective/train/weighted_lm_loss": 1.8862380981445312, + "objective/train/weights_max": 1.4735807180404663, + "objective/train/weights_min": 0.3895317018032074, + "theoretical_loss": 4.019266788948352, + "tokens_seen": 419430400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008817204301075269, + "loss": 0.0876, + "theoretical_loss": 4.019266788948352, + "tokens_seen": 419430400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008816401861659445, + "loss": 0.0901, + "theoretical_loss": 4.0189900665530836, + "tokens_seen": 419692544 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008815599422243621, + "loss": 0.0881, + "theoretical_loss": 4.0187135653090635, + "tokens_seen": 419954688 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008814796982827796, + "loss": 0.0832, + "theoretical_loss": 4.018437284901671, + "tokens_seen": 420216832 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008813994543411973, + "loss": 0.09, + "theoretical_loss": 4.018161225016926, + "tokens_seen": 420478976 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008813192103996148, + "loss": 0.088, + "theoretical_loss": 4.0178853853414935, + "tokens_seen": 420741120 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008812389664580324, + "loss": 0.0876, + "theoretical_loss": 4.017609765562678, + "tokens_seen": 421003264 + }, + { + "epoch": 0.13, + "learning_rate": 0.00088115872251645, + "loss": 0.0874, + "theoretical_loss": 4.017334365368422, + "tokens_seen": 421265408 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008810784785748677, + "loss": 0.0892, + "theoretical_loss": 4.017059184447303, + "tokens_seen": 421527552 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008809982346332853, + "loss": 0.088, + "theoretical_loss": 4.0167842224885355, + "tokens_seen": 421789696 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008809179906917028, + "loss": 0.0832, + "theoretical_loss": 4.016509479181968, + "tokens_seen": 422051840 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008808377467501204, + "loss": 0.0867, + "theoretical_loss": 4.016234954218078, + "tokens_seen": 422313984 + }, + { + "epoch": 0.13, + "learning_rate": 0.000880757502808538, + "loss": 0.086, + "theoretical_loss": 4.015960647287975, + "tokens_seen": 422576128 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.000816687592305243, + "objective/train/docs_used": 159824, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6725859642028809, + "objective/train/original_loss": 1.6725860834121704, + "objective/train/theoretical_loss": 4.015823575489237, + "objective/train/tokens_used": 443167200, + "objective/train/value_avg": -0.00794219970703125, + "objective/train/value_loss": 0.0004758470749948174, + "objective/train/value_max": -0.0003101825714111328, + "objective/train/value_min": -0.78759765625, + "objective/train/value_reward_corr": 0.706160026017736, + "objective/train/value_std": 0.0199432373046875, + "objective/train/weight_avg": 1.0010310411453247, + "objective/train/weighted_lm_loss": 1.6731016635894775, + "objective/train/weights_max": 1.929433822631836, + "objective/train/weights_min": 0.39345037937164307, + "theoretical_loss": 4.015823575489237, + "tokens_seen": 422707200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008806772588669556, + "loss": 0.0878, + "theoretical_loss": 4.015686558083396, + "tokens_seen": 422838272 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008805970149253731, + "loss": 0.0882, + "theoretical_loss": 4.015412686296704, + "tokens_seen": 423100416 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008805167709837908, + "loss": 0.0873, + "theoretical_loss": 4.0151390316208895, + "tokens_seen": 423362560 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008804365270422083, + "loss": 0.0878, + "theoretical_loss": 4.014865593749563, + "tokens_seen": 423624704 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008803562831006258, + "loss": 0.0855, + "theoretical_loss": 4.014592372376958, + "tokens_seen": 423886848 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008802760391590436, + "loss": 0.0876, + "theoretical_loss": 4.014319367197928, + "tokens_seen": 424148992 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008801957952174611, + "loss": 0.0907, + "theoretical_loss": 4.014046577907946, + "tokens_seen": 424411136 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008801155512758787, + "loss": 0.089, + "theoretical_loss": 4.013774004203099, + "tokens_seen": 424673280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008800353073342963, + "loss": 0.0877, + "theoretical_loss": 4.013501645780092, + "tokens_seen": 424935424 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008799550633927139, + "loss": 0.0839, + "theoretical_loss": 4.013229502336242, + "tokens_seen": 425197568 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008798748194511314, + "loss": 0.0868, + "theoretical_loss": 4.0129575735694765, + "tokens_seen": 425459712 + }, + { + "epoch": 0.13, + "learning_rate": 0.000879794575509549, + "loss": 0.0894, + "theoretical_loss": 4.012685859178337, + "tokens_seen": 425721856 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0018310642335563898, + "objective/train/docs_used": 161088, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5738030672073364, + "objective/train/original_loss": 1.5738029479980469, + "objective/train/theoretical_loss": 4.012414358861969, + "objective/train/tokens_used": 446444000, + "objective/train/value_avg": -0.006816864013671875, + "objective/train/value_loss": 0.00017067580483853817, + "objective/train/value_max": -0.0003542900085449219, + "objective/train/value_min": -0.53857421875, + "objective/train/value_reward_corr": 0.5243874673729859, + "objective/train/value_std": 0.009002685546875, + "objective/train/weight_avg": 1.0019116401672363, + "objective/train/weighted_lm_loss": 1.5763643980026245, + "objective/train/weights_max": 1.7135618925094604, + "objective/train/weights_min": 0.3707987070083618, + "theoretical_loss": 4.012414358861969, + "tokens_seen": 425984000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008797143315679666, + "loss": 0.0845, + "theoretical_loss": 4.012414358861969, + "tokens_seen": 425984000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008796340876263842, + "loss": 0.0872, + "theoretical_loss": 4.01214307232013, + "tokens_seen": 426246144 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008795538436848019, + "loss": 0.0868, + "theoretical_loss": 4.011871999253178, + "tokens_seen": 426508288 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008794735997432194, + "loss": 0.0889, + "theoretical_loss": 4.011601139362078, + "tokens_seen": 426770432 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008793933558016371, + "loss": 0.0858, + "theoretical_loss": 4.011330492348397, + "tokens_seen": 427032576 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008793131118600546, + "loss": 0.0894, + "theoretical_loss": 4.0110600579143, + "tokens_seen": 427294720 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008792328679184721, + "loss": 0.0869, + "theoretical_loss": 4.010789835762555, + "tokens_seen": 427556864 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008791526239768898, + "loss": 0.0891, + "theoretical_loss": 4.010519825596525, + "tokens_seen": 427819008 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008790723800353073, + "loss": 0.0865, + "theoretical_loss": 4.010250027120169, + "tokens_seen": 428081152 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008789921360937249, + "loss": 0.0857, + "theoretical_loss": 4.009980440038043, + "tokens_seen": 428343296 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008789118921521425, + "loss": 0.0893, + "theoretical_loss": 4.009711064055291, + "tokens_seen": 428605440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008788316482105602, + "loss": 0.0856, + "theoretical_loss": 4.009441898877652, + "tokens_seen": 428867584 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008787514042689777, + "loss": 0.0858, + "theoretical_loss": 4.009172944211455, + "tokens_seen": 429129728 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0020093463826924562, + "objective/train/docs_used": 162190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7857974767684937, + "objective/train/original_loss": 1.7857975959777832, + "objective/train/theoretical_loss": 4.009038545728536, + "objective/train/tokens_used": 449720800, + "objective/train/value_avg": -0.00783538818359375, + "objective/train/value_loss": 0.0001668190088821575, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.62890625, + "objective/train/value_reward_corr": 0.671483245399306, + "objective/train/value_std": 0.01235198974609375, + "objective/train/weight_avg": 1.0020873546600342, + "objective/train/weighted_lm_loss": 1.7891877889633179, + "objective/train/weights_max": 1.2835239171981812, + "objective/train/weights_min": 0.3886055648326874, + "theoretical_loss": 4.009038545728536, + "tokens_seen": 429260800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008786711603273953, + "loss": 0.0874, + "theoretical_loss": 4.008904199763615, + "tokens_seen": 429391872 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008785909163858129, + "loss": 0.0856, + "theoretical_loss": 4.008635665241635, + "tokens_seen": 429654016 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008785106724442304, + "loss": 0.0889, + "theoretical_loss": 4.008367340353602, + "tokens_seen": 429916160 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008784304285026481, + "loss": 0.0869, + "theoretical_loss": 4.008099224808188, + "tokens_seen": 430178304 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008783501845610656, + "loss": 0.0842, + "theoretical_loss": 4.007831318314645, + "tokens_seen": 430440448 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008782699406194833, + "loss": 0.0848, + "theoretical_loss": 4.00756362058281, + "tokens_seen": 430702592 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008781896966779008, + "loss": 0.0841, + "theoretical_loss": 4.007296131323094, + "tokens_seen": 430964736 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008781094527363185, + "loss": 0.087, + "theoretical_loss": 4.007028850246487, + "tokens_seen": 431226880 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008780292087947361, + "loss": 0.0865, + "theoretical_loss": 4.006761777064557, + "tokens_seen": 431489024 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008779489648531536, + "loss": 0.0862, + "theoretical_loss": 4.006494911489444, + "tokens_seen": 431751168 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008778687209115712, + "loss": 0.0868, + "theoretical_loss": 4.006228253233864, + "tokens_seen": 432013312 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008777884769699888, + "loss": 0.0884, + "theoretical_loss": 4.0059618020111, + "tokens_seen": 432275456 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0010576657950878143, + "objective/train/docs_used": 163281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.687941551208496, + "objective/train/original_loss": 1.687941312789917, + "objective/train/theoretical_loss": 4.00569555753501, + "objective/train/tokens_used": 452997600, + "objective/train/value_avg": -0.006397247314453125, + "objective/train/value_loss": 0.0002257569576613605, + "objective/train/value_max": -0.0002758502960205078, + "objective/train/value_min": -0.3525390625, + "objective/train/value_reward_corr": 0.5840597410895295, + "objective/train/value_std": 0.01157379150390625, + "objective/train/weight_avg": 1.0011610984802246, + "objective/train/weighted_lm_loss": 1.6893194913864136, + "objective/train/weights_max": 1.2730658054351807, + "objective/train/weights_min": 0.3710364103317261, + "theoretical_loss": 4.00569555753501, + "tokens_seen": 432537600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008777082330284064, + "loss": 0.0873, + "theoretical_loss": 4.00569555753501, + "tokens_seen": 432537600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008776279890868239, + "loss": 0.0862, + "theoretical_loss": 4.0054295195200185, + "tokens_seen": 432799744 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008775477451452416, + "loss": 0.0855, + "theoretical_loss": 4.005163687681116, + "tokens_seen": 433061888 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008774675012036591, + "loss": 0.0855, + "theoretical_loss": 4.00489806173386, + "tokens_seen": 433324032 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008773872572620766, + "loss": 0.0895, + "theoretical_loss": 4.004632641394372, + "tokens_seen": 433586176 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008773070133204944, + "loss": 0.0874, + "theoretical_loss": 4.0043674263793365, + "tokens_seen": 433848320 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008772267693789119, + "loss": 0.0848, + "theoretical_loss": 4.004102416405998, + "tokens_seen": 434110464 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008771465254373296, + "loss": 0.0857, + "theoretical_loss": 4.0038376111921625, + "tokens_seen": 434372608 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008770662814957471, + "loss": 0.0877, + "theoretical_loss": 4.0035730104561935, + "tokens_seen": 434634752 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008769860375541647, + "loss": 0.0893, + "theoretical_loss": 4.003308613917012, + "tokens_seen": 434896896 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008769057936125823, + "loss": 0.0879, + "theoretical_loss": 4.003044421294094, + "tokens_seen": 435159040 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008768255496709998, + "loss": 0.0856, + "theoretical_loss": 4.002780432307468, + "tokens_seen": 435421184 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008767453057294174, + "loss": 0.0858, + "theoretical_loss": 4.0025166466777184, + "tokens_seen": 435683328 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0019057170720770955, + "objective/train/docs_used": 164592, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.62677001953125, + "objective/train/original_loss": 1.6267703771591187, + "objective/train/theoretical_loss": 4.002384830034506, + "objective/train/tokens_used": 456274400, + "objective/train/value_avg": -0.00865936279296875, + "objective/train/value_loss": 0.00031009086524136364, + "objective/train/value_max": -0.00029587745666503906, + "objective/train/value_min": -0.68408203125, + "objective/train/value_reward_corr": 0.6592497390378055, + "objective/train/value_std": 0.01568603515625, + "objective/train/weight_avg": 1.0020478963851929, + "objective/train/weighted_lm_loss": 1.629705548286438, + "objective/train/weights_max": 1.3052014112472534, + "objective/train/weights_min": 0.3907938003540039, + "theoretical_loss": 4.002384830034506, + "tokens_seen": 435814400 + }, + { + "epoch": 0.13, + "learning_rate": 0.000876665061787835, + "loss": 0.0874, + "theoretical_loss": 4.00225306412598, + "tokens_seen": 435945472 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008765848178462527, + "loss": 0.0876, + "theoretical_loss": 4.001989684373934, + "tokens_seen": 436207616 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008765045739046702, + "loss": 0.0865, + "theoretical_loss": 4.0017265071438155, + "tokens_seen": 436469760 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008764243299630879, + "loss": 0.0866, + "theoretical_loss": 4.001463532158402, + "tokens_seen": 436731904 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008763440860215054, + "loss": 0.0868, + "theoretical_loss": 4.001200759141019, + "tokens_seen": 436994048 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008762638420799229, + "loss": 0.0861, + "theoretical_loss": 4.000938187815535, + "tokens_seen": 437256192 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008761835981383406, + "loss": 0.0855, + "theoretical_loss": 4.000675817906362, + "tokens_seen": 437518336 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008761033541967581, + "loss": 0.0857, + "theoretical_loss": 4.000413649138453, + "tokens_seen": 437780480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008760231102551757, + "loss": 0.0873, + "theoretical_loss": 4.000151681237301, + "tokens_seen": 438042624 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008759428663135933, + "loss": 0.0856, + "theoretical_loss": 3.9998899139289392, + "tokens_seen": 438304768 + }, + { + "epoch": 0.13, + "learning_rate": 0.000875862622372011, + "loss": 0.0858, + "theoretical_loss": 3.999628346939934, + "tokens_seen": 438566912 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008757823784304286, + "loss": 0.0886, + "theoretical_loss": 3.9993669799973928, + "tokens_seen": 438829056 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0023695742711424828, + "objective/train/docs_used": 165818, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.712162733078003, + "objective/train/original_loss": 1.7121628522872925, + "objective/train/theoretical_loss": 3.9991058128289536, + "objective/train/tokens_used": 459551200, + "objective/train/value_avg": -0.0120391845703125, + "objective/train/value_loss": 0.0006223080563358963, + "objective/train/value_max": -0.0002002716064453125, + "objective/train/value_min": -0.81982421875, + "objective/train/value_reward_corr": 0.7511700502995355, + "objective/train/value_std": 0.0264739990234375, + "objective/train/weight_avg": 1.0026506185531616, + "objective/train/weighted_lm_loss": 1.716010570526123, + "objective/train/weights_max": 1.3089244365692139, + "objective/train/weights_min": 0.41716745495796204, + "theoretical_loss": 3.9991058128289536, + "tokens_seen": 439091200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008757021344888461, + "loss": 0.0856, + "theoretical_loss": 3.9991058128289536, + "tokens_seen": 439091200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008756218905472637, + "loss": 0.0895, + "theoretical_loss": 3.998844845162789, + "tokens_seen": 439353344 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008755416466056813, + "loss": 0.0873, + "theoretical_loss": 3.998584076727604, + "tokens_seen": 439615488 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008754614026640989, + "loss": 0.0886, + "theoretical_loss": 3.998323507252633, + "tokens_seen": 439877632 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008753811587225164, + "loss": 0.0853, + "theoretical_loss": 3.998063136467639, + "tokens_seen": 440139776 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008753009147809341, + "loss": 0.0881, + "theoretical_loss": 3.9978029641029154, + "tokens_seen": 440401920 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008752206708393516, + "loss": 0.0861, + "theoretical_loss": 3.9975429898892783, + "tokens_seen": 440664064 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008751404268977691, + "loss": 0.0871, + "theoretical_loss": 3.9972832135580707, + "tokens_seen": 440926208 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008750601829561869, + "loss": 0.087, + "theoretical_loss": 3.9970236348411605, + "tokens_seen": 441188352 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008749799390146044, + "loss": 0.0868, + "theoretical_loss": 3.996764253470935, + "tokens_seen": 441450496 + }, + { + "epoch": 0.13, + "learning_rate": 0.000874899695073022, + "loss": 0.0825, + "theoretical_loss": 3.996505069180306, + "tokens_seen": 441712640 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008748194511314396, + "loss": 0.0872, + "theoretical_loss": 3.9962460817027017, + "tokens_seen": 441974784 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008747392071898572, + "loss": 0.0865, + "theoretical_loss": 3.995987290772071, + "tokens_seen": 442236928 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": 0.0025604458060115576, + "objective/train/docs_used": 166951, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6768556833267212, + "objective/train/original_loss": 1.6768556833267212, + "objective/train/theoretical_loss": 3.9958579689288705, + "objective/train/tokens_used": 462828000, + "objective/train/value_avg": -0.0081634521484375, + "objective/train/value_loss": 0.00017307909729424864, + "objective/train/value_max": -0.0002472400665283203, + "objective/train/value_min": -0.494140625, + "objective/train/value_reward_corr": 0.5969880927520907, + "objective/train/value_std": 0.0106353759765625, + "objective/train/weight_avg": 1.0026414394378662, + "objective/train/weighted_lm_loss": 1.6814841032028198, + "objective/train/weights_max": 1.2068192958831787, + "objective/train/weights_min": 0.37579405307769775, + "theoretical_loss": 3.9958579689288705, + "tokens_seen": 442368000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008746589632482748, + "loss": 0.0859, + "theoretical_loss": 3.9957286961228786, + "tokens_seen": 442499072 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008745787193066924, + "loss": 0.0838, + "theoretical_loss": 3.995470297490106, + "tokens_seen": 442761216 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008744984753651099, + "loss": 0.0841, + "theoretical_loss": 3.995212094609249, + "tokens_seen": 443023360 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008744182314235275, + "loss": 0.0857, + "theoretical_loss": 3.994954087216315, + "tokens_seen": 443285504 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008743379874819452, + "loss": 0.0886, + "theoretical_loss": 3.994696275047825, + "tokens_seen": 443547648 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008742577435403627, + "loss": 0.0862, + "theoretical_loss": 3.9944386578408113, + "tokens_seen": 443809792 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008741774995987804, + "loss": 0.0845, + "theoretical_loss": 3.9941812353328126, + "tokens_seen": 444071936 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008740972556571979, + "loss": 0.0852, + "theoretical_loss": 3.993924007261878, + "tokens_seen": 444334080 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008740170117156155, + "loss": 0.0881, + "theoretical_loss": 3.9936669733665617, + "tokens_seen": 444596224 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008739367677740331, + "loss": 0.0859, + "theoretical_loss": 3.9934101333859253, + "tokens_seen": 444858368 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008738565238324506, + "loss": 0.0882, + "theoretical_loss": 3.9931534870595327, + "tokens_seen": 445120512 + }, + { + "epoch": 0.13, + "learning_rate": 0.0008737762798908682, + "loss": 0.0857, + "theoretical_loss": 3.9928970341274517, + "tokens_seen": 445382656 + }, + { + "epoch": 0.13, + "objective/train/advantage_avg": -4.0266091673402116e-05, + "objective/train/docs_used": 168097, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8889011144638062, + "objective/train/original_loss": 1.8889009952545166, + "objective/train/theoretical_loss": 3.992640774330251, + "objective/train/tokens_used": 466104800, + "objective/train/value_avg": -0.007015228271484375, + "objective/train/value_loss": 0.00026242577587254345, + "objective/train/value_max": -0.0002415180206298828, + "objective/train/value_min": -0.1566162109375, + "objective/train/value_reward_corr": 0.5724330601050478, + "objective/train/value_std": 0.0086822509765625, + "objective/train/weight_avg": 1.0000807046890259, + "objective/train/weighted_lm_loss": 1.889707326889038, + "objective/train/weights_max": 1.112181305885315, + "objective/train/weights_min": 0.38205578923225403, + "theoretical_loss": 3.992640774330251, + "tokens_seen": 445644800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008736960359492858, + "loss": 0.0861, + "theoretical_loss": 3.992640774330251, + "tokens_seen": 445644800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008736157920077035, + "loss": 0.084, + "theoretical_loss": 3.9923847074090015, + "tokens_seen": 445906944 + }, + { + "epoch": 0.14, + "learning_rate": 0.000873535548066121, + "loss": 0.0835, + "theoretical_loss": 3.9921288331052702, + "tokens_seen": 446169088 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008734553041245387, + "loss": 0.0856, + "theoretical_loss": 3.991873151161124, + "tokens_seen": 446431232 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008733750601829562, + "loss": 0.0886, + "theoretical_loss": 3.9916176613191263, + "tokens_seen": 446693376 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008732948162413738, + "loss": 0.0862, + "theoretical_loss": 3.9913623633223354, + "tokens_seen": 446955520 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008732145722997914, + "loss": 0.0831, + "theoretical_loss": 3.9911072569143036, + "tokens_seen": 447217664 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008731343283582089, + "loss": 0.0842, + "theoretical_loss": 3.9908523418390764, + "tokens_seen": 447479808 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008730540844166266, + "loss": 0.0841, + "theoretical_loss": 3.990597617841191, + "tokens_seen": 447741952 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008729738404750441, + "loss": 0.0841, + "theoretical_loss": 3.9903430846656742, + "tokens_seen": 448004096 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008728935965334618, + "loss": 0.0871, + "theoretical_loss": 3.990088742058043, + "tokens_seen": 448266240 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008728133525918794, + "loss": 0.0835, + "theoretical_loss": 3.9898345897643024, + "tokens_seen": 448528384 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008727331086502969, + "loss": 0.0835, + "theoretical_loss": 3.989580627530943, + "tokens_seen": 448790528 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0008596550906077027, + "objective/train/docs_used": 169309, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.786888599395752, + "objective/train/original_loss": 1.786888599395752, + "objective/train/theoretical_loss": 3.9894537176078178, + "objective/train/tokens_used": 469381600, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00042329219286330044, + "objective/train/value_max": -0.0002359151840209961, + "objective/train/value_min": -0.681640625, + "objective/train/value_reward_corr": 0.5803828830417395, + "objective/train/value_std": 0.014617919921875, + "objective/train/weight_avg": 1.0010493993759155, + "objective/train/weighted_lm_loss": 1.7893576622009277, + "objective/train/weights_max": 1.8888424634933472, + "objective/train/weights_min": 0.3687688410282135, + "theoretical_loss": 3.9894537176078178, + "tokens_seen": 448921600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008726528647087145, + "loss": 0.0836, + "theoretical_loss": 3.9893268551049417, + "tokens_seen": 449052672 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008725726207671321, + "loss": 0.0848, + "theoretical_loss": 3.9890732722337594, + "tokens_seen": 449314816 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008724923768255497, + "loss": 0.0882, + "theoretical_loss": 3.988819878665341, + "tokens_seen": 449576960 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008724121328839672, + "loss": 0.085, + "theoretical_loss": 3.988566674148111, + "tokens_seen": 449839104 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008723318889423849, + "loss": 0.0854, + "theoretical_loss": 3.988313658430978, + "tokens_seen": 450101248 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008722516450008024, + "loss": 0.0872, + "theoretical_loss": 3.9880608312633274, + "tokens_seen": 450363392 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008721714010592199, + "loss": 0.0875, + "theoretical_loss": 3.9878081923950237, + "tokens_seen": 450625536 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008720911571176377, + "loss": 0.0854, + "theoretical_loss": 3.9875557415764087, + "tokens_seen": 450887680 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008720109131760552, + "loss": 0.0834, + "theoretical_loss": 3.9873034785582995, + "tokens_seen": 451149824 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008719306692344729, + "loss": 0.0829, + "theoretical_loss": 3.9870514030919884, + "tokens_seen": 451411968 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008718504252928904, + "loss": 0.0848, + "theoretical_loss": 3.986799514929242, + "tokens_seen": 451674112 + }, + { + "epoch": 0.14, + "learning_rate": 0.000871770181351308, + "loss": 0.0876, + "theoretical_loss": 3.9865478138222974, + "tokens_seen": 451936256 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0022668084129691124, + "objective/train/docs_used": 170598, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.797487735748291, + "objective/train/original_loss": 1.797487735748291, + "objective/train/theoretical_loss": 3.9862962995238647, + "objective/train/tokens_used": 472658400, + "objective/train/value_avg": -0.0083465576171875, + "objective/train/value_loss": 0.0004899487248621881, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.68115234375, + "objective/train/value_reward_corr": 0.532976155116499, + "objective/train/value_std": 0.0135040283203125, + "objective/train/weight_avg": 1.0024774074554443, + "objective/train/weighted_lm_loss": 1.8017926216125488, + "objective/train/weights_max": 1.5135427713394165, + "objective/train/weights_min": 0.3686288893222809, + "theoretical_loss": 3.9862962995238647, + "tokens_seen": 452198400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008716899374097256, + "loss": 0.086, + "theoretical_loss": 3.9862962995238647, + "tokens_seen": 452198400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008716096934681432, + "loss": 0.0884, + "theoretical_loss": 3.9860449717871234, + "tokens_seen": 452460544 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008715294495265607, + "loss": 0.0851, + "theoretical_loss": 3.9857938303657217, + "tokens_seen": 452722688 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008714492055849783, + "loss": 0.0885, + "theoretical_loss": 3.9855428750137754, + "tokens_seen": 452984832 + }, + { + "epoch": 0.14, + "learning_rate": 0.000871368961643396, + "loss": 0.0889, + "theoretical_loss": 3.9852921054858665, + "tokens_seen": 453246976 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008712887177018135, + "loss": 0.0843, + "theoretical_loss": 3.9850415215370445, + "tokens_seen": 453509120 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008712084737602312, + "loss": 0.0819, + "theoretical_loss": 3.984791122922821, + "tokens_seen": 453771264 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008711282298186487, + "loss": 0.0859, + "theoretical_loss": 3.98454090939917, + "tokens_seen": 454033408 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008710479858770663, + "loss": 0.0861, + "theoretical_loss": 3.984290880722531, + "tokens_seen": 454295552 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008709677419354839, + "loss": 0.0834, + "theoretical_loss": 3.9840410366498, + "tokens_seen": 454557696 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008708874979939014, + "loss": 0.0869, + "theoretical_loss": 3.983791376938336, + "tokens_seen": 454819840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008708072540523191, + "loss": 0.083, + "theoretical_loss": 3.983541901345955, + "tokens_seen": 455081984 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008707270101107366, + "loss": 0.0871, + "theoretical_loss": 3.983292609630931, + "tokens_seen": 455344128 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 6.911411037435755e-05, + "objective/train/docs_used": 171735, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7402760982513428, + "objective/train/original_loss": 1.7402762174606323, + "objective/train/theoretical_loss": 3.983168032652013, + "objective/train/tokens_used": 475935200, + "objective/train/value_avg": -0.01013946533203125, + "objective/train/value_loss": 0.0009821663843467832, + "objective/train/value_max": -0.00021147727966308594, + "objective/train/value_min": -0.916015625, + "objective/train/value_reward_corr": 0.7078605683518372, + "objective/train/value_std": 0.0244293212890625, + "objective/train/weight_avg": 1.000481367111206, + "objective/train/weighted_lm_loss": 1.7407102584838867, + "objective/train/weights_max": 2.12113881111145, + "objective/train/weights_min": 0.36906227469444275, + "theoretical_loss": 3.983168032652013, + "tokens_seen": 455475200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008706467661691543, + "loss": 0.0875, + "theoretical_loss": 3.9830435015519936, + "tokens_seen": 455606272 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008705665222275719, + "loss": 0.086, + "theoretical_loss": 3.982794576868328, + "tokens_seen": 455868416 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008704862782859895, + "loss": 0.0847, + "theoretical_loss": 3.982545835339573, + "tokens_seen": 456130560 + }, + { + "epoch": 0.14, + "learning_rate": 0.000870406034344407, + "loss": 0.0849, + "theoretical_loss": 3.982297276725822, + "tokens_seen": 456392704 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008703257904028246, + "loss": 0.0838, + "theoretical_loss": 3.9820489007876176, + "tokens_seen": 456654848 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008702455464612422, + "loss": 0.0813, + "theoretical_loss": 3.981800707285955, + "tokens_seen": 456916992 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008701653025196597, + "loss": 0.0853, + "theoretical_loss": 3.9815526959822787, + "tokens_seen": 457179136 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008700850585780774, + "loss": 0.0835, + "theoretical_loss": 3.981304866638481, + "tokens_seen": 457441280 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008700048146364949, + "loss": 0.0874, + "theoretical_loss": 3.9810572190169027, + "tokens_seen": 457703424 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008699245706949126, + "loss": 0.0864, + "theoretical_loss": 3.9808097528803295, + "tokens_seen": 457965568 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008698443267533302, + "loss": 0.0861, + "theoretical_loss": 3.9805624679919935, + "tokens_seen": 458227712 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008697640828117477, + "loss": 0.0878, + "theoretical_loss": 3.98031536411557, + "tokens_seen": 458489856 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0018690774450078607, + "objective/train/docs_used": 173009, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6653363704681396, + "objective/train/original_loss": 1.6653363704681396, + "objective/train/theoretical_loss": 3.9800684410151783, + "objective/train/tokens_used": 479212000, + "objective/train/value_avg": -0.007537841796875, + "objective/train/value_loss": 0.00013798951113130897, + "objective/train/value_max": -0.0002846717834472656, + "objective/train/value_min": -0.277099609375, + "objective/train/value_reward_corr": 0.48532692544123834, + "objective/train/value_std": 0.00695037841796875, + "objective/train/weight_avg": 1.0019335746765137, + "objective/train/weighted_lm_loss": 1.6689884662628174, + "objective/train/weights_max": 1.0873464345932007, + "objective/train/weights_min": 0.3710477352142334, + "theoretical_loss": 3.9800684410151783, + "tokens_seen": 458752000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008696838388701653, + "loss": 0.0846, + "theoretical_loss": 3.9800684410151783, + "tokens_seen": 458752000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008696035949285829, + "loss": 0.0826, + "theoretical_loss": 3.979821698455379, + "tokens_seen": 459014144 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008695233509870005, + "loss": 0.0861, + "theoretical_loss": 3.9795751362011735, + "tokens_seen": 459276288 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008694431070454181, + "loss": 0.0848, + "theoretical_loss": 3.979328754018004, + "tokens_seen": 459538432 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008693628631038357, + "loss": 0.085, + "theoretical_loss": 3.979082551671749, + "tokens_seen": 459800576 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008692826191622532, + "loss": 0.0876, + "theoretical_loss": 3.9788365289287286, + "tokens_seen": 460062720 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008692023752206708, + "loss": 0.0879, + "theoretical_loss": 3.9785906855556945, + "tokens_seen": 460324864 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008691221312790885, + "loss": 0.0846, + "theoretical_loss": 3.9783450213198384, + "tokens_seen": 460587008 + }, + { + "epoch": 0.14, + "learning_rate": 0.000869041887337506, + "loss": 0.0837, + "theoretical_loss": 3.9780995359887843, + "tokens_seen": 460849152 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008689616433959237, + "loss": 0.0868, + "theoretical_loss": 3.9778542293305894, + "tokens_seen": 461111296 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008688813994543412, + "loss": 0.0865, + "theoretical_loss": 3.977609101113744, + "tokens_seen": 461373440 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008688011555127588, + "loss": 0.0809, + "theoretical_loss": 3.97736415110717, + "tokens_seen": 461635584 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008687209115711764, + "loss": 0.0903, + "theoretical_loss": 3.977119379080218, + "tokens_seen": 461897728 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0003697322681546211, + "objective/train/docs_used": 174193, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8311926126480103, + "objective/train/original_loss": 1.8311924934387207, + "objective/train/theoretical_loss": 3.9769970597371405, + "objective/train/tokens_used": 482488800, + "objective/train/value_avg": -0.0074005126953125, + "objective/train/value_loss": 0.00031192455207929015, + "objective/train/value_max": -0.00023233890533447266, + "objective/train/value_min": -0.40869140625, + "objective/train/value_reward_corr": 0.5429328033135243, + "objective/train/value_std": 0.011566162109375, + "objective/train/weight_avg": 1.0005161762237549, + "objective/train/weighted_lm_loss": 1.8320584297180176, + "objective/train/weights_max": 1.3934298753738403, + "objective/train/weights_min": 0.6126090288162231, + "theoretical_loss": 3.9769970597371405, + "tokens_seen": 462028800 + }, + { + "epoch": 0.14, + "learning_rate": 0.000868640667629594, + "loss": 0.0886, + "theoretical_loss": 3.9768747848026695, + "tokens_seen": 462159872 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008685604236880115, + "loss": 0.0836, + "theoretical_loss": 3.9766303680447335, + "tokens_seen": 462422016 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008684801797464291, + "loss": 0.0834, + "theoretical_loss": 3.9763861285770457, + "tokens_seen": 462684160 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008683999358048468, + "loss": 0.0862, + "theoretical_loss": 3.9761420661706683, + "tokens_seen": 462946304 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008683196918632643, + "loss": 0.084, + "theoretical_loss": 3.975898180597089, + "tokens_seen": 463208448 + }, + { + "epoch": 0.14, + "learning_rate": 0.000868239447921682, + "loss": 0.0857, + "theoretical_loss": 3.9756544716282187, + "tokens_seen": 463470592 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008681592039800995, + "loss": 0.0837, + "theoretical_loss": 3.975410939036392, + "tokens_seen": 463732736 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008680789600385172, + "loss": 0.0852, + "theoretical_loss": 3.9751675825943638, + "tokens_seen": 463994880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008679987160969347, + "loss": 0.0895, + "theoretical_loss": 3.974924402075313, + "tokens_seen": 464257024 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008679184721553522, + "loss": 0.0863, + "theoretical_loss": 3.9746813972528354, + "tokens_seen": 464519168 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008678382282137699, + "loss": 0.0875, + "theoretical_loss": 3.9744385679009486, + "tokens_seen": 464781312 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008677579842721874, + "loss": 0.0833, + "theoretical_loss": 3.9741959137940848, + "tokens_seen": 465043456 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.002355672651901841, + "objective/train/docs_used": 175471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4537473917007446, + "objective/train/original_loss": 1.4537473917007446, + "objective/train/theoretical_loss": 3.973953434707096, + "objective/train/tokens_used": 485765600, + "objective/train/value_avg": -0.0084075927734375, + "objective/train/value_loss": 0.00017681960889603943, + "objective/train/value_max": -0.00033664703369140625, + "objective/train/value_min": -0.244384765625, + "objective/train/value_reward_corr": 0.6067470079442566, + "objective/train/value_std": 0.011962890625, + "objective/train/weight_avg": 1.0024397373199463, + "objective/train/weighted_lm_loss": 1.4569942951202393, + "objective/train/weights_max": 1.135114312171936, + "objective/train/weights_min": 0.36958563327789307, + "theoretical_loss": 3.973953434707096, + "tokens_seen": 465305600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008676777403306051, + "loss": 0.0827, + "theoretical_loss": 3.973953434707096, + "tokens_seen": 465305600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008675974963890227, + "loss": 0.084, + "theoretical_loss": 3.973711130415248, + "tokens_seen": 465567744 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008675172524474403, + "loss": 0.0855, + "theoretical_loss": 3.973469000694223, + "tokens_seen": 465829888 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008674370085058578, + "loss": 0.084, + "theoretical_loss": 3.973227045320117, + "tokens_seen": 466092032 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008673567645642754, + "loss": 0.0852, + "theoretical_loss": 3.9729852640694383, + "tokens_seen": 466354176 + }, + { + "epoch": 0.14, + "learning_rate": 0.000867276520622693, + "loss": 0.0855, + "theoretical_loss": 3.972743656719107, + "tokens_seen": 466616320 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008671962766811105, + "loss": 0.0819, + "theoretical_loss": 3.9725022230464537, + "tokens_seen": 466878464 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008671160327395282, + "loss": 0.0849, + "theoretical_loss": 3.9722609628292207, + "tokens_seen": 467140608 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008670357887979457, + "loss": 0.084, + "theoretical_loss": 3.9720198758455574, + "tokens_seen": 467402752 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008669555448563635, + "loss": 0.0854, + "theoretical_loss": 3.9717789618740227, + "tokens_seen": 467664896 + }, + { + "epoch": 0.14, + "learning_rate": 0.000866875300914781, + "loss": 0.0864, + "theoretical_loss": 3.9715382206935814, + "tokens_seen": 467927040 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008667950569731985, + "loss": 0.082, + "theoretical_loss": 3.9712976520836043, + "tokens_seen": 468189184 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008667148130316162, + "loss": 0.0839, + "theoretical_loss": 3.971057255823868, + "tokens_seen": 468451328 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0019131185254082084, + "objective/train/docs_used": 176758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8133913278579712, + "objective/train/original_loss": 1.8133914470672607, + "objective/train/theoretical_loss": 3.9709371222566308, + "objective/train/tokens_used": 489042400, + "objective/train/value_avg": -0.00600433349609375, + "objective/train/value_loss": 0.00013036759628448635, + "objective/train/value_max": -0.00028252601623535156, + "objective/train/value_min": -0.2418212890625, + "objective/train/value_reward_corr": 0.4864233011946609, + "objective/train/value_std": 0.006320953369140625, + "objective/train/weight_avg": 1.0019735097885132, + "objective/train/weighted_lm_loss": 1.817116618156433, + "objective/train/weights_max": 1.2735666036605835, + "objective/train/weights_min": 0.3718809485435486, + "theoretical_loss": 3.9709371222566308, + "tokens_seen": 468582400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008666345690900337, + "loss": 0.0859, + "theoretical_loss": 3.9708170316945526, + "tokens_seen": 468713472 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008665543251484513, + "loss": 0.0853, + "theoretical_loss": 3.9705769794762418, + "tokens_seen": 468975616 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008664740812068689, + "loss": 0.0866, + "theoretical_loss": 3.97033709894992, + "tokens_seen": 469237760 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008663938372652865, + "loss": 0.085, + "theoretical_loss": 3.970097389896975, + "tokens_seen": 469499904 + }, + { + "epoch": 0.14, + "learning_rate": 0.000866313593323704, + "loss": 0.0853, + "theoretical_loss": 3.9698578520991936, + "tokens_seen": 469762048 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008662333493821216, + "loss": 0.0873, + "theoretical_loss": 3.9696184853387617, + "tokens_seen": 470024192 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008661531054405393, + "loss": 0.0834, + "theoretical_loss": 3.9693792893982636, + "tokens_seen": 470286336 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008660728614989568, + "loss": 0.0861, + "theoretical_loss": 3.969140264060681, + "tokens_seen": 470548480 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008659926175573745, + "loss": 0.0844, + "theoretical_loss": 3.9689014091093933, + "tokens_seen": 470810624 + }, + { + "epoch": 0.14, + "learning_rate": 0.000865912373615792, + "loss": 0.0843, + "theoretical_loss": 3.9686627243281727, + "tokens_seen": 471072768 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008658321296742096, + "loss": 0.088, + "theoretical_loss": 3.9684242095011886, + "tokens_seen": 471334912 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008657518857326272, + "loss": 0.0841, + "theoretical_loss": 3.9681858644130017, + "tokens_seen": 471597056 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0015276180347427726, + "objective/train/docs_used": 177948, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7598378658294678, + "objective/train/original_loss": 1.7598377466201782, + "objective/train/theoretical_loss": 3.967947688848568, + "objective/train/tokens_used": 492319200, + "objective/train/value_avg": -0.01081085205078125, + "objective/train/value_loss": 0.00034501656773500144, + "objective/train/value_max": -0.00039505958557128906, + "objective/train/value_min": -0.76025390625, + "objective/train/value_reward_corr": 0.7302345418778485, + "objective/train/value_std": 0.0194549560546875, + "objective/train/weight_avg": 1.0016895532608032, + "objective/train/weighted_lm_loss": 1.7620900869369507, + "objective/train/weights_max": 1.419206142425537, + "objective/train/weights_min": 0.39239516854286194, + "theoretical_loss": 3.967947688848568, + "tokens_seen": 471859200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008656716417910447, + "loss": 0.0869, + "theoretical_loss": 3.967947688848568, + "tokens_seen": 471859200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008655913978494624, + "loss": 0.0807, + "theoretical_loss": 3.9677096825932328, + "tokens_seen": 472121344 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008655111539078799, + "loss": 0.0841, + "theoretical_loss": 3.9674718454327325, + "tokens_seen": 472383488 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008654309099662976, + "loss": 0.0864, + "theoretical_loss": 3.9672341771531956, + "tokens_seen": 472645632 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008653506660247152, + "loss": 0.0862, + "theoretical_loss": 3.9669966775411365, + "tokens_seen": 472907776 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008652704220831328, + "loss": 0.0841, + "theoretical_loss": 3.96675934638346, + "tokens_seen": 473169920 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008651901781415503, + "loss": 0.0864, + "theoretical_loss": 3.9665221834674558, + "tokens_seen": 473432064 + }, + { + "epoch": 0.14, + "learning_rate": 0.000865109934199968, + "loss": 0.0841, + "theoretical_loss": 3.9662851885808026, + "tokens_seen": 473694208 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008650296902583855, + "loss": 0.0848, + "theoretical_loss": 3.966048361511562, + "tokens_seen": 473956352 + }, + { + "epoch": 0.14, + "learning_rate": 0.000864949446316803, + "loss": 0.0829, + "theoretical_loss": 3.96581170204818, + "tokens_seen": 474218496 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008648692023752207, + "loss": 0.087, + "theoretical_loss": 3.9655752099794874, + "tokens_seen": 474480640 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008647889584336382, + "loss": 0.0888, + "theoretical_loss": 3.9653388850946976, + "tokens_seen": 474742784 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008647087144920559, + "loss": 0.084, + "theoretical_loss": 3.965102727183404, + "tokens_seen": 475004928 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": 0.0018348618177697062, + "objective/train/docs_used": 179087, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5616334676742554, + "objective/train/original_loss": 1.5616333484649658, + "objective/train/theoretical_loss": 3.9649847107771707, + "objective/train/tokens_used": 495596000, + "objective/train/value_avg": -0.0064697265625, + "objective/train/value_loss": 0.00020544622384477407, + "objective/train/value_max": -0.0003077983856201172, + "objective/train/value_min": -0.6181640625, + "objective/train/value_reward_corr": 0.5330140721340589, + "objective/train/value_std": 0.00867462158203125, + "objective/train/weight_avg": 1.0019270181655884, + "objective/train/weighted_lm_loss": 1.564939260482788, + "objective/train/weights_max": 1.3273746967315674, + "objective/train/weights_min": 0.36936861276626587, + "theoretical_loss": 3.9649847107771707, + "tokens_seen": 475136000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008646284705504735, + "loss": 0.0832, + "theoretical_loss": 3.9648667360355816, + "tokens_seen": 475267072 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008645482266088911, + "loss": 0.0869, + "theoretical_loss": 3.9646309114415863, + "tokens_seen": 475529216 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008644679826673087, + "loss": 0.0838, + "theoretical_loss": 3.9643952531921505, + "tokens_seen": 475791360 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008643877387257262, + "loss": 0.0817, + "theoretical_loss": 3.9641597610783874, + "tokens_seen": 476053504 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008643074947841438, + "loss": 0.0847, + "theoretical_loss": 3.9639244348917853, + "tokens_seen": 476315648 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008642272508425614, + "loss": 0.0826, + "theoretical_loss": 3.96368927442421, + "tokens_seen": 476577792 + }, + { + "epoch": 0.14, + "learning_rate": 0.000864147006900979, + "loss": 0.0855, + "theoretical_loss": 3.9634542794679013, + "tokens_seen": 476839936 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008640667629593965, + "loss": 0.0832, + "theoretical_loss": 3.963219449815475, + "tokens_seen": 477102080 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008639865190178143, + "loss": 0.0809, + "theoretical_loss": 3.9629847852599207, + "tokens_seen": 477364224 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008639062750762318, + "loss": 0.0871, + "theoretical_loss": 3.9627502855945984, + "tokens_seen": 477626368 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008638260311346493, + "loss": 0.0839, + "theoretical_loss": 3.962515950613242, + "tokens_seen": 477888512 + }, + { + "epoch": 0.14, + "learning_rate": 0.000863745787193067, + "loss": 0.0862, + "theoretical_loss": 3.962281780109957, + "tokens_seen": 478150656 + }, + { + "epoch": 0.14, + "objective/train/advantage_avg": -0.0006487751961685717, + "objective/train/docs_used": 180329, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7242850065231323, + "objective/train/original_loss": 1.7242848873138428, + "objective/train/theoretical_loss": 3.9620477738792164, + "objective/train/tokens_used": 498872800, + "objective/train/value_avg": -0.005603790283203125, + "objective/train/value_loss": 0.00016961862274911255, + "objective/train/value_max": -0.00028252601623535156, + "objective/train/value_min": -0.17919921875, + "objective/train/value_reward_corr": 0.5824427047069172, + "objective/train/value_std": 0.0064849853515625, + "objective/train/weight_avg": 0.999433696269989, + "objective/train/weighted_lm_loss": 1.724251389503479, + "objective/train/weights_max": 1.0798728466033936, + "objective/train/weights_min": 0.6110895276069641, + "theoretical_loss": 3.9620477738792164, + "tokens_seen": 478412800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0008636655432514845, + "loss": 0.0843, + "theoretical_loss": 3.9620477738792164, + "tokens_seen": 478412800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008635852993099021, + "loss": 0.0854, + "theoretical_loss": 3.9618139317158647, + "tokens_seen": 478674944 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008635050553683197, + "loss": 0.0847, + "theoretical_loss": 3.961580253415114, + "tokens_seen": 478937088 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008634248114267373, + "loss": 0.0845, + "theoretical_loss": 3.9613467387725434, + "tokens_seen": 479199232 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008633445674851548, + "loss": 0.0807, + "theoretical_loss": 3.9611133875841, + "tokens_seen": 479461376 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008632643235435724, + "loss": 0.0854, + "theoretical_loss": 3.960880199646096, + "tokens_seen": 479723520 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008631840796019901, + "loss": 0.0858, + "theoretical_loss": 3.9606471747552083, + "tokens_seen": 479985664 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008631038356604077, + "loss": 0.0826, + "theoretical_loss": 3.9604143127084774, + "tokens_seen": 480247808 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008630235917188253, + "loss": 0.0847, + "theoretical_loss": 3.960181613303309, + "tokens_seen": 480509952 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008629433477772428, + "loss": 0.0875, + "theoretical_loss": 3.9599490763374687, + "tokens_seen": 480772096 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008628631038356605, + "loss": 0.0857, + "theoretical_loss": 3.9597167016090866, + "tokens_seen": 481034240 + }, + { + "epoch": 0.15, + "learning_rate": 0.000862782859894078, + "loss": 0.0827, + "theoretical_loss": 3.95948448891665, + "tokens_seen": 481296384 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008627026159524955, + "loss": 0.0838, + "theoretical_loss": 3.959252438059009, + "tokens_seen": 481558528 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0011554267257452011, + "objective/train/docs_used": 181505, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.673024296760559, + "objective/train/original_loss": 1.6730244159698486, + "objective/train/theoretical_loss": 3.959136473255475, + "objective/train/tokens_used": 502149600, + "objective/train/value_avg": -0.00830078125, + "objective/train/value_loss": 0.0005257127340883017, + "objective/train/value_max": -0.0005254745483398438, + "objective/train/value_min": -0.478515625, + "objective/train/value_reward_corr": 0.5014164299225942, + "objective/train/value_std": 0.0123291015625, + "objective/train/weight_avg": 1.0013680458068848, + "objective/train/weighted_lm_loss": 1.674941062927246, + "objective/train/weights_max": 1.4412035942077637, + "objective/train/weights_min": 0.1574522852897644, + "theoretical_loss": 3.959136473255475, + "tokens_seen": 481689600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008626223720109132, + "loss": 0.0841, + "theoretical_loss": 3.9590205488353707, + "tokens_seen": 481820672 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008625421280693307, + "loss": 0.0867, + "theoretical_loss": 3.958788821045302, + "tokens_seen": 482082816 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008624618841277484, + "loss": 0.0848, + "theoretical_loss": 3.958557254488727, + "tokens_seen": 482344960 + }, + { + "epoch": 0.15, + "learning_rate": 0.000862381640186166, + "loss": 0.084, + "theoretical_loss": 3.958325848965925, + "tokens_seen": 482607104 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008623013962445836, + "loss": 0.0831, + "theoretical_loss": 3.958094604277532, + "tokens_seen": 482869248 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008622211523030011, + "loss": 0.0863, + "theoretical_loss": 3.9578635202245387, + "tokens_seen": 483131392 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008621409083614188, + "loss": 0.0807, + "theoretical_loss": 3.95763259660829, + "tokens_seen": 483393536 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008620606644198363, + "loss": 0.0829, + "theoretical_loss": 3.957401833230484, + "tokens_seen": 483655680 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008619804204782538, + "loss": 0.0833, + "theoretical_loss": 3.957171229893171, + "tokens_seen": 483917824 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008619001765366715, + "loss": 0.0847, + "theoretical_loss": 3.956940786398753, + "tokens_seen": 484179968 + }, + { + "epoch": 0.15, + "learning_rate": 0.000861819932595089, + "loss": 0.0856, + "theoretical_loss": 3.9567105025499827, + "tokens_seen": 484442112 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008617396886535068, + "loss": 0.0839, + "theoretical_loss": 3.9564803781499633, + "tokens_seen": 484704256 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0023670962546020746, + "objective/train/docs_used": 182723, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4888525009155273, + "objective/train/original_loss": 1.4888522624969482, + "objective/train/theoretical_loss": 3.956250413002146, + "objective/train/tokens_used": 505426400, + "objective/train/value_avg": -0.01146697998046875, + "objective/train/value_loss": 0.0007001186022534966, + "objective/train/value_max": -0.0003249645233154297, + "objective/train/value_min": -0.8916015625, + "objective/train/value_reward_corr": 0.688608831300723, + "objective/train/value_std": 0.02484130859375, + "objective/train/weight_avg": 1.0026851892471313, + "objective/train/weighted_lm_loss": 1.4927496910095215, + "objective/train/weights_max": 2.0180838108062744, + "objective/train/weights_min": 0.3854755163192749, + "theoretical_loss": 3.956250413002146, + "tokens_seen": 484966400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008616594447119243, + "loss": 0.0845, + "theoretical_loss": 3.956250413002146, + "tokens_seen": 484966400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008615792007703419, + "loss": 0.0851, + "theoretical_loss": 3.9560206069103314, + "tokens_seen": 485228544 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008614989568287595, + "loss": 0.086, + "theoretical_loss": 3.9557909596786676, + "tokens_seen": 485490688 + }, + { + "epoch": 0.15, + "learning_rate": 0.000861418712887177, + "loss": 0.0842, + "theoretical_loss": 3.9555614711116487, + "tokens_seen": 485752832 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008613384689455946, + "loss": 0.0841, + "theoretical_loss": 3.9553321410141162, + "tokens_seen": 486014976 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008612582250040122, + "loss": 0.0831, + "theoretical_loss": 3.9551029691912545, + "tokens_seen": 486277120 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008611779810624298, + "loss": 0.0843, + "theoretical_loss": 3.954873955448594, + "tokens_seen": 486539264 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008610977371208473, + "loss": 0.0846, + "theoretical_loss": 3.9546450995920086, + "tokens_seen": 486801408 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008610174931792651, + "loss": 0.0827, + "theoretical_loss": 3.954416401427715, + "tokens_seen": 487063552 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008609372492376826, + "loss": 0.0827, + "theoretical_loss": 3.9541878607622705, + "tokens_seen": 487325696 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008608570052961001, + "loss": 0.0805, + "theoretical_loss": 3.9539594774025755, + "tokens_seen": 487587840 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008607767613545178, + "loss": 0.084, + "theoretical_loss": 3.9537312511558698, + "tokens_seen": 487849984 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008606965174129353, + "loss": 0.0825, + "theoretical_loss": 3.953503181829732, + "tokens_seen": 488112128 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0009867006447166204, + "objective/train/docs_used": 183865, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7441798448562622, + "objective/train/original_loss": 1.7441800832748413, + "objective/train/theoretical_loss": 3.9533892059518383, + "objective/train/tokens_used": 508703200, + "objective/train/value_avg": -0.01270294189453125, + "objective/train/value_loss": 0.0009546683286316693, + "objective/train/value_max": -0.00030303001403808594, + "objective/train/value_min": -0.64697265625, + "objective/train/value_reward_corr": 0.6314438238999378, + "objective/train/value_std": 0.02288818359375, + "objective/train/weight_avg": 1.0013914108276367, + "objective/train/weighted_lm_loss": 1.7459317445755005, + "objective/train/weights_max": 1.6658852100372314, + "objective/train/weights_min": 0.3877171576023102, + "theoretical_loss": 3.9533892059518383, + "tokens_seen": 488243200 + }, + { + "epoch": 0.15, + "learning_rate": 0.000860616273471353, + "loss": 0.0843, + "theoretical_loss": 3.9532752692320816, + "tokens_seen": 488374272 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008605360295297705, + "loss": 0.0811, + "theoretical_loss": 3.9530475131711746, + "tokens_seen": 488636416 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008604557855881881, + "loss": 0.0826, + "theoretical_loss": 3.9528199134556044, + "tokens_seen": 488898560 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008603755416466057, + "loss": 0.0843, + "theoretical_loss": 3.9525924698943022, + "tokens_seen": 489160704 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008602952977050232, + "loss": 0.0833, + "theoretical_loss": 3.952365182296533, + "tokens_seen": 489422848 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008602150537634409, + "loss": 0.0848, + "theoretical_loss": 3.9521380504718975, + "tokens_seen": 489684992 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008601348098218585, + "loss": 0.0852, + "theoretical_loss": 3.9519110742303325, + "tokens_seen": 489947136 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008600545658802761, + "loss": 0.0831, + "theoretical_loss": 3.951684253382105, + "tokens_seen": 490209280 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008599743219386936, + "loss": 0.0832, + "theoretical_loss": 3.951457587737817, + "tokens_seen": 490471424 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008598940779971113, + "loss": 0.0834, + "theoretical_loss": 3.9512310771084014, + "tokens_seen": 490733568 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008598138340555288, + "loss": 0.0848, + "theoretical_loss": 3.951004721305123, + "tokens_seen": 490995712 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008597335901139463, + "loss": 0.0833, + "theoretical_loss": 3.950778520139576, + "tokens_seen": 491257856 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": -0.0002496841480024159, + "objective/train/docs_used": 185162, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6399861574172974, + "objective/train/original_loss": 1.639986276626587, + "objective/train/theoretical_loss": 3.9505524734236848, + "objective/train/tokens_used": 511980000, + "objective/train/value_avg": -0.00838470458984375, + "objective/train/value_loss": 0.00021605678193736821, + "objective/train/value_max": -0.0002148151397705078, + "objective/train/value_min": -0.469970703125, + "objective/train/value_reward_corr": 0.7257053344268602, + "objective/train/value_std": 0.01364898681640625, + "objective/train/weight_avg": 0.9998546242713928, + "objective/train/weighted_lm_loss": 1.6394292116165161, + "objective/train/weights_max": 1.1676921844482422, + "objective/train/weights_min": 0.5068990588188171, + "theoretical_loss": 3.9505524734236848, + "tokens_seen": 491520000 + }, + { + "epoch": 0.15, + "learning_rate": 0.000859653346172364, + "loss": 0.0843, + "theoretical_loss": 3.9505524734236848, + "tokens_seen": 491520000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008595731022307815, + "loss": 0.0814, + "theoretical_loss": 3.950326580969703, + "tokens_seen": 491782144 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008594928582891992, + "loss": 0.0857, + "theoretical_loss": 3.950100842590212, + "tokens_seen": 492044288 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008594126143476168, + "loss": 0.085, + "theoretical_loss": 3.949875258098121, + "tokens_seen": 492306432 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008593323704060344, + "loss": 0.0855, + "theoretical_loss": 3.949649827306665, + "tokens_seen": 492568576 + }, + { + "epoch": 0.15, + "learning_rate": 0.000859252126464452, + "loss": 0.0843, + "theoretical_loss": 3.9494245500294047, + "tokens_seen": 492830720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008591718825228696, + "loss": 0.084, + "theoretical_loss": 3.949199426080228, + "tokens_seen": 493092864 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008590916385812871, + "loss": 0.0851, + "theoretical_loss": 3.9489744552733455, + "tokens_seen": 493355008 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008590113946397047, + "loss": 0.0891, + "theoretical_loss": 3.9487496374232913, + "tokens_seen": 493617152 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008589311506981223, + "loss": 0.0832, + "theoretical_loss": 3.9485249723449236, + "tokens_seen": 493879296 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008588509067565398, + "loss": 0.0826, + "theoretical_loss": 3.9483004598534217, + "tokens_seen": 494141440 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008587706628149576, + "loss": 0.0859, + "theoretical_loss": 3.948076099764288, + "tokens_seen": 494403584 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008586904188733751, + "loss": 0.0891, + "theoretical_loss": 3.947851891893343, + "tokens_seen": 494665728 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0016816434217616916, + "objective/train/docs_used": 186433, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7250248193740845, + "objective/train/original_loss": 1.725024700164795, + "objective/train/theoretical_loss": 3.947739844982224, + "objective/train/tokens_used": 515256800, + "objective/train/value_avg": -0.00727081298828125, + "objective/train/value_loss": 0.00025146797997877, + "objective/train/value_max": -0.0002148151397705078, + "objective/train/value_min": -0.45849609375, + "objective/train/value_reward_corr": 0.5433043884355142, + "objective/train/value_std": 0.01275634765625, + "objective/train/weight_avg": 1.0017974376678467, + "objective/train/weighted_lm_loss": 1.7270184755325317, + "objective/train/weights_max": 1.2517598867416382, + "objective/train/weights_min": 0.3689996004104614, + "theoretical_loss": 3.947739844982224, + "tokens_seen": 494796800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008586101749317927, + "loss": 0.0807, + "theoretical_loss": 3.9476278360567303, + "tokens_seen": 494927872 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008585299309902103, + "loss": 0.0842, + "theoretical_loss": 3.9474039320709107, + "tokens_seen": 495190016 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008584496870486278, + "loss": 0.0869, + "theoretical_loss": 3.9471801797526633, + "tokens_seen": 495452160 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008583694431070454, + "loss": 0.0824, + "theoretical_loss": 3.946956578919088, + "tokens_seen": 495714304 + }, + { + "epoch": 0.15, + "learning_rate": 0.000858289199165463, + "loss": 0.0844, + "theoretical_loss": 3.9467331293875976, + "tokens_seen": 495976448 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008582089552238806, + "loss": 0.0856, + "theoretical_loss": 3.9465098309759252, + "tokens_seen": 496238592 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008581287112822982, + "loss": 0.0844, + "theoretical_loss": 3.9462866835021178, + "tokens_seen": 496500736 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008580484673407159, + "loss": 0.0841, + "theoretical_loss": 3.9460636867845365, + "tokens_seen": 496762880 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008579682233991334, + "loss": 0.0852, + "theoretical_loss": 3.9458408406418584, + "tokens_seen": 497025024 + }, + { + "epoch": 0.15, + "learning_rate": 0.000857887979457551, + "loss": 0.0824, + "theoretical_loss": 3.945618144893073, + "tokens_seen": 497287168 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008578077355159686, + "loss": 0.0869, + "theoretical_loss": 3.9453955993574845, + "tokens_seen": 497549312 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008577274915743861, + "loss": 0.0834, + "theoretical_loss": 3.945173203854707, + "tokens_seen": 497811456 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 6.547255179611966e-05, + "objective/train/docs_used": 187487, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.78741455078125, + "objective/train/original_loss": 1.78741455078125, + "objective/train/theoretical_loss": 3.9449509582046662, + "objective/train/tokens_used": 518533600, + "objective/train/value_avg": -0.00795745849609375, + "objective/train/value_loss": 0.0003167542163282633, + "objective/train/value_max": -0.00017404556274414062, + "objective/train/value_min": -0.7861328125, + "objective/train/value_reward_corr": 0.6039141734249502, + "objective/train/value_std": 0.0124359130859375, + "objective/train/weight_avg": 1.0002011060714722, + "objective/train/weighted_lm_loss": 1.7879151105880737, + "objective/train/weights_max": 1.272313117980957, + "objective/train/weights_min": 0.3683635890483856, + "theoretical_loss": 3.9449509582046662, + "tokens_seen": 498073600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008576472476328038, + "loss": 0.0857, + "theoretical_loss": 3.9449509582046662, + "tokens_seen": 498073600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008575670036912213, + "loss": 0.0836, + "theoretical_loss": 3.944728862227601, + "tokens_seen": 498335744 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008574867597496389, + "loss": 0.0814, + "theoretical_loss": 3.9445069157440575, + "tokens_seen": 498597888 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008574065158080565, + "loss": 0.0849, + "theoretical_loss": 3.944285118574893, + "tokens_seen": 498860032 + }, + { + "epoch": 0.15, + "learning_rate": 0.000857326271866474, + "loss": 0.0845, + "theoretical_loss": 3.9440634705412725, + "tokens_seen": 499122176 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008572460279248917, + "loss": 0.0855, + "theoretical_loss": 3.9438419714646695, + "tokens_seen": 499384320 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008571657839833093, + "loss": 0.0871, + "theoretical_loss": 3.9436206211668647, + "tokens_seen": 499646464 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008570855400417269, + "loss": 0.0809, + "theoretical_loss": 3.9433994194699453, + "tokens_seen": 499908608 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008570052961001444, + "loss": 0.0838, + "theoretical_loss": 3.943178366196304, + "tokens_seen": 500170752 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008569250521585621, + "loss": 0.0851, + "theoretical_loss": 3.942957461168639, + "tokens_seen": 500432896 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008568448082169796, + "loss": 0.0872, + "theoretical_loss": 3.9427367042099544, + "tokens_seen": 500695040 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008567645642753972, + "loss": 0.0811, + "theoretical_loss": 3.942516095143555, + "tokens_seen": 500957184 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008566843203338148, + "loss": 0.0849, + "theoretical_loss": 3.9422956337930524, + "tokens_seen": 501219328 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0007019721670076251, + "objective/train/docs_used": 188727, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5103391408920288, + "objective/train/original_loss": 1.5103389024734497, + "objective/train/theoretical_loss": 3.9421854584562226, + "objective/train/tokens_used": 521810400, + "objective/train/value_avg": -0.0062103271484375, + "objective/train/value_loss": 0.00019669736502692103, + "objective/train/value_max": -0.00018966197967529297, + "objective/train/value_min": -0.333251953125, + "objective/train/value_reward_corr": 0.5123174818364106, + "objective/train/value_std": 0.00872802734375, + "objective/train/weight_avg": 1.0007885694503784, + "objective/train/weighted_lm_loss": 1.5108174085617065, + "objective/train/weights_max": 1.1460858583450317, + "objective/train/weights_min": 0.3740321099758148, + "theoretical_loss": 3.9421854584562226, + "tokens_seen": 501350400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008566040763922323, + "loss": 0.0823, + "theoretical_loss": 3.942075319982358, + "tokens_seen": 501481472 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008565238324506501, + "loss": 0.083, + "theoretical_loss": 3.941855153535686, + "tokens_seen": 501743616 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008564435885090676, + "loss": 0.0841, + "theoretical_loss": 3.9416351342775524, + "tokens_seen": 502005760 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008563633445674852, + "loss": 0.0828, + "theoretical_loss": 3.9414152620327716, + "tokens_seen": 502267904 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008562831006259028, + "loss": 0.0801, + "theoretical_loss": 3.941195536626461, + "tokens_seen": 502530048 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008562028566843203, + "loss": 0.084, + "theoretical_loss": 3.940975957884034, + "tokens_seen": 502792192 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008561226127427379, + "loss": 0.0842, + "theoretical_loss": 3.9407565256312047, + "tokens_seen": 503054336 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008560423688011555, + "loss": 0.0861, + "theoretical_loss": 3.940537239693983, + "tokens_seen": 503316480 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008559621248595731, + "loss": 0.0849, + "theoretical_loss": 3.9403180998986778, + "tokens_seen": 503578624 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008558818809179906, + "loss": 0.081, + "theoretical_loss": 3.9400991060718935, + "tokens_seen": 503840768 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008558016369764084, + "loss": 0.087, + "theoretical_loss": 3.93988025804053, + "tokens_seen": 504102912 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008557213930348259, + "loss": 0.0848, + "theoretical_loss": 3.9396615556317838, + "tokens_seen": 504365056 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0015238908817991614, + "objective/train/docs_used": 189911, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5270758867263794, + "objective/train/original_loss": 1.527076005935669, + "objective/train/theoretical_loss": 3.939442998673144, + "objective/train/tokens_used": 525087200, + "objective/train/value_avg": -0.0095672607421875, + "objective/train/value_loss": 0.0004635912482626736, + "objective/train/value_max": -0.00027370452880859375, + "objective/train/value_min": -0.77197265625, + "objective/train/value_reward_corr": 0.581789120363388, + "objective/train/value_std": 0.0157928466796875, + "objective/train/weight_avg": 1.0017266273498535, + "objective/train/weighted_lm_loss": 1.5284429788589478, + "objective/train/weights_max": 1.6315034627914429, + "objective/train/weights_min": 0.3693629503250122, + "theoretical_loss": 3.939442998673144, + "tokens_seen": 504627200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008556411490932436, + "loss": 0.0826, + "theoretical_loss": 3.939442998673144, + "tokens_seen": 504627200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008555609051516611, + "loss": 0.0827, + "theoretical_loss": 3.9392245869923954, + "tokens_seen": 504889344 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008554806612100786, + "loss": 0.087, + "theoretical_loss": 3.939006320417614, + "tokens_seen": 505151488 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008554004172684963, + "loss": 0.0838, + "theoretical_loss": 3.9387881987771705, + "tokens_seen": 505413632 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008553201733269138, + "loss": 0.0852, + "theoretical_loss": 3.9385702218997247, + "tokens_seen": 505675776 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008552399293853314, + "loss": 0.0852, + "theoretical_loss": 3.9383523896142316, + "tokens_seen": 505937920 + }, + { + "epoch": 0.15, + "learning_rate": 0.000855159685443749, + "loss": 0.0851, + "theoretical_loss": 3.9381347017499326, + "tokens_seen": 506200064 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008550794415021667, + "loss": 0.0852, + "theoretical_loss": 3.9379171581363623, + "tokens_seen": 506462208 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008549991975605842, + "loss": 0.0849, + "theoretical_loss": 3.937699758603342, + "tokens_seen": 506724352 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008549189536190018, + "loss": 0.085, + "theoretical_loss": 3.937482502980985, + "tokens_seen": 506986496 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008548387096774194, + "loss": 0.0878, + "theoretical_loss": 3.937265391099688, + "tokens_seen": 507248640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008547584657358369, + "loss": 0.0814, + "theoretical_loss": 3.9370484227901397, + "tokens_seen": 507510784 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008546782217942546, + "loss": 0.0822, + "theoretical_loss": 3.9368315978833124, + "tokens_seen": 507772928 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0013619712553918362, + "objective/train/docs_used": 191028, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.592982530593872, + "objective/train/original_loss": 1.592982530593872, + "objective/train/theoretical_loss": 3.936723239153177, + "objective/train/tokens_used": 528364000, + "objective/train/value_avg": -0.00579071044921875, + "objective/train/value_loss": 0.00020233231771271676, + "objective/train/value_max": -0.00026535987854003906, + "objective/train/value_min": -0.587890625, + "objective/train/value_reward_corr": 0.7309359003301154, + "objective/train/value_std": 0.0113677978515625, + "objective/train/weight_avg": 1.00145423412323, + "objective/train/weighted_lm_loss": 1.5958586931228638, + "objective/train/weights_max": 1.2670164108276367, + "objective/train/weights_min": 0.38817885518074036, + "theoretical_loss": 3.936723239153177, + "tokens_seen": 507904000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008545979778526721, + "loss": 0.0828, + "theoretical_loss": 3.936614916210466, + "tokens_seen": 508035072 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008545177339110897, + "loss": 0.0816, + "theoretical_loss": 3.9363983776031457, + "tokens_seen": 508297216 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008544374899695073, + "loss": 0.085, + "theoretical_loss": 3.936181981893182, + "tokens_seen": 508559360 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008543572460279248, + "loss": 0.0871, + "theoretical_loss": 3.9359657289126875, + "tokens_seen": 508821504 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008542770020863426, + "loss": 0.0858, + "theoretical_loss": 3.935749618494061, + "tokens_seen": 509083648 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008541967581447601, + "loss": 0.0846, + "theoretical_loss": 3.935533650469983, + "tokens_seen": 509345792 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008541165142031777, + "loss": 0.0831, + "theoretical_loss": 3.935317824673417, + "tokens_seen": 509607936 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008540362702615953, + "loss": 0.0852, + "theoretical_loss": 3.935102140937608, + "tokens_seen": 509870080 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008539560263200129, + "loss": 0.0837, + "theoretical_loss": 3.934886599096081, + "tokens_seen": 510132224 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008538757823784304, + "loss": 0.0828, + "theoretical_loss": 3.9346711989826426, + "tokens_seen": 510394368 + }, + { + "epoch": 0.15, + "learning_rate": 0.000853795538436848, + "loss": 0.0842, + "theoretical_loss": 3.93445594043138, + "tokens_seen": 510656512 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008537152944952656, + "loss": 0.0831, + "theoretical_loss": 3.9342408232766584, + "tokens_seen": 510918656 + }, + { + "epoch": 0.15, + "objective/train/advantage_avg": 0.0022731111384928226, + "objective/train/docs_used": 192198, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7514947652816772, + "objective/train/original_loss": 1.7514947652816772, + "objective/train/theoretical_loss": 3.934025847353122, + "objective/train/tokens_used": 531640800, + "objective/train/value_avg": -0.006824493408203125, + "objective/train/value_loss": 0.00011324948718538508, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.3486328125, + "objective/train/value_reward_corr": 0.5780862010598011, + "objective/train/value_std": 0.007709503173828125, + "objective/train/weight_avg": 1.002328634262085, + "objective/train/weighted_lm_loss": 1.7554271221160889, + "objective/train/weights_max": 1.185212254524231, + "objective/train/weights_min": 0.7214187383651733, + "theoretical_loss": 3.934025847353122, + "tokens_seen": 511180800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008536350505536831, + "loss": 0.0848, + "theoretical_loss": 3.934025847353122, + "tokens_seen": 511180800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0008535548066121009, + "loss": 0.0827, + "theoretical_loss": 3.9338110124956924, + "tokens_seen": 511442944 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008534745626705184, + "loss": 0.0846, + "theoretical_loss": 3.9335963185395713, + "tokens_seen": 511705088 + }, + { + "epoch": 0.16, + "learning_rate": 0.000853394318728936, + "loss": 0.0818, + "theoretical_loss": 3.933381765320233, + "tokens_seen": 511967232 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008533140747873536, + "loss": 0.0821, + "theoretical_loss": 3.933167352673432, + "tokens_seen": 512229376 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008532338308457711, + "loss": 0.0843, + "theoretical_loss": 3.9329530804351958, + "tokens_seen": 512491520 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008531535869041887, + "loss": 0.0862, + "theoretical_loss": 3.9327389484418287, + "tokens_seen": 512753664 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008530733429626063, + "loss": 0.0808, + "theoretical_loss": 3.9325249565299076, + "tokens_seen": 513015808 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008529930990210239, + "loss": 0.085, + "theoretical_loss": 3.932311104536285, + "tokens_seen": 513277952 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008529128550794415, + "loss": 0.0829, + "theoretical_loss": 3.9320973922980844, + "tokens_seen": 513540096 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008528326111378592, + "loss": 0.0849, + "theoretical_loss": 3.931883819652705, + "tokens_seen": 513802240 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008527523671962767, + "loss": 0.0826, + "theoretical_loss": 3.9316703864378155, + "tokens_seen": 514064384 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008526721232546944, + "loss": 0.0825, + "theoretical_loss": 3.9314570924913568, + "tokens_seen": 514326528 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0023866831324994564, + "objective/train/docs_used": 193343, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5726442337036133, + "objective/train/original_loss": 1.5726442337036133, + "objective/train/theoretical_loss": 3.931350497693219, + "objective/train/tokens_used": 534917600, + "objective/train/value_avg": -0.00785064697265625, + "objective/train/value_loss": 0.00021415037917904556, + "objective/train/value_max": -0.0003528594970703125, + "objective/train/value_min": -0.262451171875, + "objective/train/value_reward_corr": 0.5768801997308433, + "objective/train/value_std": 0.01004791259765625, + "objective/train/weight_avg": 1.002478003501892, + "objective/train/weighted_lm_loss": 1.5768412351608276, + "objective/train/weights_max": 1.115512490272522, + "objective/train/weights_min": 0.36873859167099, + "theoretical_loss": 3.931350497693219, + "tokens_seen": 514457600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008525918793131119, + "loss": 0.0841, + "theoretical_loss": 3.9312439376515407, + "tokens_seen": 514588672 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008525116353715294, + "loss": 0.0836, + "theoretical_loss": 3.9310309217568493, + "tokens_seen": 514850816 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008524313914299471, + "loss": 0.0851, + "theoretical_loss": 3.9308180446460343, + "tokens_seen": 515112960 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008523511474883646, + "loss": 0.0845, + "theoretical_loss": 3.9306053061581165, + "tokens_seen": 515375104 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008522709035467822, + "loss": 0.0838, + "theoretical_loss": 3.930392706132385, + "tokens_seen": 515637248 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008521906596051998, + "loss": 0.085, + "theoretical_loss": 3.9301802444083966, + "tokens_seen": 515899392 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008521104156636175, + "loss": 0.0851, + "theoretical_loss": 3.929967920825977, + "tokens_seen": 516161536 + }, + { + "epoch": 0.16, + "learning_rate": 0.000852030171722035, + "loss": 0.0809, + "theoretical_loss": 3.929755735225216, + "tokens_seen": 516423680 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008519499277804526, + "loss": 0.0836, + "theoretical_loss": 3.9295436874464715, + "tokens_seen": 516685824 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008518696838388702, + "loss": 0.0839, + "theoretical_loss": 3.929331777330366, + "tokens_seen": 516947968 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008517894398972877, + "loss": 0.0802, + "theoretical_loss": 3.9291200047177886, + "tokens_seen": 517210112 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008517091959557054, + "loss": 0.0821, + "theoretical_loss": 3.9289083694498905, + "tokens_seen": 517472256 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.00018064206233248115, + "objective/train/docs_used": 194581, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6930550336837769, + "objective/train/original_loss": 1.6930551528930664, + "objective/train/theoretical_loss": 3.9286968713680883, + "objective/train/tokens_used": 538194400, + "objective/train/value_avg": -0.00974273681640625, + "objective/train/value_loss": 0.0005410881130956113, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.72412109375, + "objective/train/value_reward_corr": 0.7755958663238911, + "objective/train/value_std": 0.0196533203125, + "objective/train/weight_avg": 1.0004180669784546, + "objective/train/weighted_lm_loss": 1.694211721420288, + "objective/train/weights_max": 1.331702709197998, + "objective/train/weights_min": 0.3937867283821106, + "theoretical_loss": 3.9286968713680883, + "tokens_seen": 517734400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008516289520141229, + "loss": 0.0811, + "theoretical_loss": 3.9286968713680883, + "tokens_seen": 517734400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008515487080725406, + "loss": 0.0829, + "theoretical_loss": 3.9284855103140615, + "tokens_seen": 517996544 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008514684641309581, + "loss": 0.0827, + "theoretical_loss": 3.9282742861297524, + "tokens_seen": 518258688 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008513882201893756, + "loss": 0.0793, + "theoretical_loss": 3.928063198657365, + "tokens_seen": 518520832 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008513079762477934, + "loss": 0.0814, + "theoretical_loss": 3.9278522477393656, + "tokens_seen": 518782976 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008512277323062109, + "loss": 0.0829, + "theoretical_loss": 3.9276414332184815, + "tokens_seen": 519045120 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008511474883646285, + "loss": 0.0823, + "theoretical_loss": 3.927430754937699, + "tokens_seen": 519307264 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008510672444230461, + "loss": 0.08, + "theoretical_loss": 3.927220212740267, + "tokens_seen": 519569408 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008509870004814637, + "loss": 0.0799, + "theoretical_loss": 3.9270098064696906, + "tokens_seen": 519831552 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008509067565398812, + "loss": 0.0818, + "theoretical_loss": 3.9267995359697356, + "tokens_seen": 520093696 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008508265125982988, + "loss": 0.0823, + "theoretical_loss": 3.926589401084426, + "tokens_seen": 520355840 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008507462686567164, + "loss": 0.0807, + "theoretical_loss": 3.9263794016580427, + "tokens_seen": 520617984 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008506660247151339, + "loss": 0.0803, + "theoretical_loss": 3.9261695375351238, + "tokens_seen": 520880128 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0009029977954924107, + "objective/train/docs_used": 195804, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7540147304534912, + "objective/train/original_loss": 1.7540148496627808, + "objective/train/theoretical_loss": 3.926064656163952, + "objective/train/tokens_used": 541471200, + "objective/train/value_avg": -0.01312255859375, + "objective/train/value_loss": 0.0006540497415699065, + "objective/train/value_max": -0.00019562244415283203, + "objective/train/value_min": -0.7529296875, + "objective/train/value_reward_corr": 0.6588186474670367, + "objective/train/value_std": 0.021728515625, + "objective/train/weight_avg": 1.0011897087097168, + "objective/train/weighted_lm_loss": 1.7548198699951172, + "objective/train/weights_max": 1.322047233581543, + "objective/train/weights_min": 0.3694334030151367, + "theoretical_loss": 3.926064656163952, + "tokens_seen": 521011200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008505857807735517, + "loss": 0.0844, + "theoretical_loss": 3.9259598085604646, + "tokens_seen": 521142272 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008505055368319692, + "loss": 0.0813, + "theoretical_loss": 3.925750214579116, + "tokens_seen": 521404416 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008504252928903869, + "loss": 0.0848, + "theoretical_loss": 3.9255407554363835, + "tokens_seen": 521666560 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008503450489488044, + "loss": 0.0816, + "theoretical_loss": 3.92533143097783, + "tokens_seen": 521928704 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008502648050072219, + "loss": 0.083, + "theoretical_loss": 3.9251222410492694, + "tokens_seen": 522190848 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008501845610656396, + "loss": 0.0834, + "theoretical_loss": 3.924913185496772, + "tokens_seen": 522452992 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008501043171240571, + "loss": 0.0817, + "theoretical_loss": 3.924704264166659, + "tokens_seen": 522715136 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008500240731824747, + "loss": 0.082, + "theoretical_loss": 3.9244954769055074, + "tokens_seen": 522977280 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008499438292408923, + "loss": 0.0844, + "theoretical_loss": 3.924286823560144, + "tokens_seen": 523239424 + }, + { + "epoch": 0.16, + "learning_rate": 0.00084986358529931, + "loss": 0.0809, + "theoretical_loss": 3.9240783039776472, + "tokens_seen": 523501568 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008497833413577275, + "loss": 0.0828, + "theoretical_loss": 3.9238699180053485, + "tokens_seen": 523763712 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008497030974161451, + "loss": 0.0837, + "theoretical_loss": 3.923661665490828, + "tokens_seen": 524025856 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0017735909204930067, + "objective/train/docs_used": 196964, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5325592756271362, + "objective/train/original_loss": 1.5325592756271362, + "objective/train/theoretical_loss": 3.9234535462819156, + "objective/train/tokens_used": 544748000, + "objective/train/value_avg": -0.00875091552734375, + "objective/train/value_loss": 0.00024432019563391805, + "objective/train/value_max": -0.0002378225326538086, + "objective/train/value_min": -0.404052734375, + "objective/train/value_reward_corr": 0.6240298333146145, + "objective/train/value_std": 0.0148773193359375, + "objective/train/weight_avg": 1.0018869638442993, + "objective/train/weighted_lm_loss": 1.5350106954574585, + "objective/train/weights_max": 1.2263628244400024, + "objective/train/weights_min": 0.37659770250320435, + "theoretical_loss": 3.9234535462819156, + "tokens_seen": 524288000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008496228534745627, + "loss": 0.0818, + "theoretical_loss": 3.9234535462819156, + "tokens_seen": 524288000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008495426095329802, + "loss": 0.0825, + "theoretical_loss": 3.923245560226693, + "tokens_seen": 524550144 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008494623655913979, + "loss": 0.0794, + "theoretical_loss": 3.9230377071734885, + "tokens_seen": 524812288 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008493821216498154, + "loss": 0.0822, + "theoretical_loss": 3.9228299869708794, + "tokens_seen": 525074432 + }, + { + "epoch": 0.16, + "learning_rate": 0.000849301877708233, + "loss": 0.0828, + "theoretical_loss": 3.9226223994676923, + "tokens_seen": 525336576 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008492216337666506, + "loss": 0.0855, + "theoretical_loss": 3.9224149445129983, + "tokens_seen": 525598720 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008491413898250683, + "loss": 0.0816, + "theoretical_loss": 3.922207621956119, + "tokens_seen": 525860864 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008490611458834859, + "loss": 0.0808, + "theoretical_loss": 3.9220004316466186, + "tokens_seen": 526123008 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008489809019419034, + "loss": 0.0842, + "theoretical_loss": 3.9217933734343093, + "tokens_seen": 526385152 + }, + { + "epoch": 0.16, + "learning_rate": 0.000848900658000321, + "loss": 0.0824, + "theoretical_loss": 3.9215864471692488, + "tokens_seen": 526647296 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008488204140587386, + "loss": 0.0819, + "theoretical_loss": 3.921379652701738, + "tokens_seen": 526909440 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008487401701171562, + "loss": 0.0812, + "theoretical_loss": 3.9211729898823235, + "tokens_seen": 527171584 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008486599261755737, + "loss": 0.0821, + "theoretical_loss": 3.920966458561794, + "tokens_seen": 527433728 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.00034622373641468585, + "objective/train/docs_used": 198271, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5029301643371582, + "objective/train/original_loss": 1.5029301643371582, + "objective/train/theoretical_loss": 3.9208632421670497, + "objective/train/tokens_used": 548024800, + "objective/train/value_avg": -0.00766754150390625, + "objective/train/value_loss": 0.0003754664212465286, + "objective/train/value_max": -0.0003077983856201172, + "objective/train/value_min": -0.64697265625, + "objective/train/value_reward_corr": 0.5987369406329422, + "objective/train/value_std": 0.0127410888671875, + "objective/train/weight_avg": 1.0005102157592773, + "objective/train/weighted_lm_loss": 1.5032236576080322, + "objective/train/weights_max": 1.5309104919433594, + "objective/train/weights_min": 0.37003564834594727, + "theoretical_loss": 3.9208632421670497, + "tokens_seen": 527564800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008485796822339914, + "loss": 0.0817, + "theoretical_loss": 3.920760058591182, + "tokens_seen": 527695872 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008484994382924089, + "loss": 0.0827, + "theoretical_loss": 3.9205537898217644, + "tokens_seen": 527958016 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008484191943508264, + "loss": 0.0819, + "theoretical_loss": 3.920347652105058, + "tokens_seen": 528220160 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008483389504092442, + "loss": 0.0831, + "theoretical_loss": 3.920141645292821, + "tokens_seen": 528482304 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008482587064676617, + "loss": 0.0815, + "theoretical_loss": 3.919935769237055, + "tokens_seen": 528744448 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008481784625260793, + "loss": 0.0842, + "theoretical_loss": 3.91973002379, + "tokens_seen": 529006592 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008480982185844969, + "loss": 0.082, + "theoretical_loss": 3.919524408804137, + "tokens_seen": 529268736 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008480179746429145, + "loss": 0.0829, + "theoretical_loss": 3.9193189241321873, + "tokens_seen": 529530880 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008479377307013321, + "loss": 0.0823, + "theoretical_loss": 3.9191135696271098, + "tokens_seen": 529793024 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008478574867597496, + "loss": 0.0852, + "theoretical_loss": 3.9189083451421025, + "tokens_seen": 530055168 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008477772428181672, + "loss": 0.0784, + "theoretical_loss": 3.9187032505306023, + "tokens_seen": 530317312 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008476969988765848, + "loss": 0.0813, + "theoretical_loss": 3.918498285646282, + "tokens_seen": 530579456 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.000702679913956672, + "objective/train/docs_used": 199409, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6489094495773315, + "objective/train/original_loss": 1.648909330368042, + "objective/train/theoretical_loss": 3.9182934503430538, + "objective/train/tokens_used": 551301600, + "objective/train/value_avg": -0.00879669189453125, + "objective/train/value_loss": 0.0003273165493737906, + "objective/train/value_max": -0.00041413307189941406, + "objective/train/value_min": -0.771484375, + "objective/train/value_reward_corr": 0.6633614602841378, + "objective/train/value_std": 0.0150604248046875, + "objective/train/weight_avg": 1.0008490085601807, + "objective/train/weighted_lm_loss": 1.6499123573303223, + "objective/train/weights_max": 1.3013832569122314, + "objective/train/weights_min": 0.38973385095596313, + "theoretical_loss": 3.9182934503430538, + "tokens_seen": 530841600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008476167549350025, + "loss": 0.0819, + "theoretical_loss": 3.9182934503430538, + "tokens_seen": 530841600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00084753651099342, + "loss": 0.0795, + "theoretical_loss": 3.918088744475064, + "tokens_seen": 531103744 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008474562670518377, + "loss": 0.0844, + "theoretical_loss": 3.9178841678966956, + "tokens_seen": 531365888 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008473760231102552, + "loss": 0.0846, + "theoretical_loss": 3.9176797204625693, + "tokens_seen": 531628032 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008472957791686727, + "loss": 0.0833, + "theoretical_loss": 3.917475402027537, + "tokens_seen": 531890176 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008472155352270904, + "loss": 0.0799, + "theoretical_loss": 3.917271212446689, + "tokens_seen": 532152320 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008471352912855079, + "loss": 0.0822, + "theoretical_loss": 3.917067151575348, + "tokens_seen": 532414464 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008470550473439255, + "loss": 0.085, + "theoretical_loss": 3.916863219269069, + "tokens_seen": 532676608 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008469748034023431, + "loss": 0.0816, + "theoretical_loss": 3.9166594153836427, + "tokens_seen": 532938752 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008468945594607608, + "loss": 0.0823, + "theoretical_loss": 3.9164557397750897, + "tokens_seen": 533200896 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008468143155191783, + "loss": 0.0817, + "theoretical_loss": 3.916252192299665, + "tokens_seen": 533463040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008467340715775959, + "loss": 0.0818, + "theoretical_loss": 3.9160487728138538, + "tokens_seen": 533725184 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008466538276360135, + "loss": 0.0826, + "theoretical_loss": 3.9158454811743733, + "tokens_seen": 533987328 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0011991261271759868, + "objective/train/docs_used": 200516, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.67826509475708, + "objective/train/original_loss": 1.6782649755477905, + "objective/train/theoretical_loss": 3.9157438832522944, + "objective/train/tokens_used": 554578400, + "objective/train/value_avg": -0.007232666015625, + "objective/train/value_loss": 0.00027039533597417176, + "objective/train/value_max": -0.00033545494079589844, + "objective/train/value_min": -0.525390625, + "objective/train/value_reward_corr": 0.46649640516327573, + "objective/train/value_std": 0.00963592529296875, + "objective/train/weight_avg": 1.0013190507888794, + "objective/train/weighted_lm_loss": 1.6796201467514038, + "objective/train/weights_max": 1.2117398977279663, + "objective/train/weights_min": 0.3694460988044739, + "theoretical_loss": 3.9157438832522944, + "tokens_seen": 534118400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008465735836944311, + "loss": 0.0848, + "theoretical_loss": 3.915642317238171, + "tokens_seen": 534249472 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008464933397528487, + "loss": 0.084, + "theoretical_loss": 3.915439280862423, + "tokens_seen": 534511616 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008464130958112662, + "loss": 0.0814, + "theoretical_loss": 3.915236371904539, + "tokens_seen": 534773760 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008463328518696839, + "loss": 0.0826, + "theoretical_loss": 3.915033590222153, + "tokens_seen": 535035904 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008462526079281014, + "loss": 0.0826, + "theoretical_loss": 3.914830935673132, + "tokens_seen": 535298048 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008461723639865191, + "loss": 0.0795, + "theoretical_loss": 3.914628408115569, + "tokens_seen": 535560192 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008460921200449367, + "loss": 0.0816, + "theoretical_loss": 3.9144260074077843, + "tokens_seen": 535822336 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008460118761033542, + "loss": 0.0843, + "theoretical_loss": 3.9142237334083276, + "tokens_seen": 536084480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008459316321617718, + "loss": 0.0818, + "theoretical_loss": 3.914021585975973, + "tokens_seen": 536346624 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008458513882201894, + "loss": 0.0808, + "theoretical_loss": 3.9138195649697227, + "tokens_seen": 536608768 + }, + { + "epoch": 0.16, + "learning_rate": 0.000845771144278607, + "loss": 0.0827, + "theoretical_loss": 3.9136176702488044, + "tokens_seen": 536870912 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008456909003370245, + "loss": 0.0794, + "theoretical_loss": 3.91341590167267, + "tokens_seen": 537133056 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0008067527669481933, + "objective/train/docs_used": 201641, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5511680841445923, + "objective/train/original_loss": 1.5511682033538818, + "objective/train/theoretical_loss": 3.9132142591009975, + "objective/train/tokens_used": 557855200, + "objective/train/value_avg": -0.00760650634765625, + "objective/train/value_loss": 0.00028524218942038715, + "objective/train/value_max": -0.00030541419982910156, + "objective/train/value_min": -0.60986328125, + "objective/train/value_reward_corr": 0.643433561791619, + "objective/train/value_std": 0.0146026611328125, + "objective/train/weight_avg": 1.0009351968765259, + "objective/train/weighted_lm_loss": 1.551741361618042, + "objective/train/weights_max": 1.4019429683685303, + "objective/train/weights_min": 0.3742033541202545, + "theoretical_loss": 3.9132142591009975, + "tokens_seen": 537395200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008456106563954422, + "loss": 0.0807, + "theoretical_loss": 3.9132142591009975, + "tokens_seen": 537395200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008455304124538597, + "loss": 0.0813, + "theoretical_loss": 3.9130127423936907, + "tokens_seen": 537657344 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008454501685122772, + "loss": 0.0839, + "theoretical_loss": 3.9128113514108733, + "tokens_seen": 537919488 + }, + { + "epoch": 0.16, + "learning_rate": 0.000845369924570695, + "loss": 0.0821, + "theoretical_loss": 3.9126100860128963, + "tokens_seen": 538181632 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008452896806291125, + "loss": 0.0836, + "theoretical_loss": 3.9124089460603324, + "tokens_seen": 538443776 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008452094366875302, + "loss": 0.0828, + "theoretical_loss": 3.9122079314139766, + "tokens_seen": 538705920 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008451291927459477, + "loss": 0.0828, + "theoretical_loss": 3.9120070419348463, + "tokens_seen": 538968064 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008450489488043653, + "loss": 0.0799, + "theoretical_loss": 3.9118062774841804, + "tokens_seen": 539230208 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008449687048627829, + "loss": 0.084, + "theoretical_loss": 3.91160563792344, + "tokens_seen": 539492352 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008448884609212004, + "loss": 0.0829, + "theoretical_loss": 3.911405123114305, + "tokens_seen": 539754496 + }, + { + "epoch": 0.16, + "learning_rate": 0.000844808216979618, + "loss": 0.0847, + "theoretical_loss": 3.9112047329186783, + "tokens_seen": 540016640 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008447279730380356, + "loss": 0.0833, + "theoretical_loss": 3.911004467198679, + "tokens_seen": 540278784 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008446477290964533, + "loss": 0.0842, + "theoretical_loss": 3.9108043258166485, + "tokens_seen": 540540928 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": 0.0016820263117551804, + "objective/train/docs_used": 202818, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5808069705963135, + "objective/train/original_loss": 1.5808069705963135, + "objective/train/theoretical_loss": 3.910704301709413, + "objective/train/tokens_used": 561132000, + "objective/train/value_avg": -0.00955963134765625, + "objective/train/value_loss": 0.0003617853799369186, + "objective/train/value_max": -0.0003407001495361328, + "objective/train/value_min": -0.6962890625, + "objective/train/value_reward_corr": 0.5872441373349445, + "objective/train/value_std": 0.016387939453125, + "objective/train/weight_avg": 1.0018519163131714, + "objective/train/weighted_lm_loss": 1.5838106870651245, + "objective/train/weights_max": 1.8153696060180664, + "objective/train/weights_min": 0.3881314992904663, + "theoretical_loss": 3.910704301709413, + "tokens_seen": 540672000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008445674851548708, + "loss": 0.0816, + "theoretical_loss": 3.910604308635146, + "tokens_seen": 540803072 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008444872412132885, + "loss": 0.0799, + "theoretical_loss": 3.9104044155169495, + "tokens_seen": 541065216 + }, + { + "epoch": 0.16, + "learning_rate": 0.000844406997271706, + "loss": 0.0854, + "theoretical_loss": 3.910204646325055, + "tokens_seen": 541327360 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008443267533301235, + "loss": 0.0843, + "theoretical_loss": 3.9100050009226752, + "tokens_seen": 541589504 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008442465093885412, + "loss": 0.0833, + "theoretical_loss": 3.9098054791732406, + "tokens_seen": 541851648 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008441662654469587, + "loss": 0.0837, + "theoretical_loss": 3.909606080940399, + "tokens_seen": 542113792 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008440860215053764, + "loss": 0.0824, + "theoretical_loss": 3.909406806088013, + "tokens_seen": 542375936 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008440057775637939, + "loss": 0.0841, + "theoretical_loss": 3.909207654480162, + "tokens_seen": 542638080 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008439255336222116, + "loss": 0.082, + "theoretical_loss": 3.9090086259811403, + "tokens_seen": 542900224 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008438452896806292, + "loss": 0.0825, + "theoretical_loss": 3.908809720455457, + "tokens_seen": 543162368 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008437650457390467, + "loss": 0.0824, + "theoretical_loss": 3.908610937767836, + "tokens_seen": 543424512 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008436848017974643, + "loss": 0.083, + "theoretical_loss": 3.9084122777832144, + "tokens_seen": 543686656 + }, + { + "epoch": 0.16, + "objective/train/advantage_avg": -0.0008586941985413432, + "objective/train/docs_used": 204061, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6755656003952026, + "objective/train/original_loss": 1.675565481185913, + "objective/train/theoretical_loss": 3.908213740366744, + "objective/train/tokens_used": 564408800, + "objective/train/value_avg": -0.00843048095703125, + "objective/train/value_loss": 0.0002858492953237146, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.22607421875, + "objective/train/value_reward_corr": 0.6344704655141316, + "objective/train/value_std": 0.01132965087890625, + "objective/train/weight_avg": 0.9992753863334656, + "objective/train/weighted_lm_loss": 1.6743135452270508, + "objective/train/weights_max": 1.2291204929351807, + "objective/train/weights_min": 0.37150946259498596, + "theoretical_loss": 3.908213740366744, + "tokens_seen": 543948800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008436045578558819, + "loss": 0.0836, + "theoretical_loss": 3.908213740366744, + "tokens_seen": 543948800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0008435243139142995, + "loss": 0.083, + "theoretical_loss": 3.908015325383788, + "tokens_seen": 544210944 + }, + { + "epoch": 0.16, + "learning_rate": 0.000843444069972717, + "loss": 0.0823, + "theoretical_loss": 3.907817032699924, + "tokens_seen": 544473088 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008433638260311347, + "loss": 0.0813, + "theoretical_loss": 3.9076188621809416, + "tokens_seen": 544735232 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008432835820895522, + "loss": 0.0842, + "theoretical_loss": 3.9074208136928408, + "tokens_seen": 544997376 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008432033381479699, + "loss": 0.0833, + "theoretical_loss": 3.907222887101834, + "tokens_seen": 545259520 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008431230942063875, + "loss": 0.0858, + "theoretical_loss": 3.9070250822743446, + "tokens_seen": 545521664 + }, + { + "epoch": 0.17, + "learning_rate": 0.000843042850264805, + "loss": 0.0846, + "theoretical_loss": 3.906827399077006, + "tokens_seen": 545783808 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008429626063232226, + "loss": 0.083, + "theoretical_loss": 3.9066298373766615, + "tokens_seen": 546045952 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008428823623816402, + "loss": 0.0835, + "theoretical_loss": 3.9064323970403656, + "tokens_seen": 546308096 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008428021184400578, + "loss": 0.0835, + "theoretical_loss": 3.9062350779353787, + "tokens_seen": 546570240 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008427218744984754, + "loss": 0.083, + "theoretical_loss": 3.906037879929174, + "tokens_seen": 546832384 + }, + { + "epoch": 0.17, + "learning_rate": 0.000842641630556893, + "loss": 0.0816, + "theoretical_loss": 3.90584080288943, + "tokens_seen": 547094528 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.00023532530758529902, + "objective/train/docs_used": 204823, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5397506952285767, + "objective/train/original_loss": 1.5397508144378662, + "objective/train/theoretical_loss": 3.9057423096906874, + "objective/train/tokens_used": 567685600, + "objective/train/value_avg": -0.00724029541015625, + "objective/train/value_loss": 0.0003734648635145277, + "objective/train/value_max": -0.00015354156494140625, + "objective/train/value_min": -0.76318359375, + "objective/train/value_reward_corr": 0.8074329039979607, + "objective/train/value_std": 0.0235595703125, + "objective/train/weight_avg": 1.0004050731658936, + "objective/train/weighted_lm_loss": 1.539077639579773, + "objective/train/weights_max": 1.4787062406539917, + "objective/train/weights_min": 0.3683115839958191, + "theoretical_loss": 3.9057423096906874, + "tokens_seen": 547225600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008425613866153105, + "loss": 0.0832, + "theoretical_loss": 3.905643846684034, + "tokens_seen": 547356672 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008424811426737282, + "loss": 0.0801, + "theoretical_loss": 3.9054470111810815, + "tokens_seen": 547618816 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008424008987321458, + "loss": 0.0815, + "theoretical_loss": 3.9052502962488735, + "tokens_seen": 547880960 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008423206547905633, + "loss": 0.083, + "theoretical_loss": 3.9050537017559197, + "tokens_seen": 548143104 + }, + { + "epoch": 0.17, + "learning_rate": 0.000842240410848981, + "loss": 0.0831, + "theoretical_loss": 3.904857227570934, + "tokens_seen": 548405248 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008421601669073985, + "loss": 0.0789, + "theoretical_loss": 3.904660873562837, + "tokens_seen": 548667392 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008420799229658161, + "loss": 0.0817, + "theoretical_loss": 3.9044646396007545, + "tokens_seen": 548929536 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008419996790242337, + "loss": 0.0823, + "theoretical_loss": 3.9042685255540177, + "tokens_seen": 549191680 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008419194350826512, + "loss": 0.0831, + "theoretical_loss": 3.9040725312921616, + "tokens_seen": 549453824 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008418391911410688, + "loss": 0.0827, + "theoretical_loss": 3.9038766566849263, + "tokens_seen": 549715968 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008417589471994864, + "loss": 0.0825, + "theoretical_loss": 3.903680901602254, + "tokens_seen": 549978112 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008416787032579041, + "loss": 0.0814, + "theoretical_loss": 3.9034852659142913, + "tokens_seen": 550240256 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0012200219789519906, + "objective/train/docs_used": 206347, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7369581460952759, + "objective/train/original_loss": 1.7369580268859863, + "objective/train/theoretical_loss": 3.9032897494913876, + "objective/train/tokens_used": 570962400, + "objective/train/value_avg": -0.00876617431640625, + "objective/train/value_loss": 0.0002591983356978744, + "objective/train/value_max": -0.0002359151840209961, + "objective/train/value_min": -0.483154296875, + "objective/train/value_reward_corr": 0.6927815042926995, + "objective/train/value_std": 0.0133209228515625, + "objective/train/weight_avg": 1.0013376474380493, + "objective/train/weighted_lm_loss": 1.73981511592865, + "objective/train/weights_max": 1.3604191541671753, + "objective/train/weights_min": 0.38016000390052795, + "theoretical_loss": 3.9032897494913876, + "tokens_seen": 550502400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008415984593163216, + "loss": 0.0813, + "theoretical_loss": 3.9032897494913876, + "tokens_seen": 550502400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008415182153747393, + "loss": 0.0812, + "theoretical_loss": 3.9030943522040946, + "tokens_seen": 550764544 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008414379714331568, + "loss": 0.0848, + "theoretical_loss": 3.902899073923166, + "tokens_seen": 551026688 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008413577274915744, + "loss": 0.0828, + "theoretical_loss": 3.902703914519557, + "tokens_seen": 551288832 + }, + { + "epoch": 0.17, + "learning_rate": 0.000841277483549992, + "loss": 0.083, + "theoretical_loss": 3.9025088738644236, + "tokens_seen": 551550976 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008411972396084095, + "loss": 0.0801, + "theoretical_loss": 3.9023139518291243, + "tokens_seen": 551813120 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008411169956668272, + "loss": 0.0809, + "theoretical_loss": 3.902119148285216, + "tokens_seen": 552075264 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008410367517252447, + "loss": 0.0828, + "theoretical_loss": 3.9019244631044563, + "tokens_seen": 552337408 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008409565077836624, + "loss": 0.0801, + "theoretical_loss": 3.9017298961588027, + "tokens_seen": 552599552 + }, + { + "epoch": 0.17, + "learning_rate": 0.00084087626384208, + "loss": 0.0819, + "theoretical_loss": 3.901535447320412, + "tokens_seen": 552861696 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008407960199004975, + "loss": 0.0812, + "theoretical_loss": 3.901341116461639, + "tokens_seen": 553123840 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008407157759589151, + "loss": 0.0807, + "theoretical_loss": 3.9011469034550372, + "tokens_seen": 553385984 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008406355320173327, + "loss": 0.0819, + "theoretical_loss": 3.900952808173358, + "tokens_seen": 553648128 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.00015300983795896173, + "objective/train/docs_used": 207589, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8608996868133545, + "objective/train/original_loss": 1.8608994483947754, + "objective/train/theoretical_loss": 3.9008558046396535, + "objective/train/tokens_used": 574239200, + "objective/train/value_avg": -0.0084075927734375, + "objective/train/value_loss": 0.0002866761351469904, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.2509765625, + "objective/train/value_reward_corr": 0.6109745044269907, + "objective/train/value_std": 0.01207733154296875, + "objective/train/weight_avg": 0.999976634979248, + "objective/train/weighted_lm_loss": 1.8602499961853027, + "objective/train/weights_max": 1.1194891929626465, + "objective/train/weights_min": 0.36891940236091614, + "theoretical_loss": 3.9008558046396535, + "tokens_seen": 553779200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008405552880757503, + "loss": 0.0839, + "theoretical_loss": 3.900758830489551, + "tokens_seen": 553910272 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008404750441341678, + "loss": 0.083, + "theoretical_loss": 3.900564970276762, + "tokens_seen": 554172416 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008403948001925855, + "loss": 0.0812, + "theoretical_loss": 3.9003712274083346, + "tokens_seen": 554434560 + }, + { + "epoch": 0.17, + "learning_rate": 0.000840314556251003, + "loss": 0.0826, + "theoretical_loss": 3.9001776017578074, + "tokens_seen": 554696704 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008402343123094207, + "loss": 0.0817, + "theoretical_loss": 3.899984093198916, + "tokens_seen": 554958848 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008401540683678383, + "loss": 0.0802, + "theoretical_loss": 3.899790701605592, + "tokens_seen": 555220992 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008400738244262558, + "loss": 0.081, + "theoretical_loss": 3.899597426851961, + "tokens_seen": 555483136 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008399935804846735, + "loss": 0.0781, + "theoretical_loss": 3.899404268812343, + "tokens_seen": 555745280 + }, + { + "epoch": 0.17, + "learning_rate": 0.000839913336543091, + "loss": 0.0806, + "theoretical_loss": 3.8992112273612545, + "tokens_seen": 556007424 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008398330926015086, + "loss": 0.0824, + "theoretical_loss": 3.8990183023734044, + "tokens_seen": 556269568 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008397528486599262, + "loss": 0.0818, + "theoretical_loss": 3.8988254937236952, + "tokens_seen": 556531712 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008396726047183438, + "loss": 0.0819, + "theoretical_loss": 3.8986328012872233, + "tokens_seen": 556793856 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0018912331433966756, + "objective/train/docs_used": 208740, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6763869524002075, + "objective/train/original_loss": 1.6763869524002075, + "objective/train/theoretical_loss": 3.8984402249392778, + "objective/train/tokens_used": 577516000, + "objective/train/value_avg": -0.00763702392578125, + "objective/train/value_loss": 0.00018121585890185088, + "objective/train/value_max": -0.00034880638122558594, + "objective/train/value_min": -0.187255859375, + "objective/train/value_reward_corr": 0.5036202731375501, + "objective/train/value_std": 0.00873565673828125, + "objective/train/weight_avg": 1.0019742250442505, + "objective/train/weighted_lm_loss": 1.6807695627212524, + "objective/train/weights_max": 1.0873464345932007, + "objective/train/weights_min": 0.368623286485672, + "theoretical_loss": 3.8984402249392778, + "tokens_seen": 557056000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008395923607767613, + "loss": 0.0794, + "theoretical_loss": 3.8984402249392778, + "tokens_seen": 557056000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000839512116835179, + "loss": 0.0784, + "theoretical_loss": 3.8982477645553395, + "tokens_seen": 557318144 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008394318728935966, + "loss": 0.0799, + "theoretical_loss": 3.898055420011082, + "tokens_seen": 557580288 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008393516289520141, + "loss": 0.0782, + "theoretical_loss": 3.8978631911823705, + "tokens_seen": 557842432 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008392713850104318, + "loss": 0.0802, + "theoretical_loss": 3.8976710779452612, + "tokens_seen": 558104576 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008391911410688493, + "loss": 0.0815, + "theoretical_loss": 3.8974790801760015, + "tokens_seen": 558366720 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008391108971272669, + "loss": 0.0841, + "theoretical_loss": 3.897287197751029, + "tokens_seen": 558628864 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008390306531856845, + "loss": 0.0831, + "theoretical_loss": 3.897095430546971, + "tokens_seen": 558891008 + }, + { + "epoch": 0.17, + "learning_rate": 0.000838950409244102, + "loss": 0.0834, + "theoretical_loss": 3.896903778440646, + "tokens_seen": 559153152 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008388701653025197, + "loss": 0.0823, + "theoretical_loss": 3.896712241309061, + "tokens_seen": 559415296 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008387899213609372, + "loss": 0.0833, + "theoretical_loss": 3.896520819029411, + "tokens_seen": 559677440 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008387096774193549, + "loss": 0.082, + "theoretical_loss": 3.896329511479082, + "tokens_seen": 559939584 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008386294334777725, + "loss": 0.0792, + "theoretical_loss": 3.8961383185356455, + "tokens_seen": 560201728 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0007566718268208206, + "objective/train/docs_used": 209977, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.573256254196167, + "objective/train/original_loss": 1.573256015777588, + "objective/train/theoretical_loss": 3.8960427650033047, + "objective/train/tokens_used": 580792800, + "objective/train/value_avg": -0.00795745849609375, + "objective/train/value_loss": 0.00029120329418219626, + "objective/train/value_max": -0.00023055076599121094, + "objective/train/value_min": -0.6328125, + "objective/train/value_reward_corr": 0.6374020027425222, + "objective/train/value_std": 0.012054443359375, + "objective/train/weight_avg": 1.0008906126022339, + "objective/train/weighted_lm_loss": 1.5738105773925781, + "objective/train/weights_max": 1.2740330696105957, + "objective/train/weights_min": 0.3893653154373169, + "theoretical_loss": 3.8960427650033047, + "tokens_seen": 560332800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008385491895361901, + "loss": 0.0794, + "theoretical_loss": 3.895947240076862, + "tokens_seen": 560463872 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008384689455946076, + "loss": 0.0784, + "theoretical_loss": 3.895756275980681, + "tokens_seen": 560726016 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008383887016530252, + "loss": 0.0817, + "theoretical_loss": 3.895565426125237, + "tokens_seen": 560988160 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008383084577114428, + "loss": 0.0812, + "theoretical_loss": 3.8953746903888513, + "tokens_seen": 561250304 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008382282137698603, + "loss": 0.0787, + "theoretical_loss": 3.895184068650033, + "tokens_seen": 561512448 + }, + { + "epoch": 0.17, + "learning_rate": 0.000838147969828278, + "loss": 0.0817, + "theoretical_loss": 3.8949935607874764, + "tokens_seen": 561774592 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008380677258866955, + "loss": 0.0806, + "theoretical_loss": 3.8948031666800613, + "tokens_seen": 562036736 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008379874819451132, + "loss": 0.0838, + "theoretical_loss": 3.8946128862068528, + "tokens_seen": 562298880 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008379072380035308, + "loss": 0.0804, + "theoretical_loss": 3.8944227192471006, + "tokens_seen": 562561024 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008378269940619483, + "loss": 0.082, + "theoretical_loss": 3.8942326656802395, + "tokens_seen": 562823168 + }, + { + "epoch": 0.17, + "learning_rate": 0.000837746750120366, + "loss": 0.0829, + "theoretical_loss": 3.894042725385888, + "tokens_seen": 563085312 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008376665061787835, + "loss": 0.0837, + "theoretical_loss": 3.893852898243849, + "tokens_seen": 563347456 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0011895080097019672, + "objective/train/docs_used": 211161, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.739345908164978, + "objective/train/original_loss": 1.7393460273742676, + "objective/train/theoretical_loss": 3.8936631841341076, + "objective/train/tokens_used": 584069600, + "objective/train/value_avg": -0.009521484375, + "objective/train/value_loss": 0.000342073617503047, + "objective/train/value_max": -0.00018525123596191406, + "objective/train/value_min": -0.6494140625, + "objective/train/value_reward_corr": 0.6713985854388784, + "objective/train/value_std": 0.0169830322265625, + "objective/train/weight_avg": 1.0013439655303955, + "objective/train/weighted_lm_loss": 1.7415797710418701, + "objective/train/weights_max": 1.3866688013076782, + "objective/train/weights_min": 0.3763507008552551, + "theoretical_loss": 3.8936631841341076, + "tokens_seen": 563609600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008375862622372011, + "loss": 0.0801, + "theoretical_loss": 3.8936631841341076, + "tokens_seen": 563609600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008375060182956187, + "loss": 0.0814, + "theoretical_loss": 3.893473582936833, + "tokens_seen": 563871744 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008374257743540363, + "loss": 0.083, + "theoretical_loss": 3.8932840945323774, + "tokens_seen": 564133888 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008373455304124538, + "loss": 0.0809, + "theoretical_loss": 3.8930947188012737, + "tokens_seen": 564396032 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008372652864708715, + "loss": 0.0784, + "theoretical_loss": 3.8929054556242377, + "tokens_seen": 564658176 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008371850425292891, + "loss": 0.0789, + "theoretical_loss": 3.892716304882167, + "tokens_seen": 564920320 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008371047985877066, + "loss": 0.0815, + "theoretical_loss": 3.892527266456141, + "tokens_seen": 565182464 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008370245546461243, + "loss": 0.0829, + "theoretical_loss": 3.8923383402274174, + "tokens_seen": 565444608 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008369443107045418, + "loss": 0.0822, + "theoretical_loss": 3.8921495260774375, + "tokens_seen": 565706752 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008368640667629594, + "loss": 0.0811, + "theoretical_loss": 3.8919608238878216, + "tokens_seen": 565968896 + }, + { + "epoch": 0.17, + "learning_rate": 0.000836783822821377, + "loss": 0.0774, + "theoretical_loss": 3.891772233540369, + "tokens_seen": 566231040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008367035788797946, + "loss": 0.08, + "theoretical_loss": 3.8915837549170584, + "tokens_seen": 566493184 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008366233349382121, + "loss": 0.0794, + "theoretical_loss": 3.89139538790005, + "tokens_seen": 566755328 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0005871827597729862, + "objective/train/docs_used": 212374, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6017706394195557, + "objective/train/original_loss": 1.6017706394195557, + "objective/train/theoretical_loss": 3.8913012462071315, + "objective/train/tokens_used": 587346400, + "objective/train/value_avg": -0.00801849365234375, + "objective/train/value_loss": 0.00017772662977222353, + "objective/train/value_max": -0.00031757354736328125, + "objective/train/value_min": -0.368408203125, + "objective/train/value_reward_corr": 0.6835943969490854, + "objective/train/value_std": 0.0125732421875, + "objective/train/weight_avg": 1.0006738901138306, + "objective/train/weighted_lm_loss": 1.6030828952789307, + "objective/train/weights_max": 1.1462258100509644, + "objective/train/weights_min": 0.7185884118080139, + "theoretical_loss": 3.8913012462071315, + "tokens_seen": 566886400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008365430909966297, + "loss": 0.0827, + "theoretical_loss": 3.8912071323716795, + "tokens_seen": 567017472 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008364628470550474, + "loss": 0.0814, + "theoretical_loss": 3.8910189882144626, + "tokens_seen": 567279616 + }, + { + "epoch": 0.17, + "learning_rate": 0.000836382603113465, + "loss": 0.0837, + "theoretical_loss": 3.8908309553110936, + "tokens_seen": 567541760 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008363023591718826, + "loss": 0.0806, + "theoretical_loss": 3.8906430335444426, + "tokens_seen": 567803904 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008362221152303001, + "loss": 0.0813, + "theoretical_loss": 3.8904552227975593, + "tokens_seen": 568066048 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008361418712887178, + "loss": 0.082, + "theoretical_loss": 3.8902675229536685, + "tokens_seen": 568328192 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008360616273471353, + "loss": 0.0825, + "theoretical_loss": 3.8900799338961725, + "tokens_seen": 568590336 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008359813834055528, + "loss": 0.0832, + "theoretical_loss": 3.8898924555086496, + "tokens_seen": 568852480 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008359011394639705, + "loss": 0.0811, + "theoretical_loss": 3.8897050876748542, + "tokens_seen": 569114624 + }, + { + "epoch": 0.17, + "learning_rate": 0.000835820895522388, + "loss": 0.0825, + "theoretical_loss": 3.8895178302787166, + "tokens_seen": 569376768 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008357406515808057, + "loss": 0.0817, + "theoretical_loss": 3.8893306832043404, + "tokens_seen": 569638912 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008356604076392233, + "loss": 0.0835, + "theoretical_loss": 3.8891436463360076, + "tokens_seen": 569901056 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": -0.0002838138898368925, + "objective/train/docs_used": 213119, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5427865982055664, + "objective/train/original_loss": 1.5427868366241455, + "objective/train/theoretical_loss": 3.8889567195581716, + "objective/train/tokens_used": 590623200, + "objective/train/value_avg": -0.007099151611328125, + "objective/train/value_loss": 0.0001902274671010673, + "objective/train/value_max": -0.00019109249114990234, + "objective/train/value_min": -0.2220458984375, + "objective/train/value_reward_corr": 0.6440366057374965, + "objective/train/value_std": 0.00963592529296875, + "objective/train/weight_avg": 0.9998065829277039, + "objective/train/weighted_lm_loss": 1.543387532234192, + "objective/train/weights_max": 1.1371427774429321, + "objective/train/weights_min": 0.6790033578872681, + "theoretical_loss": 3.8889567195581716, + "tokens_seen": 570163200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008355801636976409, + "loss": 0.082, + "theoretical_loss": 3.8889567195581716, + "tokens_seen": 570163200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008354999197560584, + "loss": 0.0789, + "theoretical_loss": 3.8887699027554614, + "tokens_seen": 570425344 + }, + { + "epoch": 0.17, + "learning_rate": 0.000835419675814476, + "loss": 0.0805, + "theoretical_loss": 3.8885831958126786, + "tokens_seen": 570687488 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008353394318728936, + "loss": 0.0826, + "theoretical_loss": 3.8883965986148015, + "tokens_seen": 570949632 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008352591879313111, + "loss": 0.0792, + "theoretical_loss": 3.888210111046978, + "tokens_seen": 571211776 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008351789439897288, + "loss": 0.0824, + "theoretical_loss": 3.8880237329945295, + "tokens_seen": 571473920 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008350987000481463, + "loss": 0.0839, + "theoretical_loss": 3.887837464342952, + "tokens_seen": 571736064 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008350184561065641, + "loss": 0.0831, + "theoretical_loss": 3.8876513049779113, + "tokens_seen": 571998208 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008349382121649816, + "loss": 0.083, + "theoretical_loss": 3.887465254785246, + "tokens_seen": 572260352 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008348579682233991, + "loss": 0.0838, + "theoretical_loss": 3.887279313650967, + "tokens_seen": 572522496 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008347777242818168, + "loss": 0.0834, + "theoretical_loss": 3.8870934814612546, + "tokens_seen": 572784640 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008346974803402343, + "loss": 0.0786, + "theoretical_loss": 3.886907758102461, + "tokens_seen": 573046784 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008346172363986519, + "loss": 0.0831, + "theoretical_loss": 3.8867221434611094, + "tokens_seen": 573308928 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.00023172213695943356, + "objective/train/docs_used": 214228, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.640936255455017, + "objective/train/original_loss": 1.640936255455017, + "objective/train/theoretical_loss": 3.8866293768740587, + "objective/train/tokens_used": 593900000, + "objective/train/value_avg": -0.00962066650390625, + "objective/train/value_loss": 0.00022791478841099888, + "objective/train/value_max": -0.0002434253692626953, + "objective/train/value_min": -0.2286376953125, + "objective/train/value_reward_corr": 0.7870548857924071, + "objective/train/value_std": 0.0165863037109375, + "objective/train/weight_avg": 1.0003434419631958, + "objective/train/weighted_lm_loss": 1.641239881515503, + "objective/train/weights_max": 1.2273142337799072, + "objective/train/weights_min": 0.6072003245353699, + "theoretical_loss": 3.8866293768740587, + "tokens_seen": 573440000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008345369924570695, + "loss": 0.0818, + "theoretical_loss": 3.8865366374238914, + "tokens_seen": 573571072 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008344567485154871, + "loss": 0.0821, + "theoretical_loss": 3.88635123987767, + "tokens_seen": 573833216 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008343765045739046, + "loss": 0.0825, + "theoretical_loss": 3.8861659507094766, + "tokens_seen": 574095360 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008342962606323223, + "loss": 0.0816, + "theoretical_loss": 3.885980769806513, + "tokens_seen": 574357504 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008342160166907399, + "loss": 0.0846, + "theoretical_loss": 3.8857956970561487, + "tokens_seen": 574619648 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008341357727491574, + "loss": 0.0788, + "theoretical_loss": 3.8856107323459215, + "tokens_seen": 574881792 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008340555288075751, + "loss": 0.0822, + "theoretical_loss": 3.8854258755635387, + "tokens_seen": 575143936 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008339752848659926, + "loss": 0.0831, + "theoretical_loss": 3.885241126596874, + "tokens_seen": 575406080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008338950409244103, + "loss": 0.0803, + "theoretical_loss": 3.885056485333969, + "tokens_seen": 575668224 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008338147969828278, + "loss": 0.083, + "theoretical_loss": 3.884871951663034, + "tokens_seen": 575930368 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008337345530412454, + "loss": 0.078, + "theoretical_loss": 3.8846875254724442, + "tokens_seen": 576192512 + }, + { + "epoch": 0.17, + "learning_rate": 0.000833654309099663, + "loss": 0.08, + "theoretical_loss": 3.8845032066507414, + "tokens_seen": 576454656 + }, + { + "epoch": 0.17, + "objective/train/advantage_avg": 0.0014360037166625261, + "objective/train/docs_used": 215374, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6849347352981567, + "objective/train/original_loss": 1.6849347352981567, + "objective/train/theoretical_loss": 3.884318995086635, + "objective/train/tokens_used": 597176800, + "objective/train/value_avg": -0.00965118408203125, + "objective/train/value_loss": 0.00028802509768866, + "objective/train/value_max": -0.0003101825714111328, + "objective/train/value_min": -0.41748046875, + "objective/train/value_reward_corr": 0.6993208086391764, + "objective/train/value_std": 0.015655517578125, + "objective/train/weight_avg": 1.0015720129013062, + "objective/train/weighted_lm_loss": 1.6866198778152466, + "objective/train/weights_max": 1.1339787244796753, + "objective/train/weights_min": 0.41069987416267395, + "theoretical_loss": 3.884318995086635, + "tokens_seen": 576716800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008335740651580805, + "loss": 0.0794, + "theoretical_loss": 3.884318995086635, + "tokens_seen": 576716800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008334938212164982, + "loss": 0.0816, + "theoretical_loss": 3.8841348906689985, + "tokens_seen": 576978944 + }, + { + "epoch": 0.17, + "learning_rate": 0.0008334135772749158, + "loss": 0.0831, + "theoretical_loss": 3.8839508932868725, + "tokens_seen": 577241088 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008333333333333334, + "loss": 0.0806, + "theoretical_loss": 3.8837670028294626, + "tokens_seen": 577503232 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008332530893917509, + "loss": 0.0822, + "theoretical_loss": 3.883583219186138, + "tokens_seen": 577765376 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008331728454501686, + "loss": 0.0824, + "theoretical_loss": 3.8833995422464342, + "tokens_seen": 578027520 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008330926015085861, + "loss": 0.0821, + "theoretical_loss": 3.88321597190005, + "tokens_seen": 578289664 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008330123575670036, + "loss": 0.0824, + "theoretical_loss": 3.883032508036848, + "tokens_seen": 578551808 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008329321136254213, + "loss": 0.0797, + "theoretical_loss": 3.882849150546856, + "tokens_seen": 578813952 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008328518696838388, + "loss": 0.0833, + "theoretical_loss": 3.8826658993202625, + "tokens_seen": 579076096 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008327716257422565, + "loss": 0.0817, + "theoretical_loss": 3.8824827542474214, + "tokens_seen": 579338240 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008326913818006741, + "loss": 0.0822, + "theoretical_loss": 3.882299715218848, + "tokens_seen": 579600384 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008326111378590917, + "loss": 0.0817, + "theoretical_loss": 3.8821167821252196, + "tokens_seen": 579862528 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0010556569322943687, + "objective/train/docs_used": 216668, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7441470623016357, + "objective/train/original_loss": 1.7441470623016357, + "objective/train/theoretical_loss": 3.8820253552698913, + "objective/train/tokens_used": 600453600, + "objective/train/value_avg": -0.01125335693359375, + "objective/train/value_loss": 0.0008476045331917703, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.91845703125, + "objective/train/value_reward_corr": 0.5798144182694726, + "objective/train/value_std": 0.020416259765625, + "objective/train/weight_avg": 1.0014194250106812, + "objective/train/weighted_lm_loss": 1.7468537092208862, + "objective/train/weights_max": 1.7236318588256836, + "objective/train/weights_min": 0.3710477352142334, + "theoretical_loss": 3.8820253552698913, + "tokens_seen": 579993600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008325308939175093, + "loss": 0.083, + "theoretical_loss": 3.8819339548573772, + "tokens_seen": 580124672 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008324506499759268, + "loss": 0.0819, + "theoretical_loss": 3.881751233306322, + "tokens_seen": 580386816 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008323704060343444, + "loss": 0.0818, + "theoretical_loss": 3.881568617363218, + "tokens_seen": 580648960 + }, + { + "epoch": 0.18, + "learning_rate": 0.000832290162092762, + "loss": 0.0827, + "theoretical_loss": 3.881386106919389, + "tokens_seen": 580911104 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008322099181511796, + "loss": 0.083, + "theoretical_loss": 3.88120370186632, + "tokens_seen": 581173248 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008321296742095971, + "loss": 0.0806, + "theoretical_loss": 3.881021402095657, + "tokens_seen": 581435392 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008320494302680149, + "loss": 0.081, + "theoretical_loss": 3.880839207499205, + "tokens_seen": 581697536 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008319691863264324, + "loss": 0.0823, + "theoretical_loss": 3.880657117968931, + "tokens_seen": 581959680 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008318889423848499, + "loss": 0.0846, + "theoretical_loss": 3.880475133396959, + "tokens_seen": 582221824 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008318086984432676, + "loss": 0.0817, + "theoretical_loss": 3.8802932536755748, + "tokens_seen": 582483968 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008317284545016851, + "loss": 0.0835, + "theoretical_loss": 3.880111478697221, + "tokens_seen": 582746112 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008316482105601027, + "loss": 0.0851, + "theoretical_loss": 3.8799298083545004, + "tokens_seen": 583008256 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0006918111466802657, + "objective/train/docs_used": 217803, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7163397073745728, + "objective/train/original_loss": 1.7163399457931519, + "objective/train/theoretical_loss": 3.879748242540173, + "objective/train/tokens_used": 603730400, + "objective/train/value_avg": -0.0118408203125, + "objective/train/value_loss": 0.00036437122616916895, + "objective/train/value_max": -0.0002892017364501953, + "objective/train/value_min": -0.70654296875, + "objective/train/value_reward_corr": 0.6686834210191256, + "objective/train/value_std": 0.0202178955078125, + "objective/train/weight_avg": 1.0008691549301147, + "objective/train/weighted_lm_loss": 1.7178725004196167, + "objective/train/weights_max": 1.8649041652679443, + "objective/train/weights_min": 0.3742033541202545, + "theoretical_loss": 3.879748242540173, + "tokens_seen": 583270400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008315679666185203, + "loss": 0.0817, + "theoretical_loss": 3.879748242540173, + "tokens_seen": 583270400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008314877226769379, + "loss": 0.084, + "theoretical_loss": 3.8795667811471573, + "tokens_seen": 583532544 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008314074787353555, + "loss": 0.0824, + "theoretical_loss": 3.8793854240685306, + "tokens_seen": 583794688 + }, + { + "epoch": 0.18, + "learning_rate": 0.000831327234793773, + "loss": 0.0821, + "theoretical_loss": 3.879204171197525, + "tokens_seen": 584056832 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008312469908521907, + "loss": 0.0813, + "theoretical_loss": 3.879023022427533, + "tokens_seen": 584318976 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008311667469106083, + "loss": 0.0818, + "theoretical_loss": 3.878841977652101, + "tokens_seen": 584581120 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008310865029690259, + "loss": 0.0819, + "theoretical_loss": 3.8786610367649343, + "tokens_seen": 584843264 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008310062590274434, + "loss": 0.0829, + "theoretical_loss": 3.8784801996598928, + "tokens_seen": 585105408 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008309260150858611, + "loss": 0.0824, + "theoretical_loss": 3.878299466230992, + "tokens_seen": 585367552 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008308457711442786, + "loss": 0.0813, + "theoretical_loss": 3.8781188363724057, + "tokens_seen": 585629696 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008307655272026961, + "loss": 0.0813, + "theoretical_loss": 3.87793830997846, + "tokens_seen": 585891840 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008306852832611138, + "loss": 0.0807, + "theoretical_loss": 3.8777578869436384, + "tokens_seen": 586153984 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008306050393195313, + "loss": 0.0822, + "theoretical_loss": 3.8775775671625765, + "tokens_seen": 586416128 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0013700941344723105, + "objective/train/docs_used": 218927, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.789874792098999, + "objective/train/original_loss": 1.789874792098999, + "objective/train/theoretical_loss": 3.8774874459593223, + "objective/train/tokens_used": 607007200, + "objective/train/value_avg": -0.00925445556640625, + "objective/train/value_loss": 0.0004925947869196534, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.70556640625, + "objective/train/value_reward_corr": 0.5972247492646405, + "objective/train/value_std": 0.0181732177734375, + "objective/train/weight_avg": 1.0015876293182373, + "objective/train/weighted_lm_loss": 1.7908934354782104, + "objective/train/weights_max": 1.9563560485839844, + "objective/train/weights_min": 0.3702559173107147, + "theoretical_loss": 3.8774874459593223, + "tokens_seen": 586547200 + }, + { + "epoch": 0.18, + "learning_rate": 0.000830524795377949, + "loss": 0.0824, + "theoretical_loss": 3.8773973505300674, + "tokens_seen": 586678272 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008304445514363666, + "loss": 0.0803, + "theoretical_loss": 3.877217236941055, + "tokens_seen": 586940416 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008303643074947842, + "loss": 0.0836, + "theoretical_loss": 3.877037226290641, + "tokens_seen": 587202560 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008302840635532017, + "loss": 0.0801, + "theoretical_loss": 3.8768573184740767, + "tokens_seen": 587464704 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008302038196116194, + "loss": 0.0806, + "theoretical_loss": 3.87667751338677, + "tokens_seen": 587726848 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008301235756700369, + "loss": 0.0803, + "theoretical_loss": 3.8764978109242794, + "tokens_seen": 587988992 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008300433317284545, + "loss": 0.0822, + "theoretical_loss": 3.8763182109823173, + "tokens_seen": 588251136 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008299630877868721, + "loss": 0.0763, + "theoretical_loss": 3.8761387134567475, + "tokens_seen": 588513280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008298828438452896, + "loss": 0.0813, + "theoretical_loss": 3.8759593182435874, + "tokens_seen": 588775424 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008298025999037074, + "loss": 0.0849, + "theoretical_loss": 3.875780025239005, + "tokens_seen": 589037568 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008297223559621249, + "loss": 0.0797, + "theoretical_loss": 3.8756008343393202, + "tokens_seen": 589299712 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008296421120205425, + "loss": 0.0835, + "theoretical_loss": 3.8754217454410043, + "tokens_seen": 589561856 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -6.183000368764624e-05, + "objective/train/docs_used": 220165, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6896483898162842, + "objective/train/original_loss": 1.6896482706069946, + "objective/train/theoretical_loss": 3.875242758440679, + "objective/train/tokens_used": 610284000, + "objective/train/value_avg": -0.0089569091796875, + "objective/train/value_loss": 0.00046085796202532947, + "objective/train/value_max": -0.0002415180206298828, + "objective/train/value_min": -0.673828125, + "objective/train/value_reward_corr": 0.5928429396910613, + "objective/train/value_std": 0.015838623046875, + "objective/train/weight_avg": 1.0001388788223267, + "objective/train/weighted_lm_loss": 1.6889548301696777, + "objective/train/weights_max": 1.2902297973632812, + "objective/train/weights_min": 0.36886030435562134, + "theoretical_loss": 3.875242758440679, + "tokens_seen": 589824000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008295618680789601, + "loss": 0.0821, + "theoretical_loss": 3.875242758440679, + "tokens_seen": 589824000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008294816241373776, + "loss": 0.0809, + "theoretical_loss": 3.875063873235117, + "tokens_seen": 590086144 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008294013801957952, + "loss": 0.0854, + "theoretical_loss": 3.874885089721242, + "tokens_seen": 590348288 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008293211362542128, + "loss": 0.0782, + "theoretical_loss": 3.8747064077961264, + "tokens_seen": 590610432 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008292408923126304, + "loss": 0.079, + "theoretical_loss": 3.874527827356994, + "tokens_seen": 590872576 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008291606483710479, + "loss": 0.0832, + "theoretical_loss": 3.8743493483012172, + "tokens_seen": 591134720 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008290804044294657, + "loss": 0.0806, + "theoretical_loss": 3.874170970526317, + "tokens_seen": 591396864 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008290001604878832, + "loss": 0.0826, + "theoretical_loss": 3.873992693929965, + "tokens_seen": 591659008 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008289199165463007, + "loss": 0.0782, + "theoretical_loss": 3.8738145184099797, + "tokens_seen": 591921152 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008288396726047184, + "loss": 0.0798, + "theoretical_loss": 3.8736364438643296, + "tokens_seen": 592183296 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008287594286631359, + "loss": 0.0833, + "theoretical_loss": 3.87345847019113, + "tokens_seen": 592445440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008286791847215536, + "loss": 0.079, + "theoretical_loss": 3.8732805972886446, + "tokens_seen": 592707584 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008285989407799711, + "loss": 0.0794, + "theoretical_loss": 3.873102825055285, + "tokens_seen": 592969728 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0022287839092314243, + "objective/train/docs_used": 221319, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6529507637023926, + "objective/train/original_loss": 1.6529508829116821, + "objective/train/theoretical_loss": 3.873013976657821, + "objective/train/tokens_used": 613560800, + "objective/train/value_avg": -0.01081085205078125, + "objective/train/value_loss": 0.0005324392695911229, + "objective/train/value_max": -0.00033926963806152344, + "objective/train/value_min": -0.43798828125, + "objective/train/value_reward_corr": 0.6031346357258855, + "objective/train/value_std": 0.0146331787109375, + "objective/train/weight_avg": 1.0024420022964478, + "objective/train/weighted_lm_loss": 1.6559507846832275, + "objective/train/weights_max": 1.158463716506958, + "objective/train/weights_min": 0.2581728398799896, + "theoretical_loss": 3.873013976657821, + "tokens_seen": 593100800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008285186968383887, + "loss": 0.0797, + "theoretical_loss": 3.87292515338961, + "tokens_seen": 593231872 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008284384528968063, + "loss": 0.0804, + "theoretical_loss": 3.872747582190324, + "tokens_seen": 593494016 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008283582089552239, + "loss": 0.0818, + "theoretical_loss": 3.8725701113562794, + "tokens_seen": 593756160 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008282779650136415, + "loss": 0.0793, + "theoretical_loss": 3.8723927407864758, + "tokens_seen": 594018304 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008281977210720591, + "loss": 0.0818, + "theoretical_loss": 3.8722154703800573, + "tokens_seen": 594280448 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008281174771304767, + "loss": 0.0825, + "theoretical_loss": 3.8720383000363148, + "tokens_seen": 594542592 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008280372331888942, + "loss": 0.081, + "theoretical_loss": 3.871861229654684, + "tokens_seen": 594804736 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008279569892473119, + "loss": 0.0779, + "theoretical_loss": 3.8716842591347476, + "tokens_seen": 595066880 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008278767453057294, + "loss": 0.0822, + "theoretical_loss": 3.871507388376231, + "tokens_seen": 595329024 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008277965013641469, + "loss": 0.0796, + "theoretical_loss": 3.871330617279006, + "tokens_seen": 595591168 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008277162574225646, + "loss": 0.0815, + "theoretical_loss": 3.8711539457430897, + "tokens_seen": 595853312 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008276360134809821, + "loss": 0.0797, + "theoretical_loss": 3.87097737366864, + "tokens_seen": 596115456 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.00167937483638525, + "objective/train/docs_used": 222574, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.542468547821045, + "objective/train/original_loss": 1.5424683094024658, + "objective/train/theoretical_loss": 3.870800900955963, + "objective/train/tokens_used": 616837600, + "objective/train/value_avg": -0.0085296630859375, + "objective/train/value_loss": 0.0005001907702535391, + "objective/train/value_max": -0.00029587745666503906, + "objective/train/value_min": -0.86962890625, + "objective/train/value_reward_corr": 0.6588531620632475, + "objective/train/value_std": 0.0174713134765625, + "objective/train/weight_avg": 1.0018889904022217, + "objective/train/weighted_lm_loss": 1.5452805757522583, + "objective/train/weights_max": 1.3651901483535767, + "objective/train/weights_min": 0.37030255794525146, + "theoretical_loss": 3.870800900955963, + "tokens_seen": 596377600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008275557695393999, + "loss": 0.0794, + "theoretical_loss": 3.870800900955963, + "tokens_seen": 596377600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008274755255978174, + "loss": 0.0816, + "theoretical_loss": 3.8706245275055062, + "tokens_seen": 596639744 + }, + { + "epoch": 0.18, + "learning_rate": 0.000827395281656235, + "loss": 0.08, + "theoretical_loss": 3.8704482532178606, + "tokens_seen": 596901888 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008273150377146526, + "loss": 0.0839, + "theoretical_loss": 3.8702720779937607, + "tokens_seen": 597164032 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008272347937730702, + "loss": 0.0793, + "theoretical_loss": 3.8700960017340833, + "tokens_seen": 597426176 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008271545498314877, + "loss": 0.08, + "theoretical_loss": 3.8699200243398493, + "tokens_seen": 597688320 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008270743058899053, + "loss": 0.0848, + "theoretical_loss": 3.8697441457122204, + "tokens_seen": 597950464 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008269940619483229, + "loss": 0.0823, + "theoretical_loss": 3.8695683657525013, + "tokens_seen": 598212608 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008269138180067404, + "loss": 0.0826, + "theoretical_loss": 3.8693926843621376, + "tokens_seen": 598474752 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008268335740651582, + "loss": 0.0805, + "theoretical_loss": 3.8692171014427177, + "tokens_seen": 598736896 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008267533301235757, + "loss": 0.0796, + "theoretical_loss": 3.86904161689597, + "tokens_seen": 598999040 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008266730861819933, + "loss": 0.0832, + "theoretical_loss": 3.868866230623766, + "tokens_seen": 599261184 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008265928422404109, + "loss": 0.0809, + "theoretical_loss": 3.8686909425281146, + "tokens_seen": 599523328 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0010342653840780258, + "objective/train/docs_used": 223680, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6975395679473877, + "objective/train/original_loss": 1.6975395679473877, + "objective/train/theoretical_loss": 3.868603335265913, + "objective/train/tokens_used": 620114400, + "objective/train/value_avg": -0.010101318359375, + "objective/train/value_loss": 0.00020030724408570677, + "objective/train/value_max": -0.00017404556274414062, + "objective/train/value_min": -0.347900390625, + "objective/train/value_reward_corr": 0.8020624486643968, + "objective/train/value_std": 0.0178985595703125, + "objective/train/weight_avg": 1.00112783908844, + "objective/train/weighted_lm_loss": 1.6995559930801392, + "objective/train/weights_max": 1.3582996129989624, + "objective/train/weights_min": 0.39774826169013977, + "theoretical_loss": 3.868603335265913, + "tokens_seen": 599654400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008265125982988284, + "loss": 0.0814, + "theoretical_loss": 3.8685157525111684, + "tokens_seen": 599785472 + }, + { + "epoch": 0.18, + "learning_rate": 0.000826432354357246, + "loss": 0.0816, + "theoretical_loss": 3.8683406604752184, + "tokens_seen": 600047616 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008263521104156636, + "loss": 0.0829, + "theoretical_loss": 3.868165666322696, + "tokens_seen": 600309760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008262718664740812, + "loss": 0.0828, + "theoretical_loss": 3.8679907699561733, + "tokens_seen": 600571904 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008261916225324988, + "loss": 0.0822, + "theoretical_loss": 3.86781597127836, + "tokens_seen": 600834048 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008261113785909165, + "loss": 0.0826, + "theoretical_loss": 3.867641270192107, + "tokens_seen": 601096192 + }, + { + "epoch": 0.18, + "learning_rate": 0.000826031134649334, + "loss": 0.0834, + "theoretical_loss": 3.867466666600402, + "tokens_seen": 601358336 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008259508907077516, + "loss": 0.0819, + "theoretical_loss": 3.867292160406373, + "tokens_seen": 601620480 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008258706467661692, + "loss": 0.0798, + "theoretical_loss": 3.8671177515132857, + "tokens_seen": 601882624 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008257904028245867, + "loss": 0.0824, + "theoretical_loss": 3.866943439824545, + "tokens_seen": 602144768 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008257101588830044, + "loss": 0.0801, + "theoretical_loss": 3.8667692252436914, + "tokens_seen": 602406912 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008256299149414219, + "loss": 0.0822, + "theoretical_loss": 3.8665951076744056, + "tokens_seen": 602669056 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": -8.796821930445731e-05, + "objective/train/docs_used": 224736, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5687521696090698, + "objective/train/original_loss": 1.5687522888183594, + "objective/train/theoretical_loss": 3.866421087020504, + "objective/train/tokens_used": 623391200, + "objective/train/value_avg": -0.00591278076171875, + "objective/train/value_loss": 0.00038589219911955297, + "objective/train/value_max": -0.00026535987854003906, + "objective/train/value_min": -0.270263671875, + "objective/train/value_reward_corr": 0.5731155725865233, + "objective/train/value_std": 0.00867462158203125, + "objective/train/weight_avg": 1.0000841617584229, + "objective/train/weighted_lm_loss": 1.569719672203064, + "objective/train/weights_max": 1.1083186864852905, + "objective/train/weights_min": 0.36869922280311584, + "theoretical_loss": 3.866421087020504, + "tokens_seen": 602931200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008255496709998395, + "loss": 0.0794, + "theoretical_loss": 3.866421087020504, + "tokens_seen": 602931200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008254694270582571, + "loss": 0.0768, + "theoretical_loss": 3.8662471631859407, + "tokens_seen": 603193344 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008253891831166747, + "loss": 0.0821, + "theoretical_loss": 3.866073336074807, + "tokens_seen": 603455488 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008253089391750923, + "loss": 0.0787, + "theoretical_loss": 3.8658996055913297, + "tokens_seen": 603717632 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008252286952335099, + "loss": 0.0838, + "theoretical_loss": 3.8657259716398737, + "tokens_seen": 603979776 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008251484512919275, + "loss": 0.0801, + "theoretical_loss": 3.8655524341249388, + "tokens_seen": 604241920 + }, + { + "epoch": 0.18, + "learning_rate": 0.000825068207350345, + "loss": 0.0827, + "theoretical_loss": 3.865378992951161, + "tokens_seen": 604504064 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008249879634087627, + "loss": 0.0812, + "theoretical_loss": 3.865205648023311, + "tokens_seen": 604766208 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008249077194671802, + "loss": 0.0821, + "theoretical_loss": 3.8650323992462963, + "tokens_seen": 605028352 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008248274755255978, + "loss": 0.0807, + "theoretical_loss": 3.8648592465251586, + "tokens_seen": 605290496 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008247472315840154, + "loss": 0.0817, + "theoretical_loss": 3.864686189765075, + "tokens_seen": 605552640 + }, + { + "epoch": 0.18, + "learning_rate": 0.000824666987642433, + "loss": 0.082, + "theoretical_loss": 3.864513228871357, + "tokens_seen": 605814784 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008245867437008507, + "loss": 0.0809, + "theoretical_loss": 3.8643403637494504, + "tokens_seen": 606076928 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.0005140507710166276, + "objective/train/docs_used": 225863, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.601678729057312, + "objective/train/original_loss": 1.6016786098480225, + "objective/train/theoretical_loss": 3.8642539670734144, + "objective/train/tokens_used": 626668000, + "objective/train/value_avg": -0.006038665771484375, + "objective/train/value_loss": 9.395569941261783e-05, + "objective/train/value_max": -0.00026535987854003906, + "objective/train/value_min": -0.2125244140625, + "objective/train/value_reward_corr": 0.690265895238366, + "objective/train/value_std": 0.0079193115234375, + "objective/train/weight_avg": 1.0005604028701782, + "objective/train/weighted_lm_loss": 1.6026065349578857, + "objective/train/weights_max": 1.0788187980651855, + "objective/train/weights_min": 0.8205270171165466, + "theoretical_loss": 3.8642539670734144, + "tokens_seen": 606208000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008245064997592682, + "loss": 0.0797, + "theoretical_loss": 3.8641675943049343, + "tokens_seen": 606339072 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008244262558176858, + "loss": 0.0831, + "theoretical_loss": 3.863994920443523, + "tokens_seen": 606601216 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008243460118761034, + "loss": 0.0815, + "theoretical_loss": 3.8638223420710647, + "tokens_seen": 606863360 + }, + { + "epoch": 0.18, + "learning_rate": 0.000824265767934521, + "loss": 0.079, + "theoretical_loss": 3.863649859093538, + "tokens_seen": 607125504 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008241855239929385, + "loss": 0.0795, + "theoretical_loss": 3.863477471417059, + "tokens_seen": 607387648 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008241052800513561, + "loss": 0.0821, + "theoretical_loss": 3.8633051789478734, + "tokens_seen": 607649792 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008240250361097737, + "loss": 0.0841, + "theoretical_loss": 3.8631329815923605, + "tokens_seen": 607911936 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008239447921681912, + "loss": 0.079, + "theoretical_loss": 3.862960879257032, + "tokens_seen": 608174080 + }, + { + "epoch": 0.18, + "learning_rate": 0.000823864548226609, + "loss": 0.0832, + "theoretical_loss": 3.8627888718485313, + "tokens_seen": 608436224 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008237843042850265, + "loss": 0.0798, + "theoretical_loss": 3.862616959273635, + "tokens_seen": 608698368 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008237040603434442, + "loss": 0.0803, + "theoretical_loss": 3.8624451414392498, + "tokens_seen": 608960512 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008236238164018617, + "loss": 0.0785, + "theoretical_loss": 3.8622734182524154, + "tokens_seen": 609222656 + }, + { + "epoch": 0.18, + "objective/train/advantage_avg": 0.00217598513700068, + "objective/train/docs_used": 227108, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5498684644699097, + "objective/train/original_loss": 1.5498683452606201, + "objective/train/theoretical_loss": 3.8621017896203007, + "objective/train/tokens_used": 629944800, + "objective/train/value_avg": -0.00588226318359375, + "objective/train/value_loss": 8.349979179911315e-05, + "objective/train/value_max": -0.0002779960632324219, + "objective/train/value_min": -0.254150390625, + "objective/train/value_reward_corr": 0.5373123562529432, + "objective/train/value_std": 0.00652313232421875, + "objective/train/weight_avg": 1.0022169351577759, + "objective/train/weighted_lm_loss": 1.5533596277236938, + "objective/train/weights_max": 1.130799412727356, + "objective/train/weights_min": 0.6347857117652893, + "theoretical_loss": 3.8621017896203007, + "tokens_seen": 609484800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008235435724602792, + "loss": 0.0803, + "theoretical_loss": 3.8621017896203007, + "tokens_seen": 609484800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008234633285186969, + "loss": 0.0793, + "theoretical_loss": 3.8619302554502077, + "tokens_seen": 609746944 + }, + { + "epoch": 0.18, + "learning_rate": 0.0008233830845771144, + "loss": 0.0787, + "theoretical_loss": 3.8617588156495666, + "tokens_seen": 610009088 + }, + { + "epoch": 0.18, + "learning_rate": 0.000823302840635532, + "loss": 0.0829, + "theoretical_loss": 3.861587470125941, + "tokens_seen": 610271232 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008232225966939496, + "loss": 0.081, + "theoretical_loss": 3.8614162187870216, + "tokens_seen": 610533376 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008231423527523673, + "loss": 0.0804, + "theoretical_loss": 3.861245061540631, + "tokens_seen": 610795520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008230621088107848, + "loss": 0.0806, + "theoretical_loss": 3.8610739982947218, + "tokens_seen": 611057664 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008229818648692024, + "loss": 0.0773, + "theoretical_loss": 3.8609030289573747, + "tokens_seen": 611319808 + }, + { + "epoch": 0.19, + "learning_rate": 0.00082290162092762, + "loss": 0.0826, + "theoretical_loss": 3.8607321534368007, + "tokens_seen": 611581952 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008228213769860375, + "loss": 0.0772, + "theoretical_loss": 3.8605613716413396, + "tokens_seen": 611844096 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008227411330444552, + "loss": 0.0822, + "theoretical_loss": 3.860390683479459, + "tokens_seen": 612106240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008226608891028727, + "loss": 0.0778, + "theoretical_loss": 3.860220088859757, + "tokens_seen": 612368384 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008225806451612903, + "loss": 0.0774, + "theoretical_loss": 3.860049587690958, + "tokens_seen": 612630528 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0016880445182323456, + "objective/train/docs_used": 228263, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.764600396156311, + "objective/train/original_loss": 1.7646005153656006, + "objective/train/theoretical_loss": 3.8599643721221586, + "objective/train/tokens_used": 633221600, + "objective/train/value_avg": -0.006122589111328125, + "objective/train/value_loss": 0.00013401157048065215, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.330322265625, + "objective/train/value_reward_corr": 0.6502286444986164, + "objective/train/value_std": 0.01097869873046875, + "objective/train/weight_avg": 1.0017540454864502, + "objective/train/weighted_lm_loss": 1.767932415008545, + "objective/train/weights_max": 1.193005919456482, + "objective/train/weights_min": 0.8255929350852966, + "theoretical_loss": 3.8599643721221586, + "tokens_seen": 612761600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008225004012197079, + "loss": 0.0799, + "theoretical_loss": 3.8598791798819154, + "tokens_seen": 612892672 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008224201572781254, + "loss": 0.0771, + "theoretical_loss": 3.859708865341611, + "tokens_seen": 613154816 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008223399133365432, + "loss": 0.0819, + "theoretical_loss": 3.8595386439791532, + "tokens_seen": 613416960 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008222596693949607, + "loss": 0.0794, + "theoretical_loss": 3.859368515703778, + "tokens_seen": 613679104 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008221794254533783, + "loss": 0.0779, + "theoretical_loss": 3.859198480424849, + "tokens_seen": 613941248 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008220991815117959, + "loss": 0.0787, + "theoretical_loss": 3.859028538051856, + "tokens_seen": 614203392 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008220189375702135, + "loss": 0.0799, + "theoretical_loss": 3.858858688494416, + "tokens_seen": 614465536 + }, + { + "epoch": 0.19, + "learning_rate": 0.000821938693628631, + "loss": 0.0789, + "theoretical_loss": 3.8586889316622726, + "tokens_seen": 614727680 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008218584496870486, + "loss": 0.076, + "theoretical_loss": 3.8585192674652955, + "tokens_seen": 614989824 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008217782057454662, + "loss": 0.0788, + "theoretical_loss": 3.8583496958134793, + "tokens_seen": 615251968 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008216979618038837, + "loss": 0.0768, + "theoretical_loss": 3.8581802166169457, + "tokens_seen": 615514112 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008216177178623015, + "loss": 0.0801, + "theoretical_loss": 3.8580108297859415, + "tokens_seen": 615776256 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.001968114171177149, + "objective/train/docs_used": 229528, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.539759874343872, + "objective/train/original_loss": 1.5397597551345825, + "objective/train/theoretical_loss": 3.857841535230839, + "objective/train/tokens_used": 636498400, + "objective/train/value_avg": -0.0091400146484375, + "objective/train/value_loss": 0.00024838329409249127, + "objective/train/value_max": -0.00023412704467773438, + "objective/train/value_min": -0.63916015625, + "objective/train/value_reward_corr": 0.6929620425353863, + "objective/train/value_std": 0.01442718505859375, + "objective/train/weight_avg": 1.0020780563354492, + "objective/train/weighted_lm_loss": 1.543168306350708, + "objective/train/weights_max": 1.582837700843811, + "objective/train/weights_min": 0.23275621235370636, + "theoretical_loss": 3.857841535230839, + "tokens_seen": 616038400 + }, + { + "epoch": 0.19, + "learning_rate": 0.000821537473920719, + "loss": 0.0777, + "theoretical_loss": 3.857841535230839, + "tokens_seen": 616038400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008214572299791366, + "loss": 0.0793, + "theoretical_loss": 3.8576723328621347, + "tokens_seen": 616300544 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008213769860375542, + "loss": 0.0794, + "theoretical_loss": 3.8575032225904513, + "tokens_seen": 616562688 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008212967420959717, + "loss": 0.0775, + "theoretical_loss": 3.8573342043265346, + "tokens_seen": 616824832 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008212164981543894, + "loss": 0.0832, + "theoretical_loss": 3.857165277981256, + "tokens_seen": 617086976 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008211362542128069, + "loss": 0.0811, + "theoretical_loss": 3.8569964434656105, + "tokens_seen": 617349120 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008210560102712245, + "loss": 0.0811, + "theoretical_loss": 3.856827700690718, + "tokens_seen": 617611264 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008209757663296422, + "loss": 0.0795, + "theoretical_loss": 3.8566590495678192, + "tokens_seen": 617873408 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008208955223880598, + "loss": 0.0817, + "theoretical_loss": 3.856490490008282, + "tokens_seen": 618135552 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008208152784464773, + "loss": 0.0809, + "theoretical_loss": 3.856322021923595, + "tokens_seen": 618397696 + }, + { + "epoch": 0.19, + "learning_rate": 0.000820735034504895, + "loss": 0.08, + "theoretical_loss": 3.8561536452253713, + "tokens_seen": 618659840 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008206547905633125, + "loss": 0.0773, + "theoretical_loss": 3.855985359825346, + "tokens_seen": 618921984 + }, + { + "epoch": 0.19, + "learning_rate": 0.00082057454662173, + "loss": 0.0827, + "theoretical_loss": 3.855817165635377, + "tokens_seen": 619184128 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0019921851344406605, + "objective/train/docs_used": 230719, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.756550669670105, + "objective/train/original_loss": 1.756550669670105, + "objective/train/theoretical_loss": 3.855733102716653, + "objective/train/tokens_used": 639775200, + "objective/train/value_avg": -0.006992340087890625, + "objective/train/value_loss": 0.0001579941454110667, + "objective/train/value_max": -0.0001926422119140625, + "objective/train/value_min": -0.21337890625, + "objective/train/value_reward_corr": 0.45218047642197834, + "objective/train/value_std": 0.00893402099609375, + "objective/train/weight_avg": 1.002066969871521, + "objective/train/weighted_lm_loss": 1.7593868970870972, + "objective/train/weights_max": 1.1907228231430054, + "objective/train/weights_min": 0.36894404888153076, + "theoretical_loss": 3.855733102716653, + "tokens_seen": 619315200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008204943026801477, + "loss": 0.082, + "theoretical_loss": 3.8556490625674447, + "tokens_seen": 619446272 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008204140587385652, + "loss": 0.0815, + "theoretical_loss": 3.855481050533651, + "tokens_seen": 619708416 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008203338147969828, + "loss": 0.0814, + "theoretical_loss": 3.8553131294462206, + "tokens_seen": 619970560 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008202535708554004, + "loss": 0.0814, + "theoretical_loss": 3.8551452992175, + "tokens_seen": 620232704 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008201733269138181, + "loss": 0.0813, + "theoretical_loss": 3.8549775597599556, + "tokens_seen": 620494848 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008200930829722356, + "loss": 0.0788, + "theoretical_loss": 3.8548099109861775, + "tokens_seen": 620756992 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008200128390306532, + "loss": 0.0811, + "theoretical_loss": 3.8546423528088747, + "tokens_seen": 621019136 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008199325950890708, + "loss": 0.0804, + "theoretical_loss": 3.8544748851408777, + "tokens_seen": 621281280 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008198523511474884, + "loss": 0.0784, + "theoretical_loss": 3.8543075078951388, + "tokens_seen": 621543424 + }, + { + "epoch": 0.19, + "learning_rate": 0.000819772107205906, + "loss": 0.0784, + "theoretical_loss": 3.8541402209847284, + "tokens_seen": 621805568 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008196918632643235, + "loss": 0.0819, + "theoretical_loss": 3.8539730243228387, + "tokens_seen": 622067712 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008196116193227412, + "loss": 0.0778, + "theoretical_loss": 3.8538059178227817, + "tokens_seen": 622329856 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0007625448633916676, + "objective/train/docs_used": 231894, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6909435987472534, + "objective/train/original_loss": 1.690943717956543, + "objective/train/theoretical_loss": 3.8536389013979893, + "objective/train/tokens_used": 643052000, + "objective/train/value_avg": -0.0119781494140625, + "objective/train/value_loss": 0.00041994385537691414, + "objective/train/value_max": -0.00027370452880859375, + "objective/train/value_min": -0.638671875, + "objective/train/value_reward_corr": 0.6989086451961908, + "objective/train/value_std": 0.0189056396484375, + "objective/train/weight_avg": 1.0009546279907227, + "objective/train/weighted_lm_loss": 1.6919503211975098, + "objective/train/weights_max": 1.6490602493286133, + "objective/train/weights_min": 0.37530699372291565, + "theoretical_loss": 3.8536389013979893, + "tokens_seen": 622592000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008195313753811587, + "loss": 0.0836, + "theoretical_loss": 3.8536389013979893, + "tokens_seen": 622592000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008194511314395762, + "loss": 0.0795, + "theoretical_loss": 3.8534719749620114, + "tokens_seen": 622854144 + }, + { + "epoch": 0.19, + "learning_rate": 0.000819370887497994, + "loss": 0.0788, + "theoretical_loss": 3.8533051384285195, + "tokens_seen": 623116288 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008192906435564115, + "loss": 0.0814, + "theoretical_loss": 3.853138391711303, + "tokens_seen": 623378432 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008192103996148291, + "loss": 0.0795, + "theoretical_loss": 3.852971734724269, + "tokens_seen": 623640576 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008191301556732467, + "loss": 0.0787, + "theoretical_loss": 3.8528051673814456, + "tokens_seen": 623902720 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008190499117316643, + "loss": 0.081, + "theoretical_loss": 3.8526386895969775, + "tokens_seen": 624164864 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008189696677900818, + "loss": 0.0826, + "theoretical_loss": 3.8524723012851294, + "tokens_seen": 624427008 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008188894238484994, + "loss": 0.081, + "theoretical_loss": 3.852306002360282, + "tokens_seen": 624689152 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818809179906917, + "loss": 0.0816, + "theoretical_loss": 3.852139792736936, + "tokens_seen": 624951296 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008187289359653345, + "loss": 0.0787, + "theoretical_loss": 3.8519736723297067, + "tokens_seen": 625213440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008186486920237523, + "loss": 0.0791, + "theoretical_loss": 3.8518076410533304, + "tokens_seen": 625475584 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008185684480821698, + "loss": 0.0789, + "theoretical_loss": 3.8516416988226574, + "tokens_seen": 625737728 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0014086907031014562, + "objective/train/docs_used": 233149, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4563045501708984, + "objective/train/original_loss": 1.4563043117523193, + "objective/train/theoretical_loss": 3.8515587610728836, + "objective/train/tokens_used": 646328800, + "objective/train/value_avg": -0.00971221923828125, + "objective/train/value_loss": 0.0002561133005656302, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.36669921875, + "objective/train/value_reward_corr": 0.7151985996360277, + "objective/train/value_std": 0.01548004150390625, + "objective/train/weight_avg": 1.0015289783477783, + "objective/train/weighted_lm_loss": 1.457317590713501, + "objective/train/weights_max": 1.146925687789917, + "objective/train/weights_min": 0.3961734175682068, + "theoretical_loss": 3.8515587610728836, + "tokens_seen": 625868800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008184882041405875, + "loss": 0.0795, + "theoretical_loss": 3.851475845552658, + "tokens_seen": 625999872 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818407960199005, + "loss": 0.0821, + "theoretical_loss": 3.8513100811584158, + "tokens_seen": 626262016 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008183277162574225, + "loss": 0.0818, + "theoretical_loss": 3.851144405555134, + "tokens_seen": 626524160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008182474723158402, + "loss": 0.0788, + "theoretical_loss": 3.85097881865813, + "tokens_seen": 626786304 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008181672283742577, + "loss": 0.0809, + "theoretical_loss": 3.850813320382839, + "tokens_seen": 627048448 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008180869844326753, + "loss": 0.0812, + "theoretical_loss": 3.8506479106448115, + "tokens_seen": 627310592 + }, + { + "epoch": 0.19, + "learning_rate": 0.000818006740491093, + "loss": 0.0824, + "theoretical_loss": 3.8504825893597134, + "tokens_seen": 627572736 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008179264965495106, + "loss": 0.0803, + "theoretical_loss": 3.850317356443326, + "tokens_seen": 627834880 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008178462526079281, + "loss": 0.079, + "theoretical_loss": 3.8501522118115465, + "tokens_seen": 628097024 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008177660086663458, + "loss": 0.0827, + "theoretical_loss": 3.8499871553803873, + "tokens_seen": 628359168 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008176857647247633, + "loss": 0.0806, + "theoretical_loss": 3.8498221870659743, + "tokens_seen": 628621312 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008176055207831808, + "loss": 0.0763, + "theoretical_loss": 3.8496573067845503, + "tokens_seen": 628883456 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0006253430037759244, + "objective/train/docs_used": 234355, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4915151596069336, + "objective/train/original_loss": 1.4915152788162231, + "objective/train/theoretical_loss": 3.8494925144524705, + "objective/train/tokens_used": 649605600, + "objective/train/value_avg": -0.00966644287109375, + "objective/train/value_loss": 0.0007618270465172827, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.759765625, + "objective/train/value_reward_corr": 0.5889753777352726, + "objective/train/value_std": 0.0186614990234375, + "objective/train/weight_avg": 1.0009468793869019, + "objective/train/weighted_lm_loss": 1.4915693998336792, + "objective/train/weights_max": 1.9064472913742065, + "objective/train/weights_min": 0.3697463870048523, + "theoretical_loss": 3.8494925144524705, + "tokens_seen": 629145600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008175252768415985, + "loss": 0.0813, + "theoretical_loss": 3.8494925144524705, + "tokens_seen": 629145600 + }, + { + "epoch": 0.19, + "learning_rate": 0.000817445032900016, + "loss": 0.0806, + "theoretical_loss": 3.849327809986206, + "tokens_seen": 629407744 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008173647889584337, + "loss": 0.0772, + "theoretical_loss": 3.8491631933023407, + "tokens_seen": 629669888 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008172845450168512, + "loss": 0.0817, + "theoretical_loss": 3.8489986643175733, + "tokens_seen": 629932032 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008172043010752689, + "loss": 0.0805, + "theoretical_loss": 3.8488342229487156, + "tokens_seen": 630194176 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008171240571336865, + "loss": 0.0822, + "theoretical_loss": 3.8486698691126935, + "tokens_seen": 630456320 + }, + { + "epoch": 0.19, + "learning_rate": 0.000817043813192104, + "loss": 0.081, + "theoretical_loss": 3.8485056027265454, + "tokens_seen": 630718464 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008169635692505216, + "loss": 0.0802, + "theoretical_loss": 3.848341423707423, + "tokens_seen": 630980608 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008168833253089392, + "loss": 0.0791, + "theoretical_loss": 3.8481773319725914, + "tokens_seen": 631242752 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008168030813673568, + "loss": 0.0834, + "theoretical_loss": 3.8480133274394275, + "tokens_seen": 631504896 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008167228374257743, + "loss": 0.0764, + "theoretical_loss": 3.8478494100254217, + "tokens_seen": 631767040 + }, + { + "epoch": 0.19, + "learning_rate": 0.000816642593484192, + "loss": 0.0828, + "theoretical_loss": 3.847685579648176, + "tokens_seen": 632029184 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008165623495426095, + "loss": 0.0826, + "theoretical_loss": 3.847521836225404, + "tokens_seen": 632291328 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0012705049011856318, + "objective/train/docs_used": 235545, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.8972015380859375, + "objective/train/original_loss": 1.8972015380859375, + "objective/train/theoretical_loss": 3.847439997096262, + "objective/train/tokens_used": 652882400, + "objective/train/value_avg": -0.0090484619140625, + "objective/train/value_loss": 0.0002362952072871849, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.40380859375, + "objective/train/value_reward_corr": 0.6654254308226828, + "objective/train/value_std": 0.0135650634765625, + "objective/train/weight_avg": 1.0013830661773682, + "objective/train/weighted_lm_loss": 1.8993679285049438, + "objective/train/weights_max": 1.2981646060943604, + "objective/train/weights_min": 0.36874493956565857, + "theoretical_loss": 3.847439997096262, + "tokens_seen": 632422400 + }, + { + "epoch": 0.19, + "learning_rate": 0.000816482105601027, + "loss": 0.0829, + "theoretical_loss": 3.8473581796749317, + "tokens_seen": 632553472 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008164018616594448, + "loss": 0.0774, + "theoretical_loss": 3.8471946099146983, + "tokens_seen": 632815616 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008163216177178623, + "loss": 0.0793, + "theoretical_loss": 3.847031126862751, + "tokens_seen": 633077760 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008162413737762799, + "loss": 0.0793, + "theoretical_loss": 3.8468677304372507, + "tokens_seen": 633339904 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008161611298346975, + "loss": 0.0781, + "theoretical_loss": 3.8467044205564704, + "tokens_seen": 633602048 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008160808858931151, + "loss": 0.0804, + "theoretical_loss": 3.846541197138791, + "tokens_seen": 633864192 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008160006419515327, + "loss": 0.0839, + "theoretical_loss": 3.8463780601027056, + "tokens_seen": 634126336 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008159203980099502, + "loss": 0.0786, + "theoretical_loss": 3.846215009366819, + "tokens_seen": 634388480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008158401540683678, + "loss": 0.0812, + "theoretical_loss": 3.846052044849843, + "tokens_seen": 634650624 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008157599101267855, + "loss": 0.0796, + "theoretical_loss": 3.845889166470604, + "tokens_seen": 634912768 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008156796661852031, + "loss": 0.0804, + "theoretical_loss": 3.8457263741480343, + "tokens_seen": 635174912 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008155994222436206, + "loss": 0.0791, + "theoretical_loss": 3.845563667801178, + "tokens_seen": 635437056 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0004186597652733326, + "objective/train/docs_used": 236526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6329374313354492, + "objective/train/original_loss": 1.6329374313354492, + "objective/train/theoretical_loss": 3.8454010473491884, + "objective/train/tokens_used": 656159200, + "objective/train/value_avg": -0.007476806640625, + "objective/train/value_loss": 0.00015600294864270836, + "objective/train/value_max": -0.0002512931823730469, + "objective/train/value_min": -0.230712890625, + "objective/train/value_reward_corr": 0.6553133134938699, + "objective/train/value_std": 0.01104736328125, + "objective/train/weight_avg": 1.0004956722259521, + "objective/train/weighted_lm_loss": 1.6333415508270264, + "objective/train/weights_max": 1.2097692489624023, + "objective/train/weights_min": 0.7830726504325867, + "theoretical_loss": 3.8454010473491884, + "tokens_seen": 635699200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008155191783020383, + "loss": 0.081, + "theoretical_loss": 3.8454010473491884, + "tokens_seen": 635699200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008154389343604558, + "loss": 0.0795, + "theoretical_loss": 3.845238512711327, + "tokens_seen": 635961344 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008153586904188733, + "loss": 0.0817, + "theoretical_loss": 3.845076063806966, + "tokens_seen": 636223488 + }, + { + "epoch": 0.19, + "learning_rate": 0.000815278446477291, + "loss": 0.0833, + "theoretical_loss": 3.844913700555586, + "tokens_seen": 636485632 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008151982025357085, + "loss": 0.0782, + "theoretical_loss": 3.8447514228767763, + "tokens_seen": 636747776 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008151179585941261, + "loss": 0.0831, + "theoretical_loss": 3.844589230690234, + "tokens_seen": 637009920 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008150377146525437, + "loss": 0.0797, + "theoretical_loss": 3.844427123915766, + "tokens_seen": 637272064 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008149574707109614, + "loss": 0.0781, + "theoretical_loss": 3.8442651024732863, + "tokens_seen": 637534208 + }, + { + "epoch": 0.19, + "learning_rate": 0.000814877226769379, + "loss": 0.0784, + "theoretical_loss": 3.8441031662828173, + "tokens_seen": 637796352 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008147969828277966, + "loss": 0.0799, + "theoretical_loss": 3.843941315264489, + "tokens_seen": 638058496 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008147167388862141, + "loss": 0.0788, + "theoretical_loss": 3.8437795493385387, + "tokens_seen": 638320640 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008146364949446317, + "loss": 0.0813, + "theoretical_loss": 3.8436178684253126, + "tokens_seen": 638582784 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008145562510030493, + "loss": 0.0819, + "theoretical_loss": 3.843456272445262, + "tokens_seen": 638844928 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": 0.0014796998584643006, + "objective/train/docs_used": 237858, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4043687582015991, + "objective/train/original_loss": 1.4043687582015991, + "objective/train/theoretical_loss": 3.8433755062803483, + "objective/train/tokens_used": 659436000, + "objective/train/value_avg": -0.0091400146484375, + "objective/train/value_loss": 0.0002844578993972391, + "objective/train/value_max": -0.0001881122589111328, + "objective/train/value_min": -0.2335205078125, + "objective/train/value_reward_corr": 0.6038268655262016, + "objective/train/value_std": 0.0128326416015625, + "objective/train/weight_avg": 1.0016050338745117, + "objective/train/weighted_lm_loss": 1.4062108993530273, + "objective/train/weights_max": 1.1530903577804565, + "objective/train/weights_min": 0.4062868654727936, + "theoretical_loss": 3.8433755062803483, + "tokens_seen": 638976000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008144760070614668, + "loss": 0.078, + "theoretical_loss": 3.8432947613189468, + "tokens_seen": 639107072 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008143957631198845, + "loss": 0.0825, + "theoretical_loss": 3.8431333349670336, + "tokens_seen": 639369216 + }, + { + "epoch": 0.19, + "learning_rate": 0.000814315519178302, + "loss": 0.0788, + "theoretical_loss": 3.842971993310294, + "tokens_seen": 639631360 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008142352752367197, + "loss": 0.0793, + "theoretical_loss": 3.8428107362696085, + "tokens_seen": 639893504 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008141550312951373, + "loss": 0.0782, + "theoretical_loss": 3.842649563765962, + "tokens_seen": 640155648 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008140747873535548, + "loss": 0.0818, + "theoretical_loss": 3.8424884757204474, + "tokens_seen": 640417792 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008139945434119724, + "loss": 0.08, + "theoretical_loss": 3.8423274720542606, + "tokens_seen": 640679936 + }, + { + "epoch": 0.19, + "learning_rate": 0.00081391429947039, + "loss": 0.0786, + "theoretical_loss": 3.842166552688706, + "tokens_seen": 640942080 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008138340555288076, + "loss": 0.0806, + "theoretical_loss": 3.8420057175451934, + "tokens_seen": 641204224 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008137538115872251, + "loss": 0.0812, + "theoretical_loss": 3.841844966545236, + "tokens_seen": 641466368 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008136735676456428, + "loss": 0.0802, + "theoretical_loss": 3.841684299610453, + "tokens_seen": 641728512 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008135933237040603, + "loss": 0.079, + "theoretical_loss": 3.8415237166625698, + "tokens_seen": 641990656 + }, + { + "epoch": 0.19, + "objective/train/advantage_avg": -2.3364991648122668e-05, + "objective/train/docs_used": 239059, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5975748300552368, + "objective/train/original_loss": 1.5975749492645264, + "objective/train/theoretical_loss": 3.8413632176234147, + "objective/train/tokens_used": 662712800, + "objective/train/value_avg": -0.006862640380859375, + "objective/train/value_loss": 0.0001523960818303749, + "objective/train/value_max": -0.0002033710479736328, + "objective/train/value_min": -0.264892578125, + "objective/train/value_reward_corr": 0.6269222691924341, + "objective/train/value_std": 0.00958251953125, + "objective/train/weight_avg": 1.0000513792037964, + "objective/train/weighted_lm_loss": 1.597486138343811, + "objective/train/weights_max": 1.1600202322006226, + "objective/train/weights_min": 0.6130765676498413, + "theoretical_loss": 3.8413632176234147, + "tokens_seen": 642252800 + }, + { + "epoch": 0.19, + "learning_rate": 0.000813513079762478, + "loss": 0.0793, + "theoretical_loss": 3.8413632176234147, + "tokens_seen": 642252800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008134328358208956, + "loss": 0.0834, + "theoretical_loss": 3.8412028024149225, + "tokens_seen": 642514944 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008133525918793131, + "loss": 0.079, + "theoretical_loss": 3.841042470959131, + "tokens_seen": 642777088 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008132723479377308, + "loss": 0.0791, + "theoretical_loss": 3.840882223178183, + "tokens_seen": 643039232 + }, + { + "epoch": 0.19, + "learning_rate": 0.0008131921039961483, + "loss": 0.0811, + "theoretical_loss": 3.8407220589943254, + "tokens_seen": 643301376 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008131118600545659, + "loss": 0.0778, + "theoretical_loss": 3.8405619783299083, + "tokens_seen": 643563520 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008130316161129835, + "loss": 0.0784, + "theoretical_loss": 3.8404019811073864, + "tokens_seen": 643825664 + }, + { + "epoch": 0.2, + "learning_rate": 0.000812951372171401, + "loss": 0.0787, + "theoretical_loss": 3.840242067249317, + "tokens_seen": 644087808 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008128711282298186, + "loss": 0.0783, + "theoretical_loss": 3.840082236678362, + "tokens_seen": 644349952 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008127908842882363, + "loss": 0.08, + "theoretical_loss": 3.8399224893172854, + "tokens_seen": 644612096 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008127106403466539, + "loss": 0.0794, + "theoretical_loss": 3.839762825088955, + "tokens_seen": 644874240 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008126303964050714, + "loss": 0.0833, + "theoretical_loss": 3.8396032439163394, + "tokens_seen": 645136384 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008125501524634891, + "loss": 0.0806, + "theoretical_loss": 3.8394437457225132, + "tokens_seen": 645398528 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -3.4955566661665216e-05, + "objective/train/docs_used": 240316, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6006709337234497, + "objective/train/original_loss": 1.6006708145141602, + "objective/train/theoretical_loss": 3.8393640277186343, + "objective/train/tokens_used": 665989600, + "objective/train/value_avg": -0.01154327392578125, + "objective/train/value_loss": 0.00029328398522920907, + "objective/train/value_max": -0.00016224384307861328, + "objective/train/value_min": -0.419189453125, + "objective/train/value_reward_corr": 0.8165028368002758, + "objective/train/value_std": 0.0216522216796875, + "objective/train/weight_avg": 1.0001044273376465, + "objective/train/weighted_lm_loss": 1.5991564989089966, + "objective/train/weights_max": 1.134090781211853, + "objective/train/weights_min": 0.3825574517250061, + "theoretical_loss": 3.8393640277186343, + "tokens_seen": 645529600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008124699085219066, + "loss": 0.0832, + "theoretical_loss": 3.839284330430651, + "tokens_seen": 645660672 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008123896645803241, + "loss": 0.081, + "theoretical_loss": 3.8391249979640305, + "tokens_seen": 645922816 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008123094206387418, + "loss": 0.0802, + "theoretical_loss": 3.8389657482460313, + "tokens_seen": 646184960 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008122291766971593, + "loss": 0.0774, + "theoretical_loss": 3.838806581200134, + "tokens_seen": 646447104 + }, + { + "epoch": 0.2, + "learning_rate": 0.000812148932755577, + "loss": 0.0808, + "theoretical_loss": 3.838647496749924, + "tokens_seen": 646709248 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008120686888139945, + "loss": 0.0818, + "theoretical_loss": 3.8384884948190847, + "tokens_seen": 646971392 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008119884448724122, + "loss": 0.0809, + "theoretical_loss": 3.838329575331403, + "tokens_seen": 647233536 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008119082009308298, + "loss": 0.0798, + "theoretical_loss": 3.8381707382107657, + "tokens_seen": 647495680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008118279569892473, + "loss": 0.0836, + "theoretical_loss": 3.838011983381162, + "tokens_seen": 647757824 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008117477130476649, + "loss": 0.0792, + "theoretical_loss": 3.8378533107666817, + "tokens_seen": 648019968 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008116674691060825, + "loss": 0.0833, + "theoretical_loss": 3.8376947202915144, + "tokens_seen": 648282112 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008115872251645001, + "loss": 0.0801, + "theoretical_loss": 3.8375362118799505, + "tokens_seen": 648544256 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.002151393797248602, + "objective/train/docs_used": 241544, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5193239450454712, + "objective/train/original_loss": 1.5193241834640503, + "objective/train/theoretical_loss": 3.8373777854563813, + "objective/train/tokens_used": 669266400, + "objective/train/value_avg": -0.007175445556640625, + "objective/train/value_loss": 0.0002748131228145212, + "objective/train/value_max": -0.00010722875595092773, + "objective/train/value_min": -0.6533203125, + "objective/train/value_reward_corr": 0.5608935796152028, + "objective/train/value_std": 0.01296234130859375, + "objective/train/weight_avg": 1.002267599105835, + "objective/train/weighted_lm_loss": 1.5228184461593628, + "objective/train/weights_max": 1.1310755014419556, + "objective/train/weights_min": 0.36963358521461487, + "theoretical_loss": 3.8373777854563813, + "tokens_seen": 648806400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008115069812229176, + "loss": 0.0798, + "theoretical_loss": 3.8373777854563813, + "tokens_seen": 648806400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008114267372813353, + "loss": 0.0816, + "theoretical_loss": 3.837219440945298, + "tokens_seen": 649068544 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008113464933397528, + "loss": 0.08, + "theoretical_loss": 3.8370611782712922, + "tokens_seen": 649330688 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008112662493981705, + "loss": 0.0823, + "theoretical_loss": 3.8369029973590543, + "tokens_seen": 649592832 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008111860054565881, + "loss": 0.0796, + "theoretical_loss": 3.836744898133376, + "tokens_seen": 649854976 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008111057615150056, + "loss": 0.079, + "theoretical_loss": 3.8365868805191456, + "tokens_seen": 650117120 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008110255175734233, + "loss": 0.0801, + "theoretical_loss": 3.836428944441354, + "tokens_seen": 650379264 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008109452736318408, + "loss": 0.0804, + "theoretical_loss": 3.8362710898250896, + "tokens_seen": 650641408 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008108650296902584, + "loss": 0.0819, + "theoretical_loss": 3.83611331659554, + "tokens_seen": 650903552 + }, + { + "epoch": 0.2, + "learning_rate": 0.000810784785748676, + "loss": 0.0765, + "theoretical_loss": 3.8359556246779913, + "tokens_seen": 651165696 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008107045418070936, + "loss": 0.0828, + "theoretical_loss": 3.8357980139978283, + "tokens_seen": 651427840 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008106242978655111, + "loss": 0.0827, + "theoretical_loss": 3.8356404844805354, + "tokens_seen": 651689984 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008105440539239288, + "loss": 0.0817, + "theoretical_loss": 3.835483036051694, + "tokens_seen": 651952128 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0021593719720840454, + "objective/train/docs_used": 242832, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6270049810409546, + "objective/train/original_loss": 1.6270049810409546, + "objective/train/theoretical_loss": 3.8354043422222133, + "objective/train/tokens_used": 672543200, + "objective/train/value_avg": -0.00695037841796875, + "objective/train/value_loss": 0.0001394545251969248, + "objective/train/value_max": -0.00016736984252929688, + "objective/train/value_min": -0.36376953125, + "objective/train/value_reward_corr": 0.674807627723227, + "objective/train/value_std": 0.0125274658203125, + "objective/train/weight_avg": 1.0022249221801758, + "objective/train/weighted_lm_loss": 1.6309560537338257, + "objective/train/weights_max": 1.1779428720474243, + "objective/train/weights_min": 0.3801831901073456, + "theoretical_loss": 3.8354043422222133, + "tokens_seen": 652083200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008104638099823464, + "loss": 0.0802, + "theoretical_loss": 3.835325668636983, + "tokens_seen": 652214272 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008103835660407639, + "loss": 0.0816, + "theoretical_loss": 3.8351683821621814, + "tokens_seen": 652476416 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008103033220991816, + "loss": 0.0819, + "theoretical_loss": 3.8350111765531647, + "tokens_seen": 652738560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008102230781575991, + "loss": 0.0797, + "theoretical_loss": 3.834854051735906, + "tokens_seen": 653000704 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008101428342160167, + "loss": 0.0824, + "theoretical_loss": 3.8346970076364757, + "tokens_seen": 653262848 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008100625902744343, + "loss": 0.0798, + "theoretical_loss": 3.8345400441810424, + "tokens_seen": 653524992 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008099823463328518, + "loss": 0.0818, + "theoretical_loss": 3.8343831612958703, + "tokens_seen": 653787136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008099021023912694, + "loss": 0.0817, + "theoretical_loss": 3.834226358907322, + "tokens_seen": 654049280 + }, + { + "epoch": 0.2, + "learning_rate": 0.000809821858449687, + "loss": 0.0817, + "theoretical_loss": 3.8340696369418565, + "tokens_seen": 654311424 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008097416145081047, + "loss": 0.0785, + "theoretical_loss": 3.833912995326029, + "tokens_seen": 654573568 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008096613705665223, + "loss": 0.0791, + "theoretical_loss": 3.8337564339864914, + "tokens_seen": 654835712 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008095811266249399, + "loss": 0.0801, + "theoretical_loss": 3.8335999528499913, + "tokens_seen": 655097856 + }, + { + "debugging/Compilability": 0.9230769230769231, + "debugging/distinct-1-grams": 0.7560193640250044, + "debugging/entropy-1-grams": 5.2534808493234895, + "debugging/length": 422.2307692307692, + "debugging/num_segments": 13, + "debugging/raw_token_scores_avg": 0.00857908371835947, + "debugging/raw_token_scores_std": 0.02154596894979477, + "debugging/score": 0.01638830111902192, + "debugging/score_std": 0.019821935735086043, + "epoch": 0.2, + "objective/train/advantage_avg": 0.002418318996205926, + "objective/train/docs_used": 243976, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5416268110275269, + "objective/train/original_loss": 1.5416266918182373, + "objective/train/theoretical_loss": 3.833443551843374, + "objective/train/tokens_used": 675820000, + "objective/train/value_avg": -0.01099395751953125, + "objective/train/value_loss": 0.00022288458421826363, + "objective/train/value_max": -0.00031256675720214844, + "objective/train/value_min": -0.326904296875, + "objective/train/value_reward_corr": 0.7334904683634422, + "objective/train/value_std": 0.0173797607421875, + "objective/train/weight_avg": 1.0025254487991333, + "objective/train/weighted_lm_loss": 1.5436171293258667, + "objective/train/weights_max": 1.2229843139648438, + "objective/train/weights_min": 0.37925034761428833, + "theoretical_loss": 3.833443551843374, + "tokens_seen": 655360000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008095008826833574, + "loss": 0.0824, + "theoretical_loss": 3.833443551843374, + "tokens_seen": 655360000 + }, + { + "epoch": 0.2, + "learning_rate": 0.000809420638741775, + "loss": 0.0802, + "theoretical_loss": 3.8332872308935793, + "tokens_seen": 655622144 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008093403948001926, + "loss": 0.081, + "theoretical_loss": 3.833130989927643, + "tokens_seen": 655884288 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008092601508586101, + "loss": 0.0797, + "theoretical_loss": 3.8329748288726972, + "tokens_seen": 656146432 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008091799069170278, + "loss": 0.0838, + "theoretical_loss": 3.8328187476559687, + "tokens_seen": 656408576 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008090996629754453, + "loss": 0.082, + "theoretical_loss": 3.83266274620478, + "tokens_seen": 656670720 + }, + { + "epoch": 0.2, + "learning_rate": 0.000809019419033863, + "loss": 0.0783, + "theoretical_loss": 3.832506824446549, + "tokens_seen": 656932864 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008089391750922806, + "loss": 0.0795, + "theoretical_loss": 3.832350982308788, + "tokens_seen": 657195008 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008088589311506981, + "loss": 0.0784, + "theoretical_loss": 3.8321952197191043, + "tokens_seen": 657457152 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008087786872091157, + "loss": 0.0816, + "theoretical_loss": 3.8320395366052, + "tokens_seen": 657719296 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008086984432675333, + "loss": 0.0795, + "theoretical_loss": 3.8318839328948715, + "tokens_seen": 657981440 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008086181993259509, + "loss": 0.0807, + "theoretical_loss": 3.8317284085160095, + "tokens_seen": 658243584 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008085379553843684, + "loss": 0.0805, + "theoretical_loss": 3.8315729633965994, + "tokens_seen": 658505728 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.001669611083343625, + "objective/train/docs_used": 245288, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5030797719955444, + "objective/train/original_loss": 1.503079891204834, + "objective/train/theoretical_loss": 3.83149527053671, + "objective/train/tokens_used": 679096800, + "objective/train/value_avg": -0.00933837890625, + "objective/train/value_loss": 0.0003506782522890717, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.263427734375, + "objective/train/value_reward_corr": 0.5930356277614546, + "objective/train/value_std": 0.01061248779296875, + "objective/train/weight_avg": 0.9984805583953857, + "objective/train/weighted_lm_loss": 1.5003520250320435, + "objective/train/weights_max": 1.164748191833496, + "objective/train/weights_min": 0.22747893631458282, + "theoretical_loss": 3.83149527053671, + "tokens_seen": 658636800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008084577114427861, + "loss": 0.0786, + "theoretical_loss": 3.8314175974647195, + "tokens_seen": 658767872 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008083774675012036, + "loss": 0.0808, + "theoretical_loss": 3.831262310648544, + "tokens_seen": 659030016 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008082972235596214, + "loss": 0.0816, + "theoretical_loss": 3.831107102876338, + "tokens_seen": 659292160 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008082169796180389, + "loss": 0.0829, + "theoretical_loss": 3.830951974076463, + "tokens_seen": 659554304 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008081367356764564, + "loss": 0.0825, + "theoretical_loss": 3.830796924177371, + "tokens_seen": 659816448 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008080564917348741, + "loss": 0.0792, + "theoretical_loss": 3.830641953107609, + "tokens_seen": 660078592 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008079762477932916, + "loss": 0.0806, + "theoretical_loss": 3.8304870607958175, + "tokens_seen": 660340736 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008078960038517092, + "loss": 0.0807, + "theoretical_loss": 3.8303322471707286, + "tokens_seen": 660602880 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008078157599101268, + "loss": 0.0784, + "theoretical_loss": 3.830177512161167, + "tokens_seen": 660865024 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008077355159685444, + "loss": 0.0818, + "theoretical_loss": 3.8300228556960523, + "tokens_seen": 661127168 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008076552720269619, + "loss": 0.0778, + "theoretical_loss": 3.829868277704393, + "tokens_seen": 661389312 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008075750280853796, + "loss": 0.0808, + "theoretical_loss": 3.829713778115293, + "tokens_seen": 661651456 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0014472852926701307, + "objective/train/docs_used": 246530, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6413122415542603, + "objective/train/original_loss": 1.6413123607635498, + "objective/train/theoretical_loss": 3.8295593568579465, + "objective/train/tokens_used": 682373600, + "objective/train/value_avg": -0.0119781494140625, + "objective/train/value_loss": 0.000297568243695423, + "objective/train/value_max": -0.00022172927856445312, + "objective/train/value_min": -0.2919921875, + "objective/train/value_reward_corr": 0.7385408733331538, + "objective/train/value_std": 0.019012451171875, + "objective/train/weight_avg": 1.0015898942947388, + "objective/train/weighted_lm_loss": 1.6430256366729736, + "objective/train/weights_max": 1.2300209999084473, + "objective/train/weights_min": 0.36995241045951843, + "theoretical_loss": 3.8295593568579465, + "tokens_seen": 661913600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008074947841437972, + "loss": 0.0816, + "theoretical_loss": 3.8295593568579465, + "tokens_seen": 661913600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008074145402022147, + "loss": 0.0813, + "theoretical_loss": 3.8294050138616402, + "tokens_seen": 662175744 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008073342962606324, + "loss": 0.081, + "theoretical_loss": 3.8292507490557526, + "tokens_seen": 662437888 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008072540523190499, + "loss": 0.0768, + "theoretical_loss": 3.8290965623697537, + "tokens_seen": 662700032 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008071738083774676, + "loss": 0.0783, + "theoretical_loss": 3.8289424537332053, + "tokens_seen": 662962176 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008070935644358851, + "loss": 0.0828, + "theoretical_loss": 3.82878842307576, + "tokens_seen": 663224320 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008070133204943026, + "loss": 0.0797, + "theoretical_loss": 3.828634470327162, + "tokens_seen": 663486464 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008069330765527203, + "loss": 0.0808, + "theoretical_loss": 3.8284805954172474, + "tokens_seen": 663748608 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008068528326111379, + "loss": 0.0812, + "theoretical_loss": 3.828326798275941, + "tokens_seen": 664010752 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008067725886695555, + "loss": 0.0797, + "theoretical_loss": 3.82817307883326, + "tokens_seen": 664272896 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008066923447279731, + "loss": 0.0797, + "theoretical_loss": 3.8280194370193112, + "tokens_seen": 664535040 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008066121007863907, + "loss": 0.0799, + "theoretical_loss": 3.827865872764293, + "tokens_seen": 664797184 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008065318568448082, + "loss": 0.0802, + "theoretical_loss": 3.8277123859984936, + "tokens_seen": 665059328 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.00026472590980120003, + "objective/train/docs_used": 247583, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7137538194656372, + "objective/train/original_loss": 1.713753581047058, + "objective/train/theoretical_loss": 3.82763567165229, + "objective/train/tokens_used": 685650400, + "objective/train/value_avg": -0.00846099853515625, + "objective/train/value_loss": 0.00032397088943980634, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.64111328125, + "objective/train/value_reward_corr": 0.6828980449967452, + "objective/train/value_std": 0.01425933837890625, + "objective/train/weight_avg": 1.00041663646698, + "objective/train/weighted_lm_loss": 1.7142980098724365, + "objective/train/weights_max": 1.409537672996521, + "objective/train/weights_min": 0.5323238372802734, + "theoretical_loss": 3.82763567165229, + "tokens_seen": 665190400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008064516129032258, + "loss": 0.0815, + "theoretical_loss": 3.8275589766522895, + "tokens_seen": 665321472 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008063713689616434, + "loss": 0.0795, + "theoretical_loss": 3.8274056446561504, + "tokens_seen": 665583616 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008062911250200609, + "loss": 0.077, + "theoretical_loss": 3.827252389940633, + "tokens_seen": 665845760 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008062108810784786, + "loss": 0.079, + "theoretical_loss": 3.827099212436386, + "tokens_seen": 666107904 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008061306371368961, + "loss": 0.0802, + "theoretical_loss": 3.826946112074145, + "tokens_seen": 666370048 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008060503931953138, + "loss": 0.0791, + "theoretical_loss": 3.826793088784737, + "tokens_seen": 666632192 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008059701492537314, + "loss": 0.0817, + "theoretical_loss": 3.826640142499077, + "tokens_seen": 666894336 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008058899053121489, + "loss": 0.0786, + "theoretical_loss": 3.8264872731481705, + "tokens_seen": 667156480 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008058096613705666, + "loss": 0.0786, + "theoretical_loss": 3.8263344806631103, + "tokens_seen": 667418624 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008057294174289841, + "loss": 0.0804, + "theoretical_loss": 3.8261817649750784, + "tokens_seen": 667680768 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008056491734874017, + "loss": 0.0781, + "theoretical_loss": 3.8260291260153463, + "tokens_seen": 667942912 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008055689295458193, + "loss": 0.0798, + "theoretical_loss": 3.8258765637152727, + "tokens_seen": 668205056 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.00018435930542182177, + "objective/train/docs_used": 248744, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.638197660446167, + "objective/train/original_loss": 1.638197660446167, + "objective/train/theoretical_loss": 3.8257240780063055, + "objective/train/tokens_used": 688927200, + "objective/train/value_avg": -0.006977081298828125, + "objective/train/value_loss": 0.0003241746162530035, + "objective/train/value_max": -0.00019109249114990234, + "objective/train/value_min": -0.227783203125, + "objective/train/value_reward_corr": 0.7174840285003992, + "objective/train/value_std": 0.0107421875, + "objective/train/weight_avg": 0.9999673962593079, + "objective/train/weighted_lm_loss": 1.6396377086639404, + "objective/train/weights_max": 1.1206990480422974, + "objective/train/weights_min": 0.39359450340270996, + "theoretical_loss": 3.8257240780063055, + "tokens_seen": 668467200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008054886856042369, + "loss": 0.0806, + "theoretical_loss": 3.8257240780063055, + "tokens_seen": 668467200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008054084416626544, + "loss": 0.078, + "theoretical_loss": 3.8255716688199803, + "tokens_seen": 668729344 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008053281977210722, + "loss": 0.079, + "theoretical_loss": 3.825419336087921, + "tokens_seen": 668991488 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008052479537794897, + "loss": 0.0788, + "theoretical_loss": 3.825267079741839, + "tokens_seen": 669253632 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008051677098379072, + "loss": 0.0796, + "theoretical_loss": 3.825114899713533, + "tokens_seen": 669515776 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008050874658963249, + "loss": 0.0808, + "theoretical_loss": 3.8249627959348915, + "tokens_seen": 669777920 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008050072219547424, + "loss": 0.0791, + "theoretical_loss": 3.824810768337887, + "tokens_seen": 670040064 + }, + { + "epoch": 0.2, + "learning_rate": 0.00080492697801316, + "loss": 0.0801, + "theoretical_loss": 3.8246588168545816, + "tokens_seen": 670302208 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008048467340715776, + "loss": 0.0747, + "theoretical_loss": 3.824506941417125, + "tokens_seen": 670564352 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008047664901299952, + "loss": 0.0809, + "theoretical_loss": 3.824355141957752, + "tokens_seen": 670826496 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008046862461884128, + "loss": 0.0801, + "theoretical_loss": 3.8242034184087847, + "tokens_seen": 671088640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008046060022468304, + "loss": 0.0774, + "theoretical_loss": 3.824051770702633, + "tokens_seen": 671350784 + }, + { + "epoch": 0.2, + "learning_rate": 0.000804525758305248, + "loss": 0.0821, + "theoretical_loss": 3.823900198771792, + "tokens_seen": 671612928 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": 0.0005832649767398834, + "objective/train/docs_used": 249880, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4792834520339966, + "objective/train/original_loss": 1.4792835712432861, + "objective/train/theoretical_loss": 3.823824441201042, + "objective/train/tokens_used": 692204000, + "objective/train/value_avg": -0.00798797607421875, + "objective/train/value_loss": 0.0001945794647326693, + "objective/train/value_max": -0.00016224384307861328, + "objective/train/value_min": -0.30322265625, + "objective/train/value_reward_corr": 0.7662795001045949, + "objective/train/value_std": 0.0170745849609375, + "objective/train/weight_avg": 1.0006768703460693, + "objective/train/weighted_lm_loss": 1.479491949081421, + "objective/train/weights_max": 1.267970323562622, + "objective/train/weights_min": 0.5421772599220276, + "theoretical_loss": 3.823824441201042, + "tokens_seen": 671744000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008044455143636656, + "loss": 0.0767, + "theoretical_loss": 3.823748702548845, + "tokens_seen": 671875072 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008043652704220832, + "loss": 0.0775, + "theoretical_loss": 3.823597281966459, + "tokens_seen": 672137216 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008042850264805007, + "loss": 0.0787, + "theoretical_loss": 3.8234459369573894, + "tokens_seen": 672399360 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008042047825389184, + "loss": 0.0793, + "theoretical_loss": 3.8232946674544763, + "tokens_seen": 672661504 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008041245385973359, + "loss": 0.0805, + "theoretical_loss": 3.8231434733906458, + "tokens_seen": 672923648 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008040442946557534, + "loss": 0.0803, + "theoretical_loss": 3.822992354698911, + "tokens_seen": 673185792 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008039640507141711, + "loss": 0.0784, + "theoretical_loss": 3.822841311312368, + "tokens_seen": 673447936 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008038838067725887, + "loss": 0.0793, + "theoretical_loss": 3.8226903431642008, + "tokens_seen": 673710080 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008038035628310063, + "loss": 0.0789, + "theoretical_loss": 3.8225394501876764, + "tokens_seen": 673972224 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008037233188894239, + "loss": 0.0802, + "theoretical_loss": 3.822388632316149, + "tokens_seen": 674234368 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008036430749478415, + "loss": 0.0779, + "theoretical_loss": 3.822237889483057, + "tokens_seen": 674496512 + }, + { + "epoch": 0.2, + "learning_rate": 0.000803562831006259, + "loss": 0.0794, + "theoretical_loss": 3.822087221621923, + "tokens_seen": 674758656 + }, + { + "epoch": 0.2, + "objective/train/advantage_avg": -0.00032604276202619076, + "objective/train/docs_used": 251058, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5687280893325806, + "objective/train/original_loss": 1.5687280893325806, + "objective/train/theoretical_loss": 3.821936628666355, + "objective/train/tokens_used": 695480800, + "objective/train/value_avg": -0.006984710693359375, + "objective/train/value_loss": 0.00032798174652270973, + "objective/train/value_max": -0.0001926422119140625, + "objective/train/value_min": -0.90283203125, + "objective/train/value_reward_corr": 0.6570254406817982, + "objective/train/value_std": 0.01174163818359375, + "objective/train/weight_avg": 0.9998152256011963, + "objective/train/weighted_lm_loss": 1.568171501159668, + "objective/train/weights_max": 1.1402249336242676, + "objective/train/weights_min": 0.2320682257413864, + "theoretical_loss": 3.821936628666355, + "tokens_seen": 675020800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008034825870646766, + "loss": 0.0753, + "theoretical_loss": 3.821936628666355, + "tokens_seen": 675020800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008034023431230942, + "loss": 0.0777, + "theoretical_loss": 3.821786110550045, + "tokens_seen": 675282944 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008033220991815118, + "loss": 0.0768, + "theoretical_loss": 3.8216356672067704, + "tokens_seen": 675545088 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008032418552399294, + "loss": 0.0794, + "theoretical_loss": 3.821485298570391, + "tokens_seen": 675807232 + }, + { + "epoch": 0.2, + "learning_rate": 0.000803161611298347, + "loss": 0.0774, + "theoretical_loss": 3.8213350045748533, + "tokens_seen": 676069376 + }, + { + "epoch": 0.2, + "learning_rate": 0.0008030813673567647, + "loss": 0.0787, + "theoretical_loss": 3.821184785154186, + "tokens_seen": 676331520 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008030011234151822, + "loss": 0.0792, + "theoretical_loss": 3.8210346402425013, + "tokens_seen": 676593664 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008029208794735997, + "loss": 0.0791, + "theoretical_loss": 3.820884569773997, + "tokens_seen": 676855808 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008028406355320174, + "loss": 0.0771, + "theoretical_loss": 3.8207345736829526, + "tokens_seen": 677117952 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008027603915904349, + "loss": 0.078, + "theoretical_loss": 3.820584651903732, + "tokens_seen": 677380096 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008026801476488525, + "loss": 0.0784, + "theoretical_loss": 3.820434804370782, + "tokens_seen": 677642240 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008025999037072701, + "loss": 0.0781, + "theoretical_loss": 3.820285031018633, + "tokens_seen": 677904384 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008025196597656877, + "loss": 0.0788, + "theoretical_loss": 3.8201353317818985, + "tokens_seen": 678166528 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0017591392388567328, + "objective/train/docs_used": 252282, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.499388575553894, + "objective/train/original_loss": 1.4993884563446045, + "objective/train/theoretical_loss": 3.8200605099364005, + "objective/train/tokens_used": 698757600, + "objective/train/value_avg": -0.0076446533203125, + "objective/train/value_loss": 0.00020787572429981083, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.267333984375, + "objective/train/value_reward_corr": 0.6231772208041639, + "objective/train/value_std": 0.0118560791015625, + "objective/train/weight_avg": 1.001856803894043, + "objective/train/weighted_lm_loss": 1.5024080276489258, + "objective/train/weights_max": 1.1745542287826538, + "objective/train/weights_min": 0.3710364103317261, + "theoretical_loss": 3.8200605099364005, + "tokens_seen": 678297600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008024394158241052, + "loss": 0.0767, + "theoretical_loss": 3.819985706595274, + "tokens_seen": 678428672 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008023591718825229, + "loss": 0.0816, + "theoretical_loss": 3.8198361553935385, + "tokens_seen": 678690816 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008022789279409405, + "loss": 0.0814, + "theoretical_loss": 3.8196866781115526, + "tokens_seen": 678952960 + }, + { + "epoch": 0.21, + "learning_rate": 0.000802198683999358, + "loss": 0.0831, + "theoretical_loss": 3.8195372746842615, + "tokens_seen": 679215104 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008021184400577757, + "loss": 0.0767, + "theoretical_loss": 3.8193879450466905, + "tokens_seen": 679477248 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008020381961161932, + "loss": 0.0798, + "theoretical_loss": 3.819238689133948, + "tokens_seen": 679739392 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008019579521746109, + "loss": 0.0795, + "theoretical_loss": 3.819089506881225, + "tokens_seen": 680001536 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008018777082330284, + "loss": 0.079, + "theoretical_loss": 3.8189403982237935, + "tokens_seen": 680263680 + }, + { + "epoch": 0.21, + "learning_rate": 0.000801797464291446, + "loss": 0.0773, + "theoretical_loss": 3.818791363097008, + "tokens_seen": 680525824 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008017172203498636, + "loss": 0.0789, + "theoretical_loss": 3.818642401436304, + "tokens_seen": 680787968 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008016369764082812, + "loss": 0.0793, + "theoretical_loss": 3.8184935131771987, + "tokens_seen": 681050112 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008015567324666988, + "loss": 0.0787, + "theoretical_loss": 3.8183446982552915, + "tokens_seen": 681312256 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0013181634712964296, + "objective/train/docs_used": 253498, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5834970474243164, + "objective/train/original_loss": 1.5834970474243164, + "objective/train/theoretical_loss": 3.818195956606262, + "objective/train/tokens_used": 702034400, + "objective/train/value_avg": -0.00580596923828125, + "objective/train/value_loss": 0.00019775994587689638, + "objective/train/value_max": -0.000263214111328125, + "objective/train/value_min": -0.3359375, + "objective/train/value_reward_corr": 0.4194674562674259, + "objective/train/value_std": 0.007694244384765625, + "objective/train/weight_avg": 1.0014029741287231, + "objective/train/weighted_lm_loss": 1.5860118865966797, + "objective/train/weights_max": 1.3310092687606812, + "objective/train/weights_min": 0.3714952766895294, + "theoretical_loss": 3.818195956606262, + "tokens_seen": 681574400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008014764885251164, + "loss": 0.0807, + "theoretical_loss": 3.818195956606262, + "tokens_seen": 681574400 + }, + { + "epoch": 0.21, + "learning_rate": 0.000801396244583534, + "loss": 0.0802, + "theoretical_loss": 3.8180472881658707, + "tokens_seen": 681836544 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008013160006419515, + "loss": 0.0807, + "theoretical_loss": 3.817898692869961, + "tokens_seen": 682098688 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008012357567003692, + "loss": 0.0812, + "theoretical_loss": 3.817750170654455, + "tokens_seen": 682360832 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008011555127587867, + "loss": 0.0797, + "theoretical_loss": 3.8176017214553575, + "tokens_seen": 682622976 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008010752688172042, + "loss": 0.0803, + "theoretical_loss": 3.8174533452087513, + "tokens_seen": 682885120 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008009950248756219, + "loss": 0.0774, + "theoretical_loss": 3.817305041850802, + "tokens_seen": 683147264 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008009147809340394, + "loss": 0.0806, + "theoretical_loss": 3.817156811317754, + "tokens_seen": 683409408 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008008345369924572, + "loss": 0.0802, + "theoretical_loss": 3.8170086535459333, + "tokens_seen": 683671552 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008007542930508747, + "loss": 0.0799, + "theoretical_loss": 3.8168605684717454, + "tokens_seen": 683933696 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008006740491092923, + "loss": 0.08, + "theoretical_loss": 3.816712556031675, + "tokens_seen": 684195840 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008005938051677099, + "loss": 0.079, + "theoretical_loss": 3.816564616162287, + "tokens_seen": 684457984 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008005135612261274, + "loss": 0.078, + "theoretical_loss": 3.8164167488002265, + "tokens_seen": 684720128 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.001422036555595696, + "objective/train/docs_used": 254674, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.559423565864563, + "objective/train/original_loss": 1.5594233274459839, + "objective/train/theoretical_loss": 3.816342842289668, + "objective/train/tokens_used": 705311200, + "objective/train/value_avg": -0.006954193115234375, + "objective/train/value_loss": 0.00011249267845414579, + "objective/train/value_max": -0.0001823902130126953, + "objective/train/value_min": -0.30419921875, + "objective/train/value_reward_corr": 0.6799434767604788, + "objective/train/value_std": 0.01035308837890625, + "objective/train/weight_avg": 1.0014777183532715, + "objective/train/weighted_lm_loss": 1.5613473653793335, + "objective/train/weights_max": 1.134773850440979, + "objective/train/weights_min": 0.7942905426025391, + "theoretical_loss": 3.816342842289668, + "tokens_seen": 684851200 + }, + { + "epoch": 0.21, + "learning_rate": 0.000800433317284545, + "loss": 0.0798, + "theoretical_loss": 3.816268953882218, + "tokens_seen": 684982272 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008003530733429626, + "loss": 0.0777, + "theoretical_loss": 3.8161212313450648, + "tokens_seen": 685244416 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008002728294013802, + "loss": 0.0793, + "theoretical_loss": 3.81597358112565, + "tokens_seen": 685506560 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008001925854597977, + "loss": 0.0798, + "theoretical_loss": 3.815826003160935, + "tokens_seen": 685768704 + }, + { + "epoch": 0.21, + "learning_rate": 0.0008001123415182155, + "loss": 0.0811, + "theoretical_loss": 3.815678497387962, + "tokens_seen": 686030848 + }, + { + "epoch": 0.21, + "learning_rate": 0.000800032097576633, + "loss": 0.0788, + "theoretical_loss": 3.8155310637438506, + "tokens_seen": 686292992 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007999518536350505, + "loss": 0.079, + "theoretical_loss": 3.8153837021657995, + "tokens_seen": 686555136 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007998716096934682, + "loss": 0.0776, + "theoretical_loss": 3.8152364125910863, + "tokens_seen": 686817280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007997913657518857, + "loss": 0.0781, + "theoretical_loss": 3.8150891949570664, + "tokens_seen": 687079424 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007997111218103033, + "loss": 0.0782, + "theoretical_loss": 3.814942049201175, + "tokens_seen": 687341568 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007996308778687209, + "loss": 0.0817, + "theoretical_loss": 3.8147949752609236, + "tokens_seen": 687603712 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007995506339271385, + "loss": 0.0789, + "theoretical_loss": 3.814647973073903, + "tokens_seen": 687865856 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0019079339690506458, + "objective/train/docs_used": 255827, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5390352010726929, + "objective/train/original_loss": 1.5390353202819824, + "objective/train/theoretical_loss": 3.8145010425777826, + "objective/train/tokens_used": 708588000, + "objective/train/value_avg": -0.0116119384765625, + "objective/train/value_loss": 0.0002800696238409728, + "objective/train/value_max": -0.00017261505126953125, + "objective/train/value_min": -0.5703125, + "objective/train/value_reward_corr": 0.7645409376422845, + "objective/train/value_std": 0.020599365234375, + "objective/train/weight_avg": 1.0020438432693481, + "objective/train/weighted_lm_loss": 1.5430036783218384, + "objective/train/weights_max": 1.2020081281661987, + "objective/train/weights_min": 0.627696692943573, + "theoretical_loss": 3.8145010425777826, + "tokens_seen": 688128000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007994703899855562, + "loss": 0.0788, + "theoretical_loss": 3.8145010425777826, + "tokens_seen": 688128000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007993901460439737, + "loss": 0.0796, + "theoretical_loss": 3.814354183710308, + "tokens_seen": 688390144 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007993099021023913, + "loss": 0.0796, + "theoretical_loss": 3.8142073964093046, + "tokens_seen": 688652288 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007992296581608089, + "loss": 0.0782, + "theoretical_loss": 3.8140606806126733, + "tokens_seen": 688914432 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007991494142192265, + "loss": 0.0796, + "theoretical_loss": 3.813914036258393, + "tokens_seen": 689176576 + }, + { + "epoch": 0.21, + "learning_rate": 0.000799069170277644, + "loss": 0.0789, + "theoretical_loss": 3.813767463284522, + "tokens_seen": 689438720 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007989889263360617, + "loss": 0.0797, + "theoretical_loss": 3.8136209616291934, + "tokens_seen": 689700864 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007989086823944792, + "loss": 0.0796, + "theoretical_loss": 3.813474531230618, + "tokens_seen": 689963008 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007988284384528968, + "loss": 0.0785, + "theoretical_loss": 3.8133281720270835, + "tokens_seen": 690225152 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007987481945113144, + "loss": 0.0806, + "theoretical_loss": 3.8131818839569562, + "tokens_seen": 690487296 + }, + { + "epoch": 0.21, + "learning_rate": 0.000798667950569732, + "loss": 0.0806, + "theoretical_loss": 3.8130356669586765, + "tokens_seen": 690749440 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007985877066281496, + "loss": 0.0768, + "theoretical_loss": 3.8128895209707627, + "tokens_seen": 691011584 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007985074626865672, + "loss": 0.0819, + "theoretical_loss": 3.81274344593181, + "tokens_seen": 691273728 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.001648097182624042, + "objective/train/docs_used": 257040, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.549683690071106, + "objective/train/original_loss": 1.5496838092803955, + "objective/train/theoretical_loss": 3.812670434999026, + "objective/train/tokens_used": 711864800, + "objective/train/value_avg": -0.0057830810546875, + "objective/train/value_loss": 0.00016456494631711394, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.3056640625, + "objective/train/value_reward_corr": 0.6402803699022559, + "objective/train/value_std": 0.00890350341796875, + "objective/train/weight_avg": 1.0017253160476685, + "objective/train/weighted_lm_loss": 1.5529325008392334, + "objective/train/weights_max": 1.138139009475708, + "objective/train/weights_min": 0.368623286485672, + "theoretical_loss": 3.812670434999026, + "tokens_seen": 691404800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007984272187449848, + "loss": 0.0774, + "theoretical_loss": 3.8125974417804893, + "tokens_seen": 691535872 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007983469748034023, + "loss": 0.0767, + "theoretical_loss": 3.8124515084555477, + "tokens_seen": 691798016 + }, + { + "epoch": 0.21, + "learning_rate": 0.00079826673086182, + "loss": 0.0786, + "theoretical_loss": 3.8123056458958087, + "tokens_seen": 692060160 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007981864869202375, + "loss": 0.078, + "theoretical_loss": 3.812159854040172, + "tokens_seen": 692322304 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007981062429786551, + "loss": 0.0796, + "theoretical_loss": 3.8120141328276125, + "tokens_seen": 692584448 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007980259990370727, + "loss": 0.0813, + "theoretical_loss": 3.8118684821971813, + "tokens_seen": 692846592 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007979457550954902, + "loss": 0.0807, + "theoretical_loss": 3.8117229020880057, + "tokens_seen": 693108736 + }, + { + "epoch": 0.21, + "learning_rate": 0.000797865511153908, + "loss": 0.0804, + "theoretical_loss": 3.811577392439287, + "tokens_seen": 693370880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007977852672123255, + "loss": 0.0801, + "theoretical_loss": 3.8114319531903025, + "tokens_seen": 693633024 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007977050232707431, + "loss": 0.0807, + "theoretical_loss": 3.811286584280406, + "tokens_seen": 693895168 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007976247793291607, + "loss": 0.0799, + "theoretical_loss": 3.8111412856490245, + "tokens_seen": 694157312 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007975445353875782, + "loss": 0.08, + "theoretical_loss": 3.810996057235661, + "tokens_seen": 694419456 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0017769966507330537, + "objective/train/docs_used": 258341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6005815267562866, + "objective/train/original_loss": 1.6005815267562866, + "objective/train/theoretical_loss": 3.810850898979894, + "objective/train/tokens_used": 715141600, + "objective/train/value_avg": -0.00868988037109375, + "objective/train/value_loss": 0.00037193644675426185, + "objective/train/value_max": -0.00015234947204589844, + "objective/train/value_min": -0.72021484375, + "objective/train/value_reward_corr": 0.5291458465205756, + "objective/train/value_std": 0.0122528076171875, + "objective/train/weight_avg": 1.001926302909851, + "objective/train/weighted_lm_loss": 1.60252046585083, + "objective/train/weights_max": 1.3593815565109253, + "objective/train/weights_min": 0.23074059188365936, + "theoretical_loss": 3.810850898979894, + "tokens_seen": 694681600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007974642914459958, + "loss": 0.08, + "theoretical_loss": 3.810850898979894, + "tokens_seen": 694681600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007973840475044134, + "loss": 0.0808, + "theoretical_loss": 3.810705810821375, + "tokens_seen": 694943744 + }, + { + "epoch": 0.21, + "learning_rate": 0.000797303803562831, + "loss": 0.0795, + "theoretical_loss": 3.8105607926998326, + "tokens_seen": 695205888 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007972235596212485, + "loss": 0.0785, + "theoretical_loss": 3.810415844555067, + "tokens_seen": 695468032 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007971433156796663, + "loss": 0.0778, + "theoretical_loss": 3.8102709663269554, + "tokens_seen": 695730176 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007970630717380838, + "loss": 0.0791, + "theoretical_loss": 3.810126157955448, + "tokens_seen": 695992320 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007969828277965014, + "loss": 0.0779, + "theoretical_loss": 3.809981419380569, + "tokens_seen": 696254464 + }, + { + "epoch": 0.21, + "learning_rate": 0.000796902583854919, + "loss": 0.0797, + "theoretical_loss": 3.809836750542418, + "tokens_seen": 696516608 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007968223399133365, + "loss": 0.0803, + "theoretical_loss": 3.8096921513811663, + "tokens_seen": 696778752 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007967420959717542, + "loss": 0.0804, + "theoretical_loss": 3.809547621837061, + "tokens_seen": 697040896 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007966618520301717, + "loss": 0.0765, + "theoretical_loss": 3.809403161850423, + "tokens_seen": 697303040 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007965816080885893, + "loss": 0.0823, + "theoretical_loss": 3.8092587713616446, + "tokens_seen": 697565184 + }, + { + "epoch": 0.21, + "learning_rate": 0.000796501364147007, + "loss": 0.081, + "theoretical_loss": 3.809114450311193, + "tokens_seen": 697827328 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": -0.0008030504104681313, + "objective/train/docs_used": 259086, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6132134199142456, + "objective/train/original_loss": 1.613213300704956, + "objective/train/theoretical_loss": 3.8090423158067566, + "objective/train/tokens_used": 718418400, + "objective/train/value_avg": -0.00884246826171875, + "objective/train/value_loss": 0.00026143278228119016, + "objective/train/value_max": -0.0002033710479736328, + "objective/train/value_min": -0.25, + "objective/train/value_reward_corr": 0.8812842327841226, + "objective/train/value_std": 0.0213165283203125, + "objective/train/weight_avg": 0.9993177652359009, + "objective/train/weighted_lm_loss": 1.6114174127578735, + "objective/train/weights_max": 1.1213833093643188, + "objective/train/weights_min": 0.37042829394340515, + "theoretical_loss": 3.8090423158067566, + "tokens_seen": 697958400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007964211202054245, + "loss": 0.08, + "theoretical_loss": 3.808970198639609, + "tokens_seen": 698089472 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007963408762638421, + "loss": 0.0781, + "theoretical_loss": 3.808826016287507, + "tokens_seen": 698351616 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007962606323222597, + "loss": 0.0804, + "theoretical_loss": 3.8086819031955725, + "tokens_seen": 698613760 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007961803883806773, + "loss": 0.0787, + "theoretical_loss": 3.8085378593045665, + "tokens_seen": 698875904 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007961001444390948, + "loss": 0.0817, + "theoretical_loss": 3.8083938845553202, + "tokens_seen": 699138048 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007960199004975125, + "loss": 0.0783, + "theoretical_loss": 3.80824997888874, + "tokens_seen": 699400192 + }, + { + "epoch": 0.21, + "learning_rate": 0.00079593965655593, + "loss": 0.0807, + "theoretical_loss": 3.8081061422458036, + "tokens_seen": 699662336 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007958594126143476, + "loss": 0.0785, + "theoretical_loss": 3.8079623745675613, + "tokens_seen": 699924480 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007957791686727652, + "loss": 0.0755, + "theoretical_loss": 3.8078186757951364, + "tokens_seen": 700186624 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007956989247311828, + "loss": 0.0807, + "theoretical_loss": 3.8076750458697237, + "tokens_seen": 700448768 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007956186807896005, + "loss": 0.0789, + "theoretical_loss": 3.807531484732591, + "tokens_seen": 700710912 + }, + { + "epoch": 0.21, + "learning_rate": 0.000795538436848018, + "loss": 0.0793, + "theoretical_loss": 3.8073879923250775, + "tokens_seen": 700973056 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.0006472544046118855, + "objective/train/docs_used": 260260, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.584854006767273, + "objective/train/original_loss": 1.5848541259765625, + "objective/train/theoretical_loss": 3.8072445685885947, + "objective/train/tokens_used": 721695200, + "objective/train/value_avg": -0.007328033447265625, + "objective/train/value_loss": 0.0002117718249792233, + "objective/train/value_max": -7.784366607666016e-05, + "objective/train/value_min": -0.60693359375, + "objective/train/value_reward_corr": 0.616764286984292, + "objective/train/value_std": 0.011688232421875, + "objective/train/weight_avg": 1.000746250152588, + "objective/train/weighted_lm_loss": 1.5859571695327759, + "objective/train/weights_max": 1.590544581413269, + "objective/train/weights_min": 0.38592278957366943, + "theoretical_loss": 3.8072445685885947, + "tokens_seen": 701235200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007954581929064356, + "loss": 0.0775, + "theoretical_loss": 3.8072445685885947, + "tokens_seen": 701235200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007953779489648532, + "loss": 0.0785, + "theoretical_loss": 3.807101213464625, + "tokens_seen": 701497344 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007952977050232708, + "loss": 0.0803, + "theoretical_loss": 3.8069579268947242, + "tokens_seen": 701759488 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007952174610816883, + "loss": 0.0812, + "theoretical_loss": 3.806814708820519, + "tokens_seen": 702021632 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007951372171401059, + "loss": 0.0759, + "theoretical_loss": 3.806671559183706, + "tokens_seen": 702283776 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007950569731985235, + "loss": 0.078, + "theoretical_loss": 3.806528477926056, + "tokens_seen": 702545920 + }, + { + "epoch": 0.21, + "learning_rate": 0.000794976729256941, + "loss": 0.0811, + "theoretical_loss": 3.806385464989409, + "tokens_seen": 702808064 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007948964853153588, + "loss": 0.0808, + "theoretical_loss": 3.806242520315676, + "tokens_seen": 703070208 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007948162413737763, + "loss": 0.0788, + "theoretical_loss": 3.806099643846841, + "tokens_seen": 703332352 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007947359974321939, + "loss": 0.0792, + "theoretical_loss": 3.8059568355249564, + "tokens_seen": 703594496 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007946557534906115, + "loss": 0.0789, + "theoretical_loss": 3.8058140952921478, + "tokens_seen": 703856640 + }, + { + "epoch": 0.21, + "learning_rate": 0.000794575509549029, + "loss": 0.0796, + "theoretical_loss": 3.805671423090609, + "tokens_seen": 704118784 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007944952656074467, + "loss": 0.0781, + "theoretical_loss": 3.805528818862607, + "tokens_seen": 704380928 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.00023795383458491415, + "objective/train/docs_used": 261464, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4358924627304077, + "objective/train/original_loss": 1.4358923435211182, + "objective/train/theoretical_loss": 3.8054575422206596, + "objective/train/tokens_used": 724972000, + "objective/train/value_avg": -0.00519561767578125, + "objective/train/value_loss": 0.00011082483979407698, + "objective/train/value_max": -0.00012433528900146484, + "objective/train/value_min": -0.2100830078125, + "objective/train/value_reward_corr": 0.568821173994195, + "objective/train/value_std": 0.00617218017578125, + "objective/train/weight_avg": 1.000292420387268, + "objective/train/weighted_lm_loss": 1.4362776279449463, + "objective/train/weights_max": 1.1238564252853394, + "objective/train/weights_min": 0.7808533906936646, + "theoretical_loss": 3.8054575422206596, + "tokens_seen": 704512000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007944150216658642, + "loss": 0.078, + "theoretical_loss": 3.8053862825504776, + "tokens_seen": 704643072 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007943347777242818, + "loss": 0.0806, + "theoretical_loss": 3.8052438140966265, + "tokens_seen": 704905216 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007942545337826995, + "loss": 0.0804, + "theoretical_loss": 3.8051014134435315, + "tokens_seen": 705167360 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007941742898411171, + "loss": 0.0751, + "theoretical_loss": 3.804959080533739, + "tokens_seen": 705429504 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007940940458995346, + "loss": 0.0791, + "theoretical_loss": 3.8048168153098656, + "tokens_seen": 705691648 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007940138019579522, + "loss": 0.0787, + "theoretical_loss": 3.8046746177145985, + "tokens_seen": 705953792 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007939335580163698, + "loss": 0.0799, + "theoretical_loss": 3.804532487690694, + "tokens_seen": 706215936 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007938533140747873, + "loss": 0.0782, + "theoretical_loss": 3.8043904251809786, + "tokens_seen": 706478080 + }, + { + "epoch": 0.21, + "learning_rate": 0.000793773070133205, + "loss": 0.0796, + "theoretical_loss": 3.8042484301283475, + "tokens_seen": 706740224 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007936928261916225, + "loss": 0.0805, + "theoretical_loss": 3.8041065024757668, + "tokens_seen": 707002368 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007936125822500401, + "loss": 0.0791, + "theoretical_loss": 3.8039646421662705, + "tokens_seen": 707264512 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007935323383084578, + "loss": 0.0783, + "theoretical_loss": 3.8038228491429624, + "tokens_seen": 707526656 + }, + { + "epoch": 0.21, + "objective/train/advantage_avg": 0.00028257613303139806, + "objective/train/docs_used": 262675, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7718579769134521, + "objective/train/original_loss": 1.7718580961227417, + "objective/train/theoretical_loss": 3.8036811233490164, + "objective/train/tokens_used": 728248800, + "objective/train/value_avg": -0.01105499267578125, + "objective/train/value_loss": 0.00044336056453175843, + "objective/train/value_max": -0.0001442432403564453, + "objective/train/value_min": -0.88037109375, + "objective/train/value_reward_corr": 0.5845583896713396, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.0004814863204956, + "objective/train/weighted_lm_loss": 1.772655725479126, + "objective/train/weights_max": 1.4637386798858643, + "objective/train/weights_min": 0.3728155791759491, + "theoretical_loss": 3.8036811233490164, + "tokens_seen": 707788800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007934520943668753, + "loss": 0.0805, + "theoretical_loss": 3.8036811233490164, + "tokens_seen": 707788800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007933718504252929, + "loss": 0.0776, + "theoretical_loss": 3.803539464727673, + "tokens_seen": 708050944 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007932916064837105, + "loss": 0.0815, + "theoretical_loss": 3.803397873222244, + "tokens_seen": 708313088 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007932113625421281, + "loss": 0.0802, + "theoretical_loss": 3.8032563487761095, + "tokens_seen": 708575232 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007931311186005457, + "loss": 0.0775, + "theoretical_loss": 3.8031148913327177, + "tokens_seen": 708837376 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007930508746589633, + "loss": 0.0807, + "theoretical_loss": 3.8029735008355843, + "tokens_seen": 709099520 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007929706307173808, + "loss": 0.0802, + "theoretical_loss": 3.8028321772282965, + "tokens_seen": 709361664 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007928903867757984, + "loss": 0.079, + "theoretical_loss": 3.8026909204545065, + "tokens_seen": 709623808 + }, + { + "epoch": 0.22, + "learning_rate": 0.000792810142834216, + "loss": 0.0813, + "theoretical_loss": 3.8025497304579376, + "tokens_seen": 709885952 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007927298988926336, + "loss": 0.0786, + "theoretical_loss": 3.80240860718238, + "tokens_seen": 710148096 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007926496549510513, + "loss": 0.0829, + "theoretical_loss": 3.802267550571691, + "tokens_seen": 710410240 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007925694110094688, + "loss": 0.082, + "theoretical_loss": 3.802126560569798, + "tokens_seen": 710672384 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007924891670678864, + "loss": 0.0815, + "theoretical_loss": 3.801985637120694, + "tokens_seen": 710934528 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0011543107684701681, + "objective/train/docs_used": 263840, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6443710327148438, + "objective/train/original_loss": 1.6443710327148438, + "objective/train/theoretical_loss": 3.801915200335954, + "objective/train/tokens_used": 731525600, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.00040373342926613986, + "objective/train/value_max": -0.0002199411392211914, + "objective/train/value_min": -0.466552734375, + "objective/train/value_reward_corr": 0.6620460795564023, + "objective/train/value_std": 0.01470947265625, + "objective/train/weight_avg": 1.0013347864151, + "objective/train/weighted_lm_loss": 1.6481596231460571, + "objective/train/weights_max": 1.1715375185012817, + "objective/train/weights_min": 0.37442609667778015, + "theoretical_loss": 3.801915200335954, + "tokens_seen": 711065600 + }, + { + "epoch": 0.22, + "learning_rate": 0.000792408923126304, + "loss": 0.0788, + "theoretical_loss": 3.801844780168441, + "tokens_seen": 711196672 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007923286791847216, + "loss": 0.0816, + "theoretical_loss": 3.8017039896571685, + "tokens_seen": 711458816 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007922484352431391, + "loss": 0.0787, + "theoretical_loss": 3.8015632655310734, + "tokens_seen": 711720960 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007921681913015567, + "loss": 0.0801, + "theoretical_loss": 3.8014226077344198, + "tokens_seen": 711983104 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007920879473599743, + "loss": 0.0779, + "theoretical_loss": 3.8012820162115393, + "tokens_seen": 712245248 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007920077034183918, + "loss": 0.0772, + "theoretical_loss": 3.801141490906831, + "tokens_seen": 712507392 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007919274594768096, + "loss": 0.078, + "theoretical_loss": 3.80100103176476, + "tokens_seen": 712769536 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007918472155352271, + "loss": 0.0816, + "theoretical_loss": 3.8008606387298594, + "tokens_seen": 713031680 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007917669715936448, + "loss": 0.0767, + "theoretical_loss": 3.80072031174673, + "tokens_seen": 713293824 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007916867276520623, + "loss": 0.0804, + "theoretical_loss": 3.800580050760036, + "tokens_seen": 713555968 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007916064837104798, + "loss": 0.0764, + "theoretical_loss": 3.800439855714512, + "tokens_seen": 713818112 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007915262397688975, + "loss": 0.08, + "theoretical_loss": 3.8002997265549574, + "tokens_seen": 714080256 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.001398808672092855, + "objective/train/docs_used": 265036, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4737719297409058, + "objective/train/original_loss": 1.4737719297409058, + "objective/train/theoretical_loss": 3.8001596632262387, + "objective/train/tokens_used": 734802400, + "objective/train/value_avg": -0.009124755859375, + "objective/train/value_loss": 0.00014532258501276374, + "objective/train/value_max": -0.00020992755889892578, + "objective/train/value_min": -0.1900634765625, + "objective/train/value_reward_corr": 0.6496980006355382, + "objective/train/value_std": 0.0111541748046875, + "objective/train/weight_avg": 1.0014667510986328, + "objective/train/weighted_lm_loss": 1.4759221076965332, + "objective/train/weights_max": 1.1377222537994385, + "objective/train/weights_min": 0.3690439462661743, + "theoretical_loss": 3.8001596632262387, + "tokens_seen": 714342400 + }, + { + "epoch": 0.22, + "learning_rate": 0.000791445995827315, + "loss": 0.0801, + "theoretical_loss": 3.8001596632262387, + "tokens_seen": 714342400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007913657518857326, + "loss": 0.0779, + "theoretical_loss": 3.8000196656732874, + "tokens_seen": 714604544 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007912855079441503, + "loss": 0.078, + "theoretical_loss": 3.7998797338411032, + "tokens_seen": 714866688 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007912052640025679, + "loss": 0.0797, + "theoretical_loss": 3.7997398676747496, + "tokens_seen": 715128832 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007911250200609854, + "loss": 0.0797, + "theoretical_loss": 3.7996000671193593, + "tokens_seen": 715390976 + }, + { + "epoch": 0.22, + "learning_rate": 0.000791044776119403, + "loss": 0.0777, + "theoretical_loss": 3.7994603321201277, + "tokens_seen": 715653120 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007909645321778206, + "loss": 0.0787, + "theoretical_loss": 3.7993206626223177, + "tokens_seen": 715915264 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007908842882362381, + "loss": 0.0789, + "theoretical_loss": 3.799181058571258, + "tokens_seen": 716177408 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007908040442946558, + "loss": 0.0757, + "theoretical_loss": 3.7990415199123424, + "tokens_seen": 716439552 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007907238003530733, + "loss": 0.0809, + "theoretical_loss": 3.79890204659103, + "tokens_seen": 716701696 + }, + { + "epoch": 0.22, + "learning_rate": 0.000790643556411491, + "loss": 0.0783, + "theoretical_loss": 3.7987626385528466, + "tokens_seen": 716963840 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007905633124699085, + "loss": 0.0818, + "theoretical_loss": 3.798623295743382, + "tokens_seen": 717225984 + }, + { + "epoch": 0.22, + "learning_rate": 0.000790483068528326, + "loss": 0.0818, + "theoretical_loss": 3.798484018108291, + "tokens_seen": 717488128 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0011304336367174983, + "objective/train/docs_used": 266305, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5863089561462402, + "objective/train/original_loss": 1.5863087177276611, + "objective/train/theoretical_loss": 3.7984144037141716, + "objective/train/tokens_used": 738079200, + "objective/train/value_avg": -0.0092620849609375, + "objective/train/value_loss": 0.0005252858973108232, + "objective/train/value_max": -0.0002434253692626953, + "objective/train/value_min": -0.962890625, + "objective/train/value_reward_corr": 0.61228862282039, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.0013542175292969, + "objective/train/weighted_lm_loss": 1.5876915454864502, + "objective/train/weights_max": 1.5222142934799194, + "objective/train/weights_min": 0.23608994483947754, + "theoretical_loss": 3.7984144037141716, + "tokens_seen": 717619200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007904028245867438, + "loss": 0.0783, + "theoretical_loss": 3.7983448055932953, + "tokens_seen": 717750272 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007903225806451613, + "loss": 0.0822, + "theoretical_loss": 3.79820565814418, + "tokens_seen": 718012416 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007902423367035789, + "loss": 0.0786, + "theoretical_loss": 3.798066575706795, + "tokens_seen": 718274560 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007901620927619965, + "loss": 0.0779, + "theoretical_loss": 3.797927558227056, + "tokens_seen": 718536704 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007900818488204141, + "loss": 0.0771, + "theoretical_loss": 3.7977886056509433, + "tokens_seen": 718798848 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007900016048788316, + "loss": 0.0781, + "theoretical_loss": 3.797649717924502, + "tokens_seen": 719060992 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007899213609372492, + "loss": 0.0791, + "theoretical_loss": 3.797510894993839, + "tokens_seen": 719323136 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007898411169956668, + "loss": 0.0783, + "theoretical_loss": 3.79737213680513, + "tokens_seen": 719585280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007897608730540844, + "loss": 0.0791, + "theoretical_loss": 3.797233443304612, + "tokens_seen": 719847424 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007896806291125021, + "loss": 0.0805, + "theoretical_loss": 3.7970948144385868, + "tokens_seen": 720109568 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007896003851709196, + "loss": 0.0764, + "theoretical_loss": 3.796956250153421, + "tokens_seen": 720371712 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007895201412293372, + "loss": 0.0797, + "theoretical_loss": 3.796817750395544, + "tokens_seen": 720633856 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0022220478858798742, + "objective/train/docs_used": 267624, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4708417654037476, + "objective/train/original_loss": 1.4708420038223267, + "objective/train/theoretical_loss": 3.7966793151114504, + "objective/train/tokens_used": 741356000, + "objective/train/value_avg": -0.0073699951171875, + "objective/train/value_loss": 0.0003509256348479539, + "objective/train/value_max": -0.0001838207244873047, + "objective/train/value_min": -0.91015625, + "objective/train/value_reward_corr": 0.5371351152106856, + "objective/train/value_std": 0.0122222900390625, + "objective/train/weight_avg": 1.0023695230484009, + "objective/train/weighted_lm_loss": 1.4751582145690918, + "objective/train/weights_max": 1.2902494668960571, + "objective/train/weights_min": 0.27010083198547363, + "theoretical_loss": 3.7966793151114504, + "tokens_seen": 720896000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007894398972877548, + "loss": 0.0768, + "theoretical_loss": 3.7966793151114504, + "tokens_seen": 720896000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007893596533461724, + "loss": 0.0768, + "theoretical_loss": 3.796540944247698, + "tokens_seen": 721158144 + }, + { + "epoch": 0.22, + "learning_rate": 0.00078927940940459, + "loss": 0.0783, + "theoretical_loss": 3.796402637750908, + "tokens_seen": 721420288 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007891991654630075, + "loss": 0.0809, + "theoretical_loss": 3.796264395567766, + "tokens_seen": 721682432 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007891189215214251, + "loss": 0.0788, + "theoretical_loss": 3.7961262176450195, + "tokens_seen": 721944576 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007890386775798428, + "loss": 0.08, + "theoretical_loss": 3.795988103929482, + "tokens_seen": 722206720 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007889584336382604, + "loss": 0.0762, + "theoretical_loss": 3.7958500543680276, + "tokens_seen": 722468864 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007888781896966779, + "loss": 0.079, + "theoretical_loss": 3.795712068907596, + "tokens_seen": 722731008 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007887979457550956, + "loss": 0.0825, + "theoretical_loss": 3.795574147495188, + "tokens_seen": 722993152 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007887177018135131, + "loss": 0.0807, + "theoretical_loss": 3.795436290077868, + "tokens_seen": 723255296 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007886374578719306, + "loss": 0.081, + "theoretical_loss": 3.795298496602765, + "tokens_seen": 723517440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007885572139303483, + "loss": 0.0778, + "theoretical_loss": 3.795160767017068, + "tokens_seen": 723779584 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007884769699887658, + "loss": 0.0787, + "theoretical_loss": 3.795023101268031, + "tokens_seen": 724041728 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0011378737399354577, + "objective/train/docs_used": 268786, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.588625431060791, + "objective/train/original_loss": 1.588625431060791, + "objective/train/theoretical_loss": 3.7949542923157935, + "objective/train/tokens_used": 744632800, + "objective/train/value_avg": -0.007007598876953125, + "objective/train/value_loss": 0.00012135379802202806, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.343505859375, + "objective/train/value_reward_corr": 0.6047769086501988, + "objective/train/value_std": 0.00875091552734375, + "objective/train/weight_avg": 1.0011974573135376, + "objective/train/weighted_lm_loss": 1.5900264978408813, + "objective/train/weights_max": 1.1416176557540894, + "objective/train/weights_min": 0.7288485765457153, + "theoretical_loss": 3.7949542923157935, + "tokens_seen": 724172800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007883967260471834, + "loss": 0.0827, + "theoretical_loss": 3.7948854993029695, + "tokens_seen": 724303872 + }, + { + "epoch": 0.22, + "learning_rate": 0.000788316482105601, + "loss": 0.0767, + "theoretical_loss": 3.7947479610692616, + "tokens_seen": 724566016 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007882362381640187, + "loss": 0.0784, + "theoretical_loss": 3.794610486514348, + "tokens_seen": 724828160 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007881559942224363, + "loss": 0.0788, + "theoretical_loss": 3.7944730755857323, + "tokens_seen": 725090304 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007880757502808538, + "loss": 0.0773, + "theoretical_loss": 3.794335728230979, + "tokens_seen": 725352448 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007879955063392714, + "loss": 0.078, + "theoretical_loss": 3.7941984443977157, + "tokens_seen": 725614592 + }, + { + "epoch": 0.22, + "learning_rate": 0.000787915262397689, + "loss": 0.0762, + "theoretical_loss": 3.7940612240336327, + "tokens_seen": 725876736 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007878350184561066, + "loss": 0.0786, + "theoretical_loss": 3.793924067086481, + "tokens_seen": 726138880 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007877547745145241, + "loss": 0.0781, + "theoretical_loss": 3.793786973504073, + "tokens_seen": 726401024 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007876745305729418, + "loss": 0.0772, + "theoretical_loss": 3.7936499432342847, + "tokens_seen": 726663168 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007875942866313593, + "loss": 0.0782, + "theoretical_loss": 3.7935129762250526, + "tokens_seen": 726925312 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007875140426897769, + "loss": 0.0813, + "theoretical_loss": 3.7933760724243752, + "tokens_seen": 727187456 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0019741442520171404, + "objective/train/docs_used": 269904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5811312198638916, + "objective/train/original_loss": 1.5811312198638916, + "objective/train/theoretical_loss": 3.7932392317803116, + "objective/train/tokens_used": 747909600, + "objective/train/value_avg": -0.006404876708984375, + "objective/train/value_loss": 0.00015449750935658813, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.30126953125, + "objective/train/value_reward_corr": 0.5738474794753151, + "objective/train/value_std": 0.00901031494140625, + "objective/train/weight_avg": 1.002044916152954, + "objective/train/weighted_lm_loss": 1.5848934650421143, + "objective/train/weights_max": 1.2912355661392212, + "objective/train/weights_min": 0.36876535415649414, + "theoretical_loss": 3.7932392317803116, + "tokens_seen": 727449600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007874337987481946, + "loss": 0.0798, + "theoretical_loss": 3.7932392317803116, + "tokens_seen": 727449600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007873535548066121, + "loss": 0.0773, + "theoretical_loss": 3.7931024542409837, + "tokens_seen": 727711744 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007872733108650297, + "loss": 0.0784, + "theoretical_loss": 3.7929657397545733, + "tokens_seen": 727973888 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007871930669234473, + "loss": 0.0767, + "theoretical_loss": 3.792829088269324, + "tokens_seen": 728236032 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007871128229818649, + "loss": 0.077, + "theoretical_loss": 3.792692499733541, + "tokens_seen": 728498176 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007870325790402824, + "loss": 0.0785, + "theoretical_loss": 3.7925559740955896, + "tokens_seen": 728760320 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007869523350987, + "loss": 0.0775, + "theoretical_loss": 3.7924195113038968, + "tokens_seen": 729022464 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007868720911571176, + "loss": 0.0788, + "theoretical_loss": 3.7922831113069493, + "tokens_seen": 729284608 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007867918472155353, + "loss": 0.0762, + "theoretical_loss": 3.792146774053296, + "tokens_seen": 729546752 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007867116032739529, + "loss": 0.077, + "theoretical_loss": 3.792010499491545, + "tokens_seen": 729808896 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007866313593323704, + "loss": 0.0754, + "theoretical_loss": 3.7918742875703657, + "tokens_seen": 730071040 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007865511153907881, + "loss": 0.0763, + "theoretical_loss": 3.7917381382384883, + "tokens_seen": 730333184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007864708714492056, + "loss": 0.0769, + "theoretical_loss": 3.791602051444703, + "tokens_seen": 730595328 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0019119989592581987, + "objective/train/docs_used": 271086, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5188298225402832, + "objective/train/original_loss": 1.5188297033309937, + "objective/train/theoretical_loss": 3.7915340314836077, + "objective/train/tokens_used": 751186400, + "objective/train/value_avg": -0.00708770751953125, + "objective/train/value_loss": 0.00019582056847866625, + "objective/train/value_max": -0.00018966197967529297, + "objective/train/value_min": -0.70556640625, + "objective/train/value_reward_corr": 0.6300847957470939, + "objective/train/value_std": 0.0132293701171875, + "objective/train/weight_avg": 1.0020005702972412, + "objective/train/weighted_lm_loss": 1.5218416452407837, + "objective/train/weights_max": 2.00211238861084, + "objective/train/weights_min": 0.2432425171136856, + "theoretical_loss": 3.7915340314836077, + "tokens_seen": 730726400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007863906275076232, + "loss": 0.0753, + "theoretical_loss": 3.791466027137859, + "tokens_seen": 730857472 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007863103835660408, + "loss": 0.0783, + "theoretical_loss": 3.7913300652668678, + "tokens_seen": 731119616 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007862301396244583, + "loss": 0.0779, + "theoretical_loss": 3.7911941657807002, + "tokens_seen": 731381760 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007861498956828759, + "loss": 0.0754, + "theoretical_loss": 3.7910583286283854, + "tokens_seen": 731643904 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007860696517412936, + "loss": 0.0786, + "theoretical_loss": 3.7909225537590157, + "tokens_seen": 731906048 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007859894077997112, + "loss": 0.0766, + "theoretical_loss": 3.790786841121739, + "tokens_seen": 732168192 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007859091638581287, + "loss": 0.0767, + "theoretical_loss": 3.7906511906657676, + "tokens_seen": 732430336 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007858289199165464, + "loss": 0.0766, + "theoretical_loss": 3.7905156023403697, + "tokens_seen": 732692480 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007857486759749639, + "loss": 0.0781, + "theoretical_loss": 3.7903800760948743, + "tokens_seen": 732954624 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007856684320333814, + "loss": 0.0778, + "theoretical_loss": 3.790244611878671, + "tokens_seen": 733216768 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007855881880917991, + "loss": 0.078, + "theoretical_loss": 3.790109209641206, + "tokens_seen": 733478912 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007855079441502166, + "loss": 0.0769, + "theoretical_loss": 3.7899738693319875, + "tokens_seen": 733741056 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0013161771930754185, + "objective/train/docs_used": 272255, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.631644606590271, + "objective/train/original_loss": 1.6316447257995605, + "objective/train/theoretical_loss": 3.7898385909005814, + "objective/train/tokens_used": 754463200, + "objective/train/value_avg": -0.006435394287109375, + "objective/train/value_loss": 0.0001979928492801264, + "objective/train/value_max": -0.00013446807861328125, + "objective/train/value_min": -0.2325439453125, + "objective/train/value_reward_corr": 0.5276408179266261, + "objective/train/value_std": 0.00835418701171875, + "objective/train/weight_avg": 1.0014052391052246, + "objective/train/weighted_lm_loss": 1.6336127519607544, + "objective/train/weights_max": 1.1831884384155273, + "objective/train/weights_min": 0.3687118589878082, + "theoretical_loss": 3.7898385909005814, + "tokens_seen": 734003200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007854277002086343, + "loss": 0.0802, + "theoretical_loss": 3.7898385909005814, + "tokens_seen": 734003200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007853474562670519, + "loss": 0.0776, + "theoretical_loss": 3.7897033742966135, + "tokens_seen": 734265344 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007852672123254695, + "loss": 0.0776, + "theoretical_loss": 3.789568219469767, + "tokens_seen": 734527488 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007851869683838871, + "loss": 0.08, + "theoretical_loss": 3.789433126369786, + "tokens_seen": 734789632 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007851067244423046, + "loss": 0.0767, + "theoretical_loss": 3.7892980949464716, + "tokens_seen": 735051776 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007850264805007222, + "loss": 0.0781, + "theoretical_loss": 3.7891631251496856, + "tokens_seen": 735313920 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007849462365591398, + "loss": 0.0775, + "theoretical_loss": 3.7890282169293465, + "tokens_seen": 735576064 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007848659926175574, + "loss": 0.0763, + "theoretical_loss": 3.7888933702354324, + "tokens_seen": 735838208 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007847857486759749, + "loss": 0.0796, + "theoretical_loss": 3.7887585850179786, + "tokens_seen": 736100352 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007847055047343926, + "loss": 0.0786, + "theoretical_loss": 3.788623861227081, + "tokens_seen": 736362496 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007846252607928101, + "loss": 0.0752, + "theoretical_loss": 3.7884891988128926, + "tokens_seen": 736624640 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007845450168512277, + "loss": 0.0782, + "theoretical_loss": 3.7883545977256228, + "tokens_seen": 736886784 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007844647729096454, + "loss": 0.0781, + "theoretical_loss": 3.7882200579155416, + "tokens_seen": 737148928 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0006363503634929657, + "objective/train/docs_used": 273341, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6099265813827515, + "objective/train/original_loss": 1.609926462173462, + "objective/train/theoretical_loss": 3.788152810973922, + "objective/train/tokens_used": 757740000, + "objective/train/value_avg": -0.0074005126953125, + "objective/train/value_loss": 0.00028310378547757864, + "objective/train/value_max": -0.0001596212387084961, + "objective/train/value_min": -0.5478515625, + "objective/train/value_reward_corr": 0.5414502644341082, + "objective/train/value_std": 0.0122528076171875, + "objective/train/weight_avg": 1.000755786895752, + "objective/train/weighted_lm_loss": 1.6105283498764038, + "objective/train/weights_max": 1.28411865234375, + "objective/train/weights_min": 0.3686028718948364, + "theoretical_loss": 3.788152810973922, + "tokens_seen": 737280000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007843845289680629, + "loss": 0.0792, + "theoretical_loss": 3.788085579332977, + "tokens_seen": 737411072 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007843042850264806, + "loss": 0.0773, + "theoretical_loss": 3.787951161928312, + "tokens_seen": 737673216 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007842240410848981, + "loss": 0.0786, + "theoretical_loss": 3.7878168056519916, + "tokens_seen": 737935360 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007841437971433157, + "loss": 0.0767, + "theoretical_loss": 3.787682510454515, + "tokens_seen": 738197504 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007840635532017333, + "loss": 0.0779, + "theoretical_loss": 3.7875482762864405, + "tokens_seen": 738459648 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007839833092601508, + "loss": 0.0761, + "theoretical_loss": 3.787414103098384, + "tokens_seen": 738721792 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007839030653185684, + "loss": 0.0768, + "theoretical_loss": 3.7872799908410193, + "tokens_seen": 738983936 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007838228213769861, + "loss": 0.0765, + "theoretical_loss": 3.787145939465076, + "tokens_seen": 739246080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007837425774354037, + "loss": 0.0773, + "theoretical_loss": 3.7870119489213425, + "tokens_seen": 739508224 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007836623334938212, + "loss": 0.0795, + "theoretical_loss": 3.786878019160664, + "tokens_seen": 739770368 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007835820895522389, + "loss": 0.0763, + "theoretical_loss": 3.7867441501339427, + "tokens_seen": 740032512 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007835018456106564, + "loss": 0.0783, + "theoretical_loss": 3.7866103417921373, + "tokens_seen": 740294656 + }, + { + "epoch": 0.22, + "objective/train/advantage_avg": 0.0014227567007765174, + "objective/train/docs_used": 274540, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.675157070159912, + "objective/train/original_loss": 1.6751569509506226, + "objective/train/theoretical_loss": 3.786476594086265, + "objective/train/tokens_used": 761016800, + "objective/train/value_avg": -0.00887298583984375, + "objective/train/value_loss": 0.0004343730106484145, + "objective/train/value_max": -0.00012731552124023438, + "objective/train/value_min": -0.580078125, + "objective/train/value_reward_corr": 0.5689424667790532, + "objective/train/value_std": 0.0146942138671875, + "objective/train/weight_avg": 1.0016205310821533, + "objective/train/weighted_lm_loss": 1.6775554418563843, + "objective/train/weights_max": 1.65679132938385, + "objective/train/weights_min": 0.3746204078197479, + "theoretical_loss": 3.786476594086265, + "tokens_seen": 740556800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007834216016690739, + "loss": 0.0818, + "theoretical_loss": 3.786476594086265, + "tokens_seen": 740556800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007833413577274916, + "loss": 0.0761, + "theoretical_loss": 3.7863429069673984, + "tokens_seen": 740818944 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007832611137859091, + "loss": 0.0774, + "theoretical_loss": 3.7862092803866663, + "tokens_seen": 741081088 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007831808698443267, + "loss": 0.0745, + "theoretical_loss": 3.786075714295257, + "tokens_seen": 741343232 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007831006259027444, + "loss": 0.0797, + "theoretical_loss": 3.7859422086444123, + "tokens_seen": 741605376 + }, + { + "epoch": 0.22, + "learning_rate": 0.000783020381961162, + "loss": 0.078, + "theoretical_loss": 3.7858087633854325, + "tokens_seen": 741867520 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007829401380195796, + "loss": 0.0772, + "theoretical_loss": 3.785675378469673, + "tokens_seen": 742129664 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007828598940779972, + "loss": 0.0772, + "theoretical_loss": 3.7855420538485474, + "tokens_seen": 742391808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007827796501364147, + "loss": 0.0787, + "theoretical_loss": 3.7854087894735233, + "tokens_seen": 742653952 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007826994061948323, + "loss": 0.0751, + "theoretical_loss": 3.7852755852961257, + "tokens_seen": 742916096 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007826191622532499, + "loss": 0.0791, + "theoretical_loss": 3.785142441267936, + "tokens_seen": 743178240 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007825389183116674, + "loss": 0.0778, + "theoretical_loss": 3.7850093573405905, + "tokens_seen": 743440384 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007824586743700851, + "loss": 0.076, + "theoretical_loss": 3.7848763334657827, + "tokens_seen": 743702528 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0006977500161156058, + "objective/train/docs_used": 275779, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6511344909667969, + "objective/train/original_loss": 1.6511344909667969, + "objective/train/theoretical_loss": 3.784809844033, + "objective/train/tokens_used": 764293600, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.0002167196071241051, + "objective/train/value_max": -0.00013768672943115234, + "objective/train/value_min": -0.63427734375, + "objective/train/value_reward_corr": 0.6324548991472896, + "objective/train/value_std": 0.01303863525390625, + "objective/train/weight_avg": 1.0008044242858887, + "objective/train/weighted_lm_loss": 1.6532789468765259, + "objective/train/weights_max": 1.774770736694336, + "objective/train/weights_min": 0.6139143705368042, + "theoretical_loss": 3.784809844033, + "tokens_seen": 743833600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007823784304285027, + "loss": 0.0807, + "theoretical_loss": 3.7847433695952617, + "tokens_seen": 743964672 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007822981864869203, + "loss": 0.0785, + "theoretical_loss": 3.7846104656808306, + "tokens_seen": 744226816 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007822179425453379, + "loss": 0.0759, + "theoretical_loss": 3.7844776216743505, + "tokens_seen": 744488960 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007821376986037554, + "loss": 0.0772, + "theoretical_loss": 3.784344837527737, + "tokens_seen": 744751104 + }, + { + "epoch": 0.23, + "learning_rate": 0.000782057454662173, + "loss": 0.0761, + "theoretical_loss": 3.784212113192961, + "tokens_seen": 745013248 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007819772107205906, + "loss": 0.0775, + "theoretical_loss": 3.7840794486220495, + "tokens_seen": 745275392 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007818969667790082, + "loss": 0.078, + "theoretical_loss": 3.783946843767084, + "tokens_seen": 745537536 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007818167228374257, + "loss": 0.0786, + "theoretical_loss": 3.783814298580203, + "tokens_seen": 745799680 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007817364788958434, + "loss": 0.0774, + "theoretical_loss": 3.7836818130135974, + "tokens_seen": 746061824 + }, + { + "epoch": 0.23, + "learning_rate": 0.000781656234954261, + "loss": 0.0759, + "theoretical_loss": 3.783549387019515, + "tokens_seen": 746323968 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007815759910126786, + "loss": 0.0781, + "theoretical_loss": 3.7834170205502584, + "tokens_seen": 746586112 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007814957470710962, + "loss": 0.0767, + "theoretical_loss": 3.783284713558186, + "tokens_seen": 746848256 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0004192414053250104, + "objective/train/docs_used": 277055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5548205375671387, + "objective/train/original_loss": 1.5548205375671387, + "objective/train/theoretical_loss": 3.783152465995708, + "objective/train/tokens_used": 767570400, + "objective/train/value_avg": -0.0106964111328125, + "objective/train/value_loss": 0.0003498530713841319, + "objective/train/value_max": -0.0001881122589111328, + "objective/train/value_min": -0.37255859375, + "objective/train/value_reward_corr": 0.7141878653817549, + "objective/train/value_std": 0.01702880859375, + "objective/train/weight_avg": 1.0005861520767212, + "objective/train/weighted_lm_loss": 1.5560520887374878, + "objective/train/weights_max": 1.2006479501724243, + "objective/train/weights_min": 0.36900386214256287, + "theoretical_loss": 3.783152465995708, + "tokens_seen": 747110400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007814155031295137, + "loss": 0.0792, + "theoretical_loss": 3.783152465995708, + "tokens_seen": 747110400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007813352591879314, + "loss": 0.0773, + "theoretical_loss": 3.7830202778152935, + "tokens_seen": 747372544 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007812550152463489, + "loss": 0.0751, + "theoretical_loss": 3.7828881489694632, + "tokens_seen": 747634688 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007811747713047665, + "loss": 0.0757, + "theoretical_loss": 3.7827560794107926, + "tokens_seen": 747896832 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007810945273631841, + "loss": 0.0767, + "theoretical_loss": 3.7826240690919137, + "tokens_seen": 748158976 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007810142834216016, + "loss": 0.0792, + "theoretical_loss": 3.7824921179655115, + "tokens_seen": 748421120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007809340394800192, + "loss": 0.0775, + "theoretical_loss": 3.782360225984325, + "tokens_seen": 748683264 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007808537955384369, + "loss": 0.0771, + "theoretical_loss": 3.782228393101149, + "tokens_seen": 748945408 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007807735515968545, + "loss": 0.0775, + "theoretical_loss": 3.78209661926883, + "tokens_seen": 749207552 + }, + { + "epoch": 0.23, + "learning_rate": 0.000780693307655272, + "loss": 0.0774, + "theoretical_loss": 3.781964904440271, + "tokens_seen": 749469696 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007806130637136897, + "loss": 0.0796, + "theoretical_loss": 3.7818332485684283, + "tokens_seen": 749731840 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007805328197721072, + "loss": 0.0772, + "theoretical_loss": 3.781701651606311, + "tokens_seen": 749993984 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007804525758305248, + "loss": 0.0795, + "theoretical_loss": 3.7815701135069846, + "tokens_seen": 750256128 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.002483784221112728, + "objective/train/docs_used": 278273, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4896938800811768, + "objective/train/original_loss": 1.4896938800811768, + "objective/train/theoretical_loss": 3.7815043665162147, + "objective/train/tokens_used": 770847200, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.00018718333740253001, + "objective/train/value_max": -0.00016736984252929688, + "objective/train/value_min": -0.252685546875, + "objective/train/value_reward_corr": 0.7167813323322847, + "objective/train/value_std": 0.014862060546875, + "objective/train/weight_avg": 1.002573013305664, + "objective/train/weighted_lm_loss": 1.4927213191986084, + "objective/train/weights_max": 1.2056413888931274, + "objective/train/weights_min": 0.3711751401424408, + "theoretical_loss": 3.7815043665162147, + "tokens_seen": 750387200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007803723318889424, + "loss": 0.0767, + "theoretical_loss": 3.7814386342235653, + "tokens_seen": 750518272 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007802920879473599, + "loss": 0.0727, + "theoretical_loss": 3.7813072137092254, + "tokens_seen": 750780416 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007802118440057776, + "loss": 0.0752, + "theoretical_loss": 3.7811758519171894, + "tokens_seen": 751042560 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007801316000641952, + "loss": 0.079, + "theoretical_loss": 3.781044548800736, + "tokens_seen": 751304704 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007800513561226128, + "loss": 0.0795, + "theoretical_loss": 3.7809133043131973, + "tokens_seen": 751566848 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007799711121810304, + "loss": 0.0786, + "theoretical_loss": 3.7807821184079584, + "tokens_seen": 751828992 + }, + { + "epoch": 0.23, + "learning_rate": 0.000779890868239448, + "loss": 0.0776, + "theoretical_loss": 3.780650991038459, + "tokens_seen": 752091136 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007798106242978655, + "loss": 0.0775, + "theoretical_loss": 3.7805199221581893, + "tokens_seen": 752353280 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007797303803562831, + "loss": 0.0797, + "theoretical_loss": 3.7803889117206957, + "tokens_seen": 752615424 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007796501364147007, + "loss": 0.079, + "theoretical_loss": 3.7802579596795756, + "tokens_seen": 752877568 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007795698924731182, + "loss": 0.0755, + "theoretical_loss": 3.7801270659884807, + "tokens_seen": 753139712 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007794896485315359, + "loss": 0.078, + "theoretical_loss": 3.7799962306011143, + "tokens_seen": 753401856 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0004187932063359767, + "objective/train/docs_used": 279492, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5806915760040283, + "objective/train/original_loss": 1.5806913375854492, + "objective/train/theoretical_loss": 3.779865453471234, + "objective/train/tokens_used": 774124000, + "objective/train/value_avg": -0.01265716552734375, + "objective/train/value_loss": 0.00038256868720054626, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.55712890625, + "objective/train/value_reward_corr": 0.7937497238851523, + "objective/train/value_std": 0.0217437744140625, + "objective/train/weight_avg": 1.0005953311920166, + "objective/train/weighted_lm_loss": 1.5813276767730713, + "objective/train/weights_max": 1.2988438606262207, + "objective/train/weights_min": 0.3966088891029358, + "theoretical_loss": 3.779865453471234, + "tokens_seen": 753664000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007794094045899534, + "loss": 0.0791, + "theoretical_loss": 3.779865453471234, + "tokens_seen": 753664000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007793291606483711, + "loss": 0.0777, + "theoretical_loss": 3.7797347345526484, + "tokens_seen": 753926144 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007792489167067887, + "loss": 0.0777, + "theoretical_loss": 3.7796040737992205, + "tokens_seen": 754188288 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007791686727652062, + "loss": 0.0784, + "theoretical_loss": 3.7794734711648648, + "tokens_seen": 754450432 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790884288236239, + "loss": 0.0792, + "theoretical_loss": 3.779342926603549, + "tokens_seen": 754712576 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790081848820414, + "loss": 0.0769, + "theoretical_loss": 3.7792124400692924, + "tokens_seen": 754974720 + }, + { + "epoch": 0.23, + "learning_rate": 0.000778927940940459, + "loss": 0.0763, + "theoretical_loss": 3.7790820115161674, + "tokens_seen": 755236864 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007788476969988766, + "loss": 0.0792, + "theoretical_loss": 3.778951640898298, + "tokens_seen": 755499008 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007787674530572942, + "loss": 0.0781, + "theoretical_loss": 3.7788213281698617, + "tokens_seen": 755761152 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007786872091157117, + "loss": 0.0759, + "theoretical_loss": 3.778691073285086, + "tokens_seen": 756023296 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007786069651741294, + "loss": 0.0796, + "theoretical_loss": 3.7785608761982523, + "tokens_seen": 756285440 + }, + { + "epoch": 0.23, + "learning_rate": 0.000778526721232547, + "loss": 0.0765, + "theoretical_loss": 3.7784307368636934, + "tokens_seen": 756547584 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007784464772909645, + "loss": 0.0756, + "theoretical_loss": 3.7783006552357934, + "tokens_seen": 756809728 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0009907586500048637, + "objective/train/docs_used": 280659, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6565080881118774, + "objective/train/original_loss": 1.6565080881118774, + "objective/train/theoretical_loss": 3.7782356360476, + "objective/train/tokens_used": 777400800, + "objective/train/value_avg": -0.006893157958984375, + "objective/train/value_loss": 0.00026105440338142216, + "objective/train/value_max": -9.316205978393555e-05, + "objective/train/value_min": -0.25146484375, + "objective/train/value_reward_corr": 0.6604508877653548, + "objective/train/value_std": 0.011566162109375, + "objective/train/weight_avg": 1.0011118650436401, + "objective/train/weighted_lm_loss": 1.6582515239715576, + "objective/train/weights_max": 1.2315527200698853, + "objective/train/weights_min": 0.3999635875225067, + "theoretical_loss": 3.7782356360476, + "tokens_seen": 756940800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007783662333493822, + "loss": 0.0793, + "theoretical_loss": 3.7781706312689893, + "tokens_seen": 757071872 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007782859894077997, + "loss": 0.078, + "theoretical_loss": 3.7780406649177696, + "tokens_seen": 757334016 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007782057454662173, + "loss": 0.0774, + "theoretical_loss": 3.777910756136673, + "tokens_seen": 757596160 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007781255015246349, + "loss": 0.0779, + "theoretical_loss": 3.777780904880292, + "tokens_seen": 757858304 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007780452575830524, + "loss": 0.0792, + "theoretical_loss": 3.777651111103269, + "tokens_seen": 758120448 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007779650136414702, + "loss": 0.0768, + "theoretical_loss": 3.777521374760298, + "tokens_seen": 758382592 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007778847696998877, + "loss": 0.0795, + "theoretical_loss": 3.7773916958061253, + "tokens_seen": 758644736 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007778045257583053, + "loss": 0.0812, + "theoretical_loss": 3.777262074195548, + "tokens_seen": 758906880 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007777242818167229, + "loss": 0.0791, + "theoretical_loss": 3.777132509883413, + "tokens_seen": 759169024 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007776440378751405, + "loss": 0.0789, + "theoretical_loss": 3.7770030028246215, + "tokens_seen": 759431168 + }, + { + "epoch": 0.23, + "learning_rate": 0.000777563793933558, + "loss": 0.078, + "theoretical_loss": 3.7768735529741226, + "tokens_seen": 759693312 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007774835499919756, + "loss": 0.0757, + "theoretical_loss": 3.776744160286918, + "tokens_seen": 759955456 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.0008638743311166763, + "objective/train/docs_used": 281753, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.592660665512085, + "objective/train/original_loss": 1.592660665512085, + "objective/train/theoretical_loss": 3.77661482471806, + "objective/train/tokens_used": 780677600, + "objective/train/value_avg": -0.0077362060546875, + "objective/train/value_loss": 0.0008141609723679721, + "objective/train/value_max": -8.416175842285156e-05, + "objective/train/value_min": -0.5966796875, + "objective/train/value_reward_corr": 0.6082159732147249, + "objective/train/value_std": 0.015960693359375, + "objective/train/weight_avg": 0.9994794726371765, + "objective/train/weighted_lm_loss": 1.5920522212982178, + "objective/train/weights_max": 1.4571453332901, + "objective/train/weights_min": 0.26885083317756653, + "theoretical_loss": 3.77661482471806, + "tokens_seen": 760217600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007774033060503932, + "loss": 0.0807, + "theoretical_loss": 3.77661482471806, + "tokens_seen": 760217600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007773230621088107, + "loss": 0.0799, + "theoretical_loss": 3.776485546222651, + "tokens_seen": 760479744 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007772428181672284, + "loss": 0.0785, + "theoretical_loss": 3.776356324755847, + "tokens_seen": 760741888 + }, + { + "epoch": 0.23, + "learning_rate": 0.000777162574225646, + "loss": 0.0768, + "theoretical_loss": 3.7762271602728497, + "tokens_seen": 761004032 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007770823302840636, + "loss": 0.0786, + "theoretical_loss": 3.7760980527289156, + "tokens_seen": 761266176 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007770020863424812, + "loss": 0.0756, + "theoretical_loss": 3.77596900207935, + "tokens_seen": 761528320 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007769218424008987, + "loss": 0.0763, + "theoretical_loss": 3.775840008279509, + "tokens_seen": 761790464 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007768415984593163, + "loss": 0.0788, + "theoretical_loss": 3.7757110712847997, + "tokens_seen": 762052608 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007767613545177339, + "loss": 0.0792, + "theoretical_loss": 3.775582191050678, + "tokens_seen": 762314752 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007766811105761515, + "loss": 0.0783, + "theoretical_loss": 3.775453367532651, + "tokens_seen": 762576896 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007766008666345691, + "loss": 0.0783, + "theoretical_loss": 3.775324600686276, + "tokens_seen": 762839040 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007765206226929867, + "loss": 0.0776, + "theoretical_loss": 3.7751958904671614, + "tokens_seen": 763101184 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007764403787514042, + "loss": 0.0786, + "theoretical_loss": 3.7750672368309623, + "tokens_seen": 763363328 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0011741745984181762, + "objective/train/docs_used": 282838, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.454331398010254, + "objective/train/original_loss": 1.454331398010254, + "objective/train/theoretical_loss": 3.775002931217613, + "objective/train/tokens_used": 783954400, + "objective/train/value_avg": -0.00695037841796875, + "objective/train/value_loss": 0.0001637246459722519, + "objective/train/value_max": -6.657838821411133e-05, + "objective/train/value_min": -0.275146484375, + "objective/train/value_reward_corr": 0.6056801602468862, + "objective/train/value_std": 0.01113128662109375, + "objective/train/weight_avg": 1.001247763633728, + "objective/train/weighted_lm_loss": 1.45583176612854, + "objective/train/weights_max": 1.1704035997390747, + "objective/train/weights_min": 0.3682715594768524, + "theoretical_loss": 3.775002931217613, + "tokens_seen": 763494400 + }, + { + "epoch": 0.23, + "learning_rate": 0.000776360134809822, + "loss": 0.0812, + "theoretical_loss": 3.7749386397333873, + "tokens_seen": 763625472 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007762798908682395, + "loss": 0.0781, + "theoretical_loss": 3.774810099130193, + "tokens_seen": 763887616 + }, + { + "epoch": 0.23, + "learning_rate": 0.000776199646926657, + "loss": 0.0786, + "theoretical_loss": 3.7746816149771862, + "tokens_seen": 764149760 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007761194029850747, + "loss": 0.0774, + "theoretical_loss": 3.7745531872302234, + "tokens_seen": 764411904 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007760391590434922, + "loss": 0.0782, + "theoretical_loss": 3.774424815845211, + "tokens_seen": 764674048 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007759589151019098, + "loss": 0.0781, + "theoretical_loss": 3.774296500778105, + "tokens_seen": 764936192 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007758786711603274, + "loss": 0.0784, + "theoretical_loss": 3.77416824198491, + "tokens_seen": 765198336 + }, + { + "epoch": 0.23, + "learning_rate": 0.000775798427218745, + "loss": 0.0764, + "theoretical_loss": 3.7740400394216813, + "tokens_seen": 765460480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007757181832771625, + "loss": 0.0811, + "theoretical_loss": 3.7739118930445223, + "tokens_seen": 765722624 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007756379393355802, + "loss": 0.0799, + "theoretical_loss": 3.7737838028095867, + "tokens_seen": 765984768 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007755576953939978, + "loss": 0.0772, + "theoretical_loss": 3.773655768673077, + "tokens_seen": 766246912 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007754774514524153, + "loss": 0.0806, + "theoretical_loss": 3.7735277905912445, + "tokens_seen": 766509056 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": -0.0006871665827929974, + "objective/train/docs_used": 283988, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5568292140960693, + "objective/train/original_loss": 1.5568292140960693, + "objective/train/theoretical_loss": 3.773399868520391, + "objective/train/tokens_used": 787231200, + "objective/train/value_avg": -0.0089263916015625, + "objective/train/value_loss": 0.000296746235108003, + "objective/train/value_max": -0.00017535686492919922, + "objective/train/value_min": -0.2440185546875, + "objective/train/value_reward_corr": 0.6491560262447342, + "objective/train/value_std": 0.01305389404296875, + "objective/train/weight_avg": 0.9994507431983948, + "objective/train/weighted_lm_loss": 1.5556931495666504, + "objective/train/weights_max": 1.159029483795166, + "objective/train/weights_min": 0.37362140417099, + "theoretical_loss": 3.773399868520391, + "tokens_seen": 766771200 + }, + { + "epoch": 0.23, + "learning_rate": 0.000775397207510833, + "loss": 0.0784, + "theoretical_loss": 3.773399868520391, + "tokens_seen": 766771200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007753169635692505, + "loss": 0.0784, + "theoretical_loss": 3.7732720024168644, + "tokens_seen": 767033344 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007752367196276682, + "loss": 0.0773, + "theoretical_loss": 3.773144192237065, + "tokens_seen": 767295488 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007751564756860857, + "loss": 0.0783, + "theoretical_loss": 3.7730164379374402, + "tokens_seen": 767557632 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007750762317445032, + "loss": 0.0777, + "theoretical_loss": 3.772888739474485, + "tokens_seen": 767819776 + }, + { + "epoch": 0.23, + "learning_rate": 0.000774995987802921, + "loss": 0.0769, + "theoretical_loss": 3.772761096804745, + "tokens_seen": 768081920 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007749157438613385, + "loss": 0.0777, + "theoretical_loss": 3.7726335098848143, + "tokens_seen": 768344064 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007748354999197561, + "loss": 0.0775, + "theoretical_loss": 3.772505978671334, + "tokens_seen": 768606208 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007747552559781737, + "loss": 0.0812, + "theoretical_loss": 3.772378503120996, + "tokens_seen": 768868352 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007746750120365913, + "loss": 0.0787, + "theoretical_loss": 3.7722510831905387, + "tokens_seen": 769130496 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007745947680950088, + "loss": 0.0793, + "theoretical_loss": 3.7721237188367494, + "tokens_seen": 769392640 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007745145241534264, + "loss": 0.0775, + "theoretical_loss": 3.771996410016464, + "tokens_seen": 769654784 + }, + { + "epoch": 0.23, + "learning_rate": 0.000774434280211844, + "loss": 0.0759, + "theoretical_loss": 3.7718691566865665, + "tokens_seen": 769916928 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.001441980479285121, + "objective/train/docs_used": 285055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.580965518951416, + "objective/train/original_loss": 1.580965518951416, + "objective/train/theoretical_loss": 3.7718055508170525, + "objective/train/tokens_used": 790508000, + "objective/train/value_avg": -0.00901031494140625, + "objective/train/value_loss": 0.00021419981203507632, + "objective/train/value_max": -0.00022876262664794922, + "objective/train/value_min": -0.2083740234375, + "objective/train/value_reward_corr": 0.6527358181867582, + "objective/train/value_std": 0.0116729736328125, + "objective/train/weight_avg": 1.0015414953231812, + "objective/train/weighted_lm_loss": 1.5829206705093384, + "objective/train/weights_max": 1.149869441986084, + "objective/train/weights_min": 0.4065597653388977, + "theoretical_loss": 3.7718055508170525, + "tokens_seen": 770048000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007743540362702615, + "loss": 0.0782, + "theoretical_loss": 3.7717419588039887, + "tokens_seen": 770179072 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007742737923286792, + "loss": 0.078, + "theoretical_loss": 3.7716148163257115, + "tokens_seen": 770441216 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007741935483870968, + "loss": 0.0749, + "theoretical_loss": 3.7714877292087623, + "tokens_seen": 770703360 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007741133044455145, + "loss": 0.0774, + "theoretical_loss": 3.7713606974102167, + "tokens_seen": 770965504 + }, + { + "epoch": 0.23, + "learning_rate": 0.000774033060503932, + "loss": 0.0808, + "theoretical_loss": 3.7712337208872, + "tokens_seen": 771227648 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007739528165623495, + "loss": 0.0765, + "theoretical_loss": 3.7711067995968826, + "tokens_seen": 771489792 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007738725726207672, + "loss": 0.0798, + "theoretical_loss": 3.770979933496485, + "tokens_seen": 771751936 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007737923286791847, + "loss": 0.0771, + "theoretical_loss": 3.770853122543274, + "tokens_seen": 772014080 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007737120847376023, + "loss": 0.0752, + "theoretical_loss": 3.770726366694564, + "tokens_seen": 772276224 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007736318407960199, + "loss": 0.0788, + "theoretical_loss": 3.7705996659077172, + "tokens_seen": 772538368 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007735515968544375, + "loss": 0.0766, + "theoretical_loss": 3.770473020140143, + "tokens_seen": 772800512 + }, + { + "epoch": 0.23, + "learning_rate": 0.000773471352912855, + "loss": 0.0817, + "theoretical_loss": 3.770346429349299, + "tokens_seen": 773062656 + }, + { + "epoch": 0.23, + "objective/train/advantage_avg": 0.0003448604547884315, + "objective/train/docs_used": 286255, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4529708623886108, + "objective/train/original_loss": 1.4529707431793213, + "objective/train/theoretical_loss": 3.7702198934926896, + "objective/train/tokens_used": 793784800, + "objective/train/value_avg": -0.009613037109375, + "objective/train/value_loss": 0.000516877043992281, + "objective/train/value_max": -0.0001233816146850586, + "objective/train/value_min": -0.37451171875, + "objective/train/value_reward_corr": 0.5522317075530438, + "objective/train/value_std": 0.016510009765625, + "objective/train/weight_avg": 1.0005626678466797, + "objective/train/weighted_lm_loss": 1.4538027048110962, + "objective/train/weights_max": 1.3192977905273438, + "objective/train/weights_min": 0.36989736557006836, + "theoretical_loss": 3.7702198934926896, + "tokens_seen": 773324800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007733911089712728, + "loss": 0.0771, + "theoretical_loss": 3.7702198934926896, + "tokens_seen": 773324800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007733108650296903, + "loss": 0.0767, + "theoretical_loss": 3.7700934125278653, + "tokens_seen": 773586944 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007732306210881078, + "loss": 0.0805, + "theoretical_loss": 3.7699669864124266, + "tokens_seen": 773849088 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007731503771465255, + "loss": 0.0776, + "theoretical_loss": 3.769840615104018, + "tokens_seen": 774111232 + }, + { + "epoch": 0.23, + "learning_rate": 0.000773070133204943, + "loss": 0.0808, + "theoretical_loss": 3.7697142985603325, + "tokens_seen": 774373376 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007729898892633606, + "loss": 0.0797, + "theoretical_loss": 3.76958803673911, + "tokens_seen": 774635520 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007729096453217782, + "loss": 0.08, + "theoretical_loss": 3.7694618295981375, + "tokens_seen": 774897664 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007728294013801958, + "loss": 0.0779, + "theoretical_loss": 3.769335677095248, + "tokens_seen": 775159808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007727491574386135, + "loss": 0.0805, + "theoretical_loss": 3.769209579188323, + "tokens_seen": 775421952 + }, + { + "epoch": 0.24, + "learning_rate": 0.000772668913497031, + "loss": 0.078, + "theoretical_loss": 3.7690835358352883, + "tokens_seen": 775684096 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007725886695554486, + "loss": 0.0805, + "theoretical_loss": 3.7689575469941183, + "tokens_seen": 775946240 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007725084256138662, + "loss": 0.0791, + "theoretical_loss": 3.768831612622833, + "tokens_seen": 776208384 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007724281816722838, + "loss": 0.0774, + "theoretical_loss": 3.7687057326794986, + "tokens_seen": 776470528 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0015384306898340583, + "objective/train/docs_used": 287303, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5866420269012451, + "objective/train/original_loss": 1.5866421461105347, + "objective/train/theoretical_loss": 3.768642813105222, + "objective/train/tokens_used": 797061600, + "objective/train/value_avg": -0.006908416748046875, + "objective/train/value_loss": 0.00025792099768295884, + "objective/train/value_max": -9.995698928833008e-05, + "objective/train/value_min": -0.2237548828125, + "objective/train/value_reward_corr": 0.42940502937583597, + "objective/train/value_std": 0.0088653564453125, + "objective/train/weight_avg": 1.0016469955444336, + "objective/train/weighted_lm_loss": 1.5896674394607544, + "objective/train/weights_max": 1.1481863260269165, + "objective/train/weights_min": 0.3687625229358673, + "theoretical_loss": 3.768642813105222, + "tokens_seen": 776601600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007723479377307013, + "loss": 0.0805, + "theoretical_loss": 3.768579907122229, + "tokens_seen": 776732672 + }, + { + "epoch": 0.24, + "learning_rate": 0.000772267693789119, + "loss": 0.078, + "theoretical_loss": 3.768454135909183, + "tokens_seen": 776994816 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007721874498475365, + "loss": 0.078, + "theoretical_loss": 3.768328418998567, + "tokens_seen": 777256960 + }, + { + "epoch": 0.24, + "learning_rate": 0.000772107205905954, + "loss": 0.0778, + "theoretical_loss": 3.7682027563486327, + "tokens_seen": 777519104 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007720269619643718, + "loss": 0.0806, + "theoretical_loss": 3.768077147917678, + "tokens_seen": 777781248 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007719467180227893, + "loss": 0.0772, + "theoretical_loss": 3.7679515936640477, + "tokens_seen": 778043392 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007718664740812069, + "loss": 0.0758, + "theoretical_loss": 3.7678260935461316, + "tokens_seen": 778305536 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007717862301396245, + "loss": 0.0741, + "theoretical_loss": 3.767700647522366, + "tokens_seen": 778567680 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007717059861980421, + "loss": 0.0735, + "theoretical_loss": 3.7675752555512334, + "tokens_seen": 778829824 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007716257422564597, + "loss": 0.0783, + "theoretical_loss": 3.7674499175912617, + "tokens_seen": 779091968 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007715454983148772, + "loss": 0.0776, + "theoretical_loss": 3.767324633601024, + "tokens_seen": 779354112 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007714652543732948, + "loss": 0.0788, + "theoretical_loss": 3.7671994035391405, + "tokens_seen": 779616256 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0009066300117410719, + "objective/train/docs_used": 288522, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5356212854385376, + "objective/train/original_loss": 1.535621166229248, + "objective/train/theoretical_loss": 3.767074227364275, + "objective/train/tokens_used": 800338400, + "objective/train/value_avg": -0.005023956298828125, + "objective/train/value_loss": 0.0001440069027012214, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.1871337890625, + "objective/train/value_reward_corr": 0.5712434670773441, + "objective/train/value_std": 0.006877899169921875, + "objective/train/weight_avg": 1.0009735822677612, + "objective/train/weighted_lm_loss": 1.5371370315551758, + "objective/train/weights_max": 1.0837024450302124, + "objective/train/weights_min": 0.3689109683036804, + "theoretical_loss": 3.767074227364275, + "tokens_seen": 779878400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007713850104317124, + "loss": 0.0805, + "theoretical_loss": 3.767074227364275, + "tokens_seen": 779878400 + }, + { + "epoch": 0.24, + "learning_rate": 0.00077130476649013, + "loss": 0.0804, + "theoretical_loss": 3.7669491050351396, + "tokens_seen": 780140544 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007712245225485476, + "loss": 0.0793, + "theoretical_loss": 3.7668240365104895, + "tokens_seen": 780402688 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007711442786069653, + "loss": 0.08, + "theoretical_loss": 3.7666990217491265, + "tokens_seen": 780664832 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007710640346653828, + "loss": 0.0778, + "theoretical_loss": 3.7665740607098974, + "tokens_seen": 780926976 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007709837907238003, + "loss": 0.0771, + "theoretical_loss": 3.7664491533516946, + "tokens_seen": 781189120 + }, + { + "epoch": 0.24, + "learning_rate": 0.000770903546782218, + "loss": 0.0773, + "theoretical_loss": 3.766324299633455, + "tokens_seen": 781451264 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007708233028406355, + "loss": 0.0787, + "theoretical_loss": 3.766199499514162, + "tokens_seen": 781713408 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007707430588990531, + "loss": 0.0755, + "theoretical_loss": 3.7660747529528424, + "tokens_seen": 781975552 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007706628149574707, + "loss": 0.079, + "theoretical_loss": 3.76595005990857, + "tokens_seen": 782237696 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007705825710158883, + "loss": 0.0752, + "theoretical_loss": 3.7658254203404615, + "tokens_seen": 782499840 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007705023270743058, + "loss": 0.0773, + "theoretical_loss": 3.7657008342076796, + "tokens_seen": 782761984 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007704220831327236, + "loss": 0.0761, + "theoretical_loss": 3.765576301469433, + "tokens_seen": 783024128 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0012172440765425563, + "objective/train/docs_used": 289672, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6650699377059937, + "objective/train/original_loss": 1.6650700569152832, + "objective/train/theoretical_loss": 3.7655140551105246, + "objective/train/tokens_used": 803615200, + "objective/train/value_avg": -0.0089111328125, + "objective/train/value_loss": 0.000253047764999792, + "objective/train/value_max": -0.00019872188568115234, + "objective/train/value_min": -0.36279296875, + "objective/train/value_reward_corr": 0.6359651215028251, + "objective/train/value_std": 0.0158538818359375, + "objective/train/weight_avg": 1.0013352632522583, + "objective/train/weighted_lm_loss": 1.6672289371490479, + "objective/train/weights_max": 1.2659153938293457, + "objective/train/weights_min": 0.3686739206314087, + "theoretical_loss": 3.7655140551105246, + "tokens_seen": 783155200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007703418391911411, + "loss": 0.0775, + "theoretical_loss": 3.7654518220849726, + "tokens_seen": 783286272 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007702615952495587, + "loss": 0.0769, + "theoretical_loss": 3.7653273960135962, + "tokens_seen": 783548416 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007701813513079763, + "loss": 0.081, + "theoretical_loss": 3.765203023214645, + "tokens_seen": 783810560 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007701011073663938, + "loss": 0.0755, + "theoretical_loss": 3.7650787036475055, + "tokens_seen": 784072704 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007700208634248115, + "loss": 0.0768, + "theoretical_loss": 3.764954437271608, + "tokens_seen": 784334848 + }, + { + "epoch": 0.24, + "learning_rate": 0.000769940619483229, + "loss": 0.0718, + "theoretical_loss": 3.7648302240464284, + "tokens_seen": 784596992 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007698603755416466, + "loss": 0.0764, + "theoretical_loss": 3.764706063931486, + "tokens_seen": 784859136 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007697801316000643, + "loss": 0.0786, + "theoretical_loss": 3.764581956886345, + "tokens_seen": 785121280 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007696998876584818, + "loss": 0.0778, + "theoretical_loss": 3.7644579028706135, + "tokens_seen": 785383424 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007696196437168994, + "loss": 0.0759, + "theoretical_loss": 3.764333901843944, + "tokens_seen": 785645568 + }, + { + "epoch": 0.24, + "learning_rate": 0.000769539399775317, + "loss": 0.0775, + "theoretical_loss": 3.764209953766033, + "tokens_seen": 785907712 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007694591558337346, + "loss": 0.0755, + "theoretical_loss": 3.7640860585966207, + "tokens_seen": 786169856 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0007303394959308207, + "objective/train/docs_used": 290810, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4556148052215576, + "objective/train/original_loss": 1.4556148052215576, + "objective/train/theoretical_loss": 3.763962216295493, + "objective/train/tokens_used": 806892000, + "objective/train/value_avg": -0.0081787109375, + "objective/train/value_loss": 0.00028056377777829766, + "objective/train/value_max": -0.0001881122589111328, + "objective/train/value_min": -0.37158203125, + "objective/train/value_reward_corr": 0.6735432986137638, + "objective/train/value_std": 0.01500701904296875, + "objective/train/weight_avg": 1.0008543729782104, + "objective/train/weighted_lm_loss": 1.4567698240280151, + "objective/train/weights_max": 1.129281997680664, + "objective/train/weights_min": 0.36916786432266235, + "theoretical_loss": 3.763962216295493, + "tokens_seen": 786432000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007693789118921521, + "loss": 0.0757, + "theoretical_loss": 3.763962216295493, + "tokens_seen": 786432000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007692986679505698, + "loss": 0.0762, + "theoretical_loss": 3.7638384268224776, + "tokens_seen": 786694144 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007692184240089873, + "loss": 0.0775, + "theoretical_loss": 3.7637146901374474, + "tokens_seen": 786956288 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007691381800674048, + "loss": 0.0788, + "theoretical_loss": 3.7635910062003193, + "tokens_seen": 787218432 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007690579361258225, + "loss": 0.0768, + "theoretical_loss": 3.7634673749710523, + "tokens_seen": 787480576 + }, + { + "epoch": 0.24, + "learning_rate": 0.00076897769218424, + "loss": 0.0755, + "theoretical_loss": 3.763343796409651, + "tokens_seen": 787742720 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007688974482426578, + "loss": 0.0761, + "theoretical_loss": 3.7632202704761637, + "tokens_seen": 788004864 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007688172043010753, + "loss": 0.0757, + "theoretical_loss": 3.7630967971306797, + "tokens_seen": 788267008 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007687369603594929, + "loss": 0.0768, + "theoretical_loss": 3.762973376333335, + "tokens_seen": 788529152 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007686567164179105, + "loss": 0.0777, + "theoretical_loss": 3.7628500080443077, + "tokens_seen": 788791296 + }, + { + "epoch": 0.24, + "learning_rate": 0.000768576472476328, + "loss": 0.0771, + "theoretical_loss": 3.7627266922238185, + "tokens_seen": 789053440 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007684962285347456, + "loss": 0.0769, + "theoretical_loss": 3.762603428832133, + "tokens_seen": 789315584 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007684159845931632, + "loss": 0.0751, + "theoretical_loss": 3.7624802178295584, + "tokens_seen": 789577728 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0018347672885283828, + "objective/train/docs_used": 292000, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7165638208389282, + "objective/train/original_loss": 1.7165637016296387, + "objective/train/theoretical_loss": 3.762418631961796, + "objective/train/tokens_used": 810168800, + "objective/train/value_avg": -0.00826263427734375, + "objective/train/value_loss": 9.183614747598767e-05, + "objective/train/value_max": -0.00014090538024902344, + "objective/train/value_min": -0.263671875, + "objective/train/value_reward_corr": 0.8270739313759928, + "objective/train/value_std": 0.01273345947265625, + "objective/train/weight_avg": 1.0018802881240845, + "objective/train/weighted_lm_loss": 1.7201040983200073, + "objective/train/weights_max": 1.1363343000411987, + "objective/train/weights_min": 0.8306474089622498, + "theoretical_loss": 3.762418631961796, + "tokens_seen": 789708800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007683357406515808, + "loss": 0.078, + "theoretical_loss": 3.762357059176447, + "tokens_seen": 789839872 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007682554967099984, + "loss": 0.0776, + "theoretical_loss": 3.762233952833193, + "tokens_seen": 790102016 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007681752527684161, + "loss": 0.0799, + "theoretical_loss": 3.7621108987602336, + "tokens_seen": 790364160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007680950088268336, + "loss": 0.0766, + "theoretical_loss": 3.76198789691805, + "tokens_seen": 790626304 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007680147648852511, + "loss": 0.0798, + "theoretical_loss": 3.7618649472671652, + "tokens_seen": 790888448 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007679345209436688, + "loss": 0.0783, + "theoretical_loss": 3.761742049768146, + "tokens_seen": 791150592 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007678542770020863, + "loss": 0.0789, + "theoretical_loss": 3.761619204381602, + "tokens_seen": 791412736 + }, + { + "epoch": 0.24, + "learning_rate": 0.000767774033060504, + "loss": 0.0753, + "theoretical_loss": 3.7614964110681846, + "tokens_seen": 791674880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007676937891189215, + "loss": 0.0771, + "theoretical_loss": 3.761373669788589, + "tokens_seen": 791937024 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007676135451773391, + "loss": 0.0766, + "theoretical_loss": 3.7612509805035526, + "tokens_seen": 792199168 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007675333012357568, + "loss": 0.0751, + "theoretical_loss": 3.761128343173856, + "tokens_seen": 792461312 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007674530572941743, + "loss": 0.0757, + "theoretical_loss": 3.7610057577603215, + "tokens_seen": 792723456 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.00022212705516722053, + "objective/train/docs_used": 293128, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.473114252090454, + "objective/train/original_loss": 1.473114252090454, + "objective/train/theoretical_loss": 3.7608832242238144, + "objective/train/tokens_used": 813445600, + "objective/train/value_avg": -0.0098419189453125, + "objective/train/value_loss": 0.00027089385548606515, + "objective/train/value_max": -0.00020182132720947266, + "objective/train/value_min": -0.5673828125, + "objective/train/value_reward_corr": 0.6538193866153705, + "objective/train/value_std": 0.01425933837890625, + "objective/train/weight_avg": 1.0003442764282227, + "objective/train/weighted_lm_loss": 1.473488211631775, + "objective/train/weights_max": 1.16512930393219, + "objective/train/weights_min": 0.36934465169906616, + "theoretical_loss": 3.7608832242238144, + "tokens_seen": 792985600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007673728133525919, + "loss": 0.0782, + "theoretical_loss": 3.7608832242238144, + "tokens_seen": 792985600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007672925694110095, + "loss": 0.0779, + "theoretical_loss": 3.7607607425252416, + "tokens_seen": 793247744 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007672123254694271, + "loss": 0.079, + "theoretical_loss": 3.7606383126255536, + "tokens_seen": 793509888 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007671320815278446, + "loss": 0.0794, + "theoretical_loss": 3.760515934485743, + "tokens_seen": 793772032 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007670518375862623, + "loss": 0.0777, + "theoretical_loss": 3.760393608066843, + "tokens_seen": 794034176 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007669715936446798, + "loss": 0.076, + "theoretical_loss": 3.760271333329932, + "tokens_seen": 794296320 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007668913497030974, + "loss": 0.078, + "theoretical_loss": 3.7601491102361275, + "tokens_seen": 794558464 + }, + { + "epoch": 0.24, + "learning_rate": 0.000766811105761515, + "loss": 0.0777, + "theoretical_loss": 3.7600269387465914, + "tokens_seen": 794820608 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007667308618199326, + "loss": 0.0781, + "theoretical_loss": 3.759904818822525, + "tokens_seen": 795082752 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007666506178783502, + "loss": 0.0789, + "theoretical_loss": 3.759782750425175, + "tokens_seen": 795344896 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007665703739367678, + "loss": 0.076, + "theoretical_loss": 3.759660733515826, + "tokens_seen": 795607040 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007664901299951854, + "loss": 0.0767, + "theoretical_loss": 3.7595387680558088, + "tokens_seen": 795869184 + }, + { + "epoch": 0.24, + "learning_rate": 0.000766409886053603, + "loss": 0.0792, + "theoretical_loss": 3.759416854006492, + "tokens_seen": 796131328 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.001882580341771245, + "objective/train/docs_used": 294439, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6294305324554443, + "objective/train/original_loss": 1.6294307708740234, + "objective/train/theoretical_loss": 3.7593559162487864, + "objective/train/tokens_used": 816722400, + "objective/train/value_avg": -0.00921630859375, + "objective/train/value_loss": 0.00032186240423470736, + "objective/train/value_max": -0.00012636184692382812, + "objective/train/value_min": -0.79052734375, + "objective/train/value_reward_corr": 0.6324796724873081, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.0020285844802856, + "objective/train/weighted_lm_loss": 1.6332080364227295, + "objective/train/weights_max": 1.9955464601516724, + "objective/train/weights_min": 0.39590752124786377, + "theoretical_loss": 3.7593559162487864, + "tokens_seen": 796262400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007663296421120206, + "loss": 0.0795, + "theoretical_loss": 3.7592949913292886, + "tokens_seen": 796393472 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007662493981704381, + "loss": 0.0787, + "theoretical_loss": 3.759173179985652, + "tokens_seen": 796655616 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007661691542288557, + "loss": 0.0749, + "theoretical_loss": 3.7590514199370775, + "tokens_seen": 796917760 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007660889102872733, + "loss": 0.0737, + "theoretical_loss": 3.758929711145101, + "tokens_seen": 797179904 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007660086663456909, + "loss": 0.0756, + "theoretical_loss": 3.758808053571302, + "tokens_seen": 797442048 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007659284224041086, + "loss": 0.0805, + "theoretical_loss": 3.7586864471772996, + "tokens_seen": 797704192 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007658481784625261, + "loss": 0.0772, + "theoretical_loss": 3.758564891924755, + "tokens_seen": 797966336 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007657679345209437, + "loss": 0.0794, + "theoretical_loss": 3.758443387775371, + "tokens_seen": 798228480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007656876905793613, + "loss": 0.0764, + "theoretical_loss": 3.7583219346908905, + "tokens_seen": 798490624 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007656074466377788, + "loss": 0.0772, + "theoretical_loss": 3.758200532633099, + "tokens_seen": 798752768 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007655272026961964, + "loss": 0.0768, + "theoretical_loss": 3.7580791815638213, + "tokens_seen": 799014912 + }, + { + "epoch": 0.24, + "learning_rate": 0.000765446958754614, + "loss": 0.0785, + "theoretical_loss": 3.7579578814449253, + "tokens_seen": 799277056 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.0015031647635623813, + "objective/train/docs_used": 295724, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.440466046333313, + "objective/train/original_loss": 1.4404661655426025, + "objective/train/theoretical_loss": 3.7578366322383188, + "objective/train/tokens_used": 819999200, + "objective/train/value_avg": -0.005229949951171875, + "objective/train/value_loss": 0.00010367185313953087, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.90234375, + "objective/train/value_reward_corr": 0.7276015339903671, + "objective/train/value_std": 0.01129150390625, + "objective/train/weight_avg": 1.0015531778335571, + "objective/train/weighted_lm_loss": 1.4429603815078735, + "objective/train/weights_max": 1.3412903547286987, + "objective/train/weights_min": 0.5442660450935364, + "theoretical_loss": 3.7578366322383188, + "tokens_seen": 799539200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007653667148130316, + "loss": 0.0781, + "theoretical_loss": 3.7578366322383188, + "tokens_seen": 799539200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007652864708714491, + "loss": 0.0763, + "theoretical_loss": 3.7577154339059504, + "tokens_seen": 799801344 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007652062269298669, + "loss": 0.077, + "theoretical_loss": 3.7575942864098106, + "tokens_seen": 800063488 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007651259829882844, + "loss": 0.0758, + "theoretical_loss": 3.75747318971193, + "tokens_seen": 800325632 + }, + { + "epoch": 0.24, + "learning_rate": 0.000765045739046702, + "loss": 0.0757, + "theoretical_loss": 3.7573521437743795, + "tokens_seen": 800587776 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007649654951051196, + "loss": 0.0775, + "theoretical_loss": 3.7572311485592715, + "tokens_seen": 800849920 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007648852511635371, + "loss": 0.0755, + "theoretical_loss": 3.7571102040287596, + "tokens_seen": 801112064 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007648050072219548, + "loss": 0.0804, + "theoretical_loss": 3.7569893101450367, + "tokens_seen": 801374208 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007647247632803723, + "loss": 0.0805, + "theoretical_loss": 3.756868466870337, + "tokens_seen": 801636352 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007646445193387899, + "loss": 0.0782, + "theoretical_loss": 3.7567476741669346, + "tokens_seen": 801898496 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007645642753972076, + "loss": 0.078, + "theoretical_loss": 3.756626931997145, + "tokens_seen": 802160640 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007644840314556251, + "loss": 0.0746, + "theoretical_loss": 3.7565062403233234, + "tokens_seen": 802422784 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007644037875140427, + "loss": 0.0743, + "theoretical_loss": 3.7563855991078654, + "tokens_seen": 802684928 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": -0.0006523468182422221, + "objective/train/docs_used": 296852, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5035277605056763, + "objective/train/original_loss": 1.5035279989242554, + "objective/train/theoretical_loss": 3.7563252974102825, + "objective/train/tokens_used": 823276000, + "objective/train/value_avg": -0.00974273681640625, + "objective/train/value_loss": 0.00043232380994595587, + "objective/train/value_max": -0.0001823902130126953, + "objective/train/value_min": -0.93505859375, + "objective/train/value_reward_corr": 0.643231403068373, + "objective/train/value_std": 0.01873779296875, + "objective/train/weight_avg": 0.9995439648628235, + "objective/train/weighted_lm_loss": 1.5028263330459595, + "objective/train/weights_max": 2.1485886573791504, + "objective/train/weights_min": 0.3683769106864929, + "theoretical_loss": 3.7563252974102825, + "tokens_seen": 802816000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007643235435724603, + "loss": 0.0728, + "theoretical_loss": 3.7562650083132074, + "tokens_seen": 802947072 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007642432996308779, + "loss": 0.0785, + "theoretical_loss": 3.756144467901825, + "tokens_seen": 803209216 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007641630556892954, + "loss": 0.0779, + "theoretical_loss": 3.756023977836235, + "tokens_seen": 803471360 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007640828117477131, + "loss": 0.0768, + "theoretical_loss": 3.755903538078994, + "tokens_seen": 803733504 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007640025678061306, + "loss": 0.0804, + "theoretical_loss": 3.7557831485926982, + "tokens_seen": 803995648 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007639223238645483, + "loss": 0.0787, + "theoretical_loss": 3.7556628093399835, + "tokens_seen": 804257792 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007638420799229659, + "loss": 0.0803, + "theoretical_loss": 3.7555425202835275, + "tokens_seen": 804519936 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007637618359813834, + "loss": 0.0738, + "theoretical_loss": 3.7554222813860463, + "tokens_seen": 804782080 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007636815920398011, + "loss": 0.0771, + "theoretical_loss": 3.7553020926102954, + "tokens_seen": 805044224 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007636013480982186, + "loss": 0.0785, + "theoretical_loss": 3.755181953919071, + "tokens_seen": 805306368 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007635211041566362, + "loss": 0.0768, + "theoretical_loss": 3.755061865275209, + "tokens_seen": 805568512 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007634408602150538, + "loss": 0.078, + "theoretical_loss": 3.754941826641584, + "tokens_seen": 805830656 + }, + { + "epoch": 0.24, + "objective/train/advantage_avg": 0.001965285511687398, + "objective/train/docs_used": 297892, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5463677644729614, + "objective/train/original_loss": 1.546367883682251, + "objective/train/theoretical_loss": 3.754821837981112, + "objective/train/tokens_used": 826552800, + "objective/train/value_avg": -0.005489349365234375, + "objective/train/value_loss": 7.032585563138127e-05, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.2242431640625, + "objective/train/value_reward_corr": 0.6211484433775776, + "objective/train/value_std": 0.007572174072265625, + "objective/train/weight_avg": 1.0019999742507935, + "objective/train/weighted_lm_loss": 1.5493464469909668, + "objective/train/weights_max": 1.1479060649871826, + "objective/train/weights_min": 0.7884321212768555, + "theoretical_loss": 3.754821837981112, + "tokens_seen": 806092800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007633606162734714, + "loss": 0.0756, + "theoretical_loss": 3.754821837981112, + "tokens_seen": 806092800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007632803723318889, + "loss": 0.0761, + "theoretical_loss": 3.7547018992567462, + "tokens_seen": 806354944 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007632001283903065, + "loss": 0.0792, + "theoretical_loss": 3.7545820104314815, + "tokens_seen": 806617088 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007631198844487241, + "loss": 0.0745, + "theoretical_loss": 3.7544621714683517, + "tokens_seen": 806879232 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007630396405071417, + "loss": 0.0765, + "theoretical_loss": 3.754342382330428, + "tokens_seen": 807141376 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007629593965655594, + "loss": 0.0749, + "theoretical_loss": 3.7542226429808236, + "tokens_seen": 807403520 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007628791526239769, + "loss": 0.0744, + "theoretical_loss": 3.7541029533826893, + "tokens_seen": 807665664 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007627989086823945, + "loss": 0.0755, + "theoretical_loss": 3.7539833134992158, + "tokens_seen": 807927808 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007627186647408121, + "loss": 0.0784, + "theoretical_loss": 3.753863723293634, + "tokens_seen": 808189952 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007626384207992296, + "loss": 0.0768, + "theoretical_loss": 3.7537441827292106, + "tokens_seen": 808452096 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007625581768576473, + "loss": 0.0773, + "theoretical_loss": 3.753624691769255, + "tokens_seen": 808714240 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007624779329160648, + "loss": 0.0793, + "theoretical_loss": 3.7535052503771142, + "tokens_seen": 808976384 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007623976889744824, + "loss": 0.0757, + "theoretical_loss": 3.7533858585161735, + "tokens_seen": 809238528 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0006120595498941839, + "objective/train/docs_used": 298985, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5744049549102783, + "objective/train/original_loss": 1.5744049549102783, + "objective/train/theoretical_loss": 3.753326181148472, + "objective/train/tokens_used": 829829600, + "objective/train/value_avg": -0.0063629150390625, + "objective/train/value_loss": 0.00018711324082687497, + "objective/train/value_max": -8.153915405273438e-05, + "objective/train/value_min": -0.1954345703125, + "objective/train/value_reward_corr": 0.5172567162546362, + "objective/train/value_std": 0.0081634521484375, + "objective/train/weight_avg": 1.0006967782974243, + "objective/train/weighted_lm_loss": 1.575119972229004, + "objective/train/weights_max": 1.1109601259231567, + "objective/train/weights_min": 0.37364134192466736, + "theoretical_loss": 3.753326181148472, + "tokens_seen": 809369600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007623174450329001, + "loss": 0.0797, + "theoretical_loss": 3.753266516149858, + "tokens_seen": 809500672 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007622372010913177, + "loss": 0.0768, + "theoretical_loss": 3.7531472232416316, + "tokens_seen": 809762816 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007621569571497352, + "loss": 0.0739, + "theoretical_loss": 3.7530279797549957, + "tokens_seen": 810024960 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007620767132081528, + "loss": 0.075, + "theoretical_loss": 3.752908785653492, + "tokens_seen": 810287104 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007619964692665704, + "loss": 0.0758, + "theoretical_loss": 3.7527896409007004, + "tokens_seen": 810549248 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007619162253249879, + "loss": 0.0781, + "theoretical_loss": 3.7526705454602394, + "tokens_seen": 810811392 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007618359813834056, + "loss": 0.0772, + "theoretical_loss": 3.752551499295766, + "tokens_seen": 811073536 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007617557374418231, + "loss": 0.0767, + "theoretical_loss": 3.7524325023709757, + "tokens_seen": 811335680 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007616754935002407, + "loss": 0.0738, + "theoretical_loss": 3.7523135546496023, + "tokens_seen": 811597824 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007615952495586584, + "loss": 0.0773, + "theoretical_loss": 3.7521946560954182, + "tokens_seen": 811859968 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007615150056170759, + "loss": 0.0768, + "theoretical_loss": 3.7520758066722344, + "tokens_seen": 812122112 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007614347616754936, + "loss": 0.0751, + "theoretical_loss": 3.7519570063438996, + "tokens_seen": 812384256 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.002464708173647523, + "objective/train/docs_used": 300102, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4953174591064453, + "objective/train/original_loss": 1.4953173398971558, + "objective/train/theoretical_loss": 3.7518382550743024, + "objective/train/tokens_used": 833106400, + "objective/train/value_avg": -0.0084686279296875, + "objective/train/value_loss": 0.00014236473361961544, + "objective/train/value_max": -0.0001323223114013672, + "objective/train/value_min": -0.236572265625, + "objective/train/value_reward_corr": 0.6225013810544121, + "objective/train/value_std": 0.01258087158203125, + "objective/train/weight_avg": 1.00253164768219, + "objective/train/weighted_lm_loss": 1.4988830089569092, + "objective/train/weights_max": 1.1583771705627441, + "objective/train/weights_min": 0.36844298243522644, + "theoretical_loss": 3.7518382550743024, + "tokens_seen": 812646400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007613545177339111, + "loss": 0.0764, + "theoretical_loss": 3.7518382550743024, + "tokens_seen": 812646400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007612742737923287, + "loss": 0.0778, + "theoretical_loss": 3.7517195528273666, + "tokens_seen": 812908544 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007611940298507463, + "loss": 0.0758, + "theoretical_loss": 3.751600899567057, + "tokens_seen": 813170688 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007611137859091639, + "loss": 0.0725, + "theoretical_loss": 3.7514822952573743, + "tokens_seen": 813432832 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007610335419675814, + "loss": 0.0773, + "theoretical_loss": 3.7513637398623603, + "tokens_seen": 813694976 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007609532980259991, + "loss": 0.0759, + "theoretical_loss": 3.751245233346091, + "tokens_seen": 813957120 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007608730540844167, + "loss": 0.077, + "theoretical_loss": 3.7511267756726823, + "tokens_seen": 814219264 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007607928101428342, + "loss": 0.0786, + "theoretical_loss": 3.7510083668062886, + "tokens_seen": 814481408 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007607125662012519, + "loss": 0.075, + "theoretical_loss": 3.750890006711101, + "tokens_seen": 814743552 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007606323222596694, + "loss": 0.0752, + "theoretical_loss": 3.7507716953513492, + "tokens_seen": 815005696 + }, + { + "epoch": 0.25, + "learning_rate": 0.000760552078318087, + "loss": 0.0787, + "theoretical_loss": 3.7506534326912995, + "tokens_seen": 815267840 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007604718343765046, + "loss": 0.0772, + "theoretical_loss": 3.7505352186952567, + "tokens_seen": 815529984 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007603915904349222, + "loss": 0.074, + "theoretical_loss": 3.7504170533275634, + "tokens_seen": 815792128 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0003943704068660736, + "objective/train/docs_used": 301260, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4938346147537231, + "objective/train/original_loss": 1.4938344955444336, + "objective/train/theoretical_loss": 3.7503579888682155, + "objective/train/tokens_used": 836383200, + "objective/train/value_avg": -0.00901031494140625, + "objective/train/value_loss": 0.0003670316655188799, + "objective/train/value_max": -0.0001080632209777832, + "objective/train/value_min": -0.38720703125, + "objective/train/value_reward_corr": 0.6623424603359667, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.0005619525909424, + "objective/train/weighted_lm_loss": 1.4935811758041382, + "objective/train/weights_max": 1.3037108182907104, + "objective/train/weights_min": 0.36955323815345764, + "theoretical_loss": 3.7503579888682155, + "tokens_seen": 815923200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007603113464933397, + "loss": 0.0733, + "theoretical_loss": 3.7502989365526, + "tokens_seen": 816054272 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007602311025517573, + "loss": 0.0764, + "theoretical_loss": 3.7501808683347826, + "tokens_seen": 816316416 + }, + { + "epoch": 0.25, + "learning_rate": 0.000760150858610175, + "loss": 0.0753, + "theoretical_loss": 3.7500628486385668, + "tokens_seen": 816578560 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007600706146685926, + "loss": 0.0747, + "theoretical_loss": 3.7499448774284447, + "tokens_seen": 816840704 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007599903707270102, + "loss": 0.0771, + "theoretical_loss": 3.749826954668946, + "tokens_seen": 817102848 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007599101267854277, + "loss": 0.0774, + "theoretical_loss": 3.7497090803246387, + "tokens_seen": 817364992 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007598298828438454, + "loss": 0.075, + "theoretical_loss": 3.7495912543601246, + "tokens_seen": 817627136 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007597496389022629, + "loss": 0.0778, + "theoretical_loss": 3.7494734767400475, + "tokens_seen": 817889280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007596693949606804, + "loss": 0.0766, + "theoretical_loss": 3.7493557474290853, + "tokens_seen": 818151424 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007595891510190981, + "loss": 0.0745, + "theoretical_loss": 3.7492380663919533, + "tokens_seen": 818413568 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007595089070775156, + "loss": 0.0762, + "theoretical_loss": 3.7491204335934043, + "tokens_seen": 818675712 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007594286631359332, + "loss": 0.0761, + "theoretical_loss": 3.7490028489982286, + "tokens_seen": 818937856 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0018716322956606746, + "objective/train/docs_used": 302476, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5703623294830322, + "objective/train/original_loss": 1.5703623294830322, + "objective/train/theoretical_loss": 3.7488853125712525, + "objective/train/tokens_used": 839660000, + "objective/train/value_avg": -0.011810302734375, + "objective/train/value_loss": 0.0005407900898717344, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.86669921875, + "objective/train/value_reward_corr": 0.6016890575229221, + "objective/train/value_std": 0.01898193359375, + "objective/train/weight_avg": 1.0021065473556519, + "objective/train/weighted_lm_loss": 1.5734965801239014, + "objective/train/weights_max": 1.6662120819091797, + "objective/train/weights_min": 0.3839375674724579, + "theoretical_loss": 3.7488853125712525, + "tokens_seen": 819200000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007593484191943509, + "loss": 0.073, + "theoretical_loss": 3.7488853125712525, + "tokens_seen": 819200000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007592681752527685, + "loss": 0.0737, + "theoretical_loss": 3.7487678242773406, + "tokens_seen": 819462144 + }, + { + "epoch": 0.25, + "learning_rate": 0.000759187931311186, + "loss": 0.0774, + "theoretical_loss": 3.748650384081392, + "tokens_seen": 819724288 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007591076873696036, + "loss": 0.0769, + "theoretical_loss": 3.7485329919483448, + "tokens_seen": 819986432 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007590274434280212, + "loss": 0.0746, + "theoretical_loss": 3.7484156478431734, + "tokens_seen": 820248576 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007589471994864387, + "loss": 0.0751, + "theoretical_loss": 3.748298351730888, + "tokens_seen": 820510720 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007588669555448564, + "loss": 0.0756, + "theoretical_loss": 3.748181103576537, + "tokens_seen": 820772864 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007587867116032739, + "loss": 0.0773, + "theoretical_loss": 3.7480639033452032, + "tokens_seen": 821035008 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007587064676616916, + "loss": 0.078, + "theoretical_loss": 3.747946751002009, + "tokens_seen": 821297152 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007586262237201092, + "loss": 0.0783, + "theoretical_loss": 3.747829646512109, + "tokens_seen": 821559296 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007585459797785267, + "loss": 0.0765, + "theoretical_loss": 3.747712589840699, + "tokens_seen": 821821440 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007584657358369444, + "loss": 0.075, + "theoretical_loss": 3.7475955809530084, + "tokens_seen": 822083584 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007583854918953619, + "loss": 0.0732, + "theoretical_loss": 3.747478619814303, + "tokens_seen": 822345728 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.000402974575990811, + "objective/train/docs_used": 303734, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5402162075042725, + "objective/train/original_loss": 1.540216088294983, + "objective/train/theoretical_loss": 3.7474201571399757, + "objective/train/tokens_used": 842936800, + "objective/train/value_avg": -0.011871337890625, + "objective/train/value_loss": 0.00018918554997071624, + "objective/train/value_max": -0.00019872188568115234, + "objective/train/value_min": -0.302734375, + "objective/train/value_reward_corr": 0.9061185714934559, + "objective/train/value_std": 0.0229949951171875, + "objective/train/weight_avg": 1.000493049621582, + "objective/train/weighted_lm_loss": 1.5424861907958984, + "objective/train/weights_max": 1.3256474733352661, + "objective/train/weights_min": 0.37776029109954834, + "theoretical_loss": 3.7474201571399757, + "tokens_seen": 822476800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007583052479537795, + "loss": 0.0791, + "theoretical_loss": 3.7473617063898863, + "tokens_seen": 822607872 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007582250040121971, + "loss": 0.0768, + "theoretical_loss": 3.747244840645097, + "tokens_seen": 822870016 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007581447600706147, + "loss": 0.0752, + "theoretical_loss": 3.7471280225453096, + "tokens_seen": 823132160 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007580645161290322, + "loss": 0.0766, + "theoretical_loss": 3.747011252055936, + "tokens_seen": 823394304 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007579842721874498, + "loss": 0.075, + "theoretical_loss": 3.746894529142424, + "tokens_seen": 823656448 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007579040282458675, + "loss": 0.0764, + "theoretical_loss": 3.746777853770256, + "tokens_seen": 823918592 + }, + { + "epoch": 0.25, + "learning_rate": 0.000757823784304285, + "loss": 0.0752, + "theoretical_loss": 3.746661225904953, + "tokens_seen": 824180736 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007577435403627027, + "loss": 0.0776, + "theoretical_loss": 3.746544645512069, + "tokens_seen": 824442880 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007576632964211202, + "loss": 0.0754, + "theoretical_loss": 3.7464281125571963, + "tokens_seen": 824705024 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007575830524795379, + "loss": 0.0761, + "theoretical_loss": 3.7463116270059618, + "tokens_seen": 824967168 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007575028085379554, + "loss": 0.0751, + "theoretical_loss": 3.7461951888240286, + "tokens_seen": 825229312 + }, + { + "epoch": 0.25, + "learning_rate": 0.000757422564596373, + "loss": 0.077, + "theoretical_loss": 3.7460787979770958, + "tokens_seen": 825491456 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.001048018573783338, + "objective/train/docs_used": 304950, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.441100835800171, + "objective/train/original_loss": 1.441100835800171, + "objective/train/theoretical_loss": 3.745962454430897, + "objective/train/tokens_used": 846213600, + "objective/train/value_avg": -0.00592803955078125, + "objective/train/value_loss": 0.00011939887917833403, + "objective/train/value_max": -0.00017404556274414062, + "objective/train/value_min": -0.1895751953125, + "objective/train/value_reward_corr": 0.6019524468909858, + "objective/train/value_std": 0.0082244873046875, + "objective/train/weight_avg": 1.0011035203933716, + "objective/train/weighted_lm_loss": 1.4430818557739258, + "objective/train/weights_max": 1.2087359428405762, + "objective/train/weights_min": 0.3807637691497803, + "theoretical_loss": 3.745962454430897, + "tokens_seen": 825753600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007573423206547906, + "loss": 0.0766, + "theoretical_loss": 3.745962454430897, + "tokens_seen": 825753600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007572620767132081, + "loss": 0.0787, + "theoretical_loss": 3.745846158151204, + "tokens_seen": 826015744 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007571818327716257, + "loss": 0.0766, + "theoretical_loss": 3.7457299091038214, + "tokens_seen": 826277888 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007571015888300434, + "loss": 0.0748, + "theoretical_loss": 3.745613707254591, + "tokens_seen": 826540032 + }, + { + "epoch": 0.25, + "learning_rate": 0.000757021344888461, + "loss": 0.0764, + "theoretical_loss": 3.7454975525693897, + "tokens_seen": 826802176 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007569411009468785, + "loss": 0.076, + "theoretical_loss": 3.7453814450141305, + "tokens_seen": 827064320 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007568608570052962, + "loss": 0.0808, + "theoretical_loss": 3.7452653845547603, + "tokens_seen": 827326464 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007567806130637137, + "loss": 0.08, + "theoretical_loss": 3.745149371157263, + "tokens_seen": 827588608 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007567003691221312, + "loss": 0.078, + "theoretical_loss": 3.7450334047876574, + "tokens_seen": 827850752 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007566201251805489, + "loss": 0.0779, + "theoretical_loss": 3.744917485411997, + "tokens_seen": 828112896 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007565398812389664, + "loss": 0.0803, + "theoretical_loss": 3.744801612996371, + "tokens_seen": 828375040 + }, + { + "epoch": 0.25, + "learning_rate": 0.000756459637297384, + "loss": 0.0767, + "theoretical_loss": 3.744685787506903, + "tokens_seen": 828637184 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007563793933558017, + "loss": 0.0776, + "theoretical_loss": 3.7445700089097533, + "tokens_seen": 828899328 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0003027545753866434, + "objective/train/docs_used": 306096, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4149469137191772, + "objective/train/original_loss": 1.4149470329284668, + "objective/train/theoretical_loss": 3.7445121371852323, + "objective/train/tokens_used": 849490400, + "objective/train/value_avg": -0.00829315185546875, + "objective/train/value_loss": 0.00021588837262243032, + "objective/train/value_max": -0.0001926422119140625, + "objective/train/value_min": -0.2091064453125, + "objective/train/value_reward_corr": 0.6806189676548946, + "objective/train/value_std": 0.013153076171875, + "objective/train/weight_avg": 1.0003987550735474, + "objective/train/weighted_lm_loss": 1.4160531759262085, + "objective/train/weights_max": 1.1402249336242676, + "objective/train/weights_min": 0.3719916045665741, + "theoretical_loss": 3.7445121371852323, + "tokens_seen": 829030400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007562991494142193, + "loss": 0.0764, + "theoretical_loss": 3.7444542771711165, + "tokens_seen": 829161472 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007562189054726369, + "loss": 0.0747, + "theoretical_loss": 3.744338592257222, + "tokens_seen": 829423616 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007561386615310544, + "loss": 0.0762, + "theoretical_loss": 3.744222954134334, + "tokens_seen": 829685760 + }, + { + "epoch": 0.25, + "learning_rate": 0.000756058417589472, + "loss": 0.0747, + "theoretical_loss": 3.7441073627687524, + "tokens_seen": 829947904 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007559781736478896, + "loss": 0.0769, + "theoretical_loss": 3.743991818126812, + "tokens_seen": 830210048 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007558979297063072, + "loss": 0.0764, + "theoretical_loss": 3.7438763201748815, + "tokens_seen": 830472192 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007558176857647247, + "loss": 0.0766, + "theoretical_loss": 3.743760868879365, + "tokens_seen": 830734336 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007557374418231424, + "loss": 0.075, + "theoretical_loss": 3.743645464206702, + "tokens_seen": 830996480 + }, + { + "epoch": 0.25, + "learning_rate": 0.00075565719788156, + "loss": 0.0759, + "theoretical_loss": 3.743530106123365, + "tokens_seen": 831258624 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007555769539399775, + "loss": 0.0757, + "theoretical_loss": 3.7434147945958642, + "tokens_seen": 831520768 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007554967099983952, + "loss": 0.0767, + "theoretical_loss": 3.7432995295907405, + "tokens_seen": 831782912 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007554164660568127, + "loss": 0.0782, + "theoretical_loss": 3.7431843110745726, + "tokens_seen": 832045056 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0011427036952227354, + "objective/train/docs_used": 307306, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.53184974193573, + "objective/train/original_loss": 1.5318498611450195, + "objective/train/theoretical_loss": 3.743069139013972, + "objective/train/tokens_used": 852767200, + "objective/train/value_avg": -0.0099029541015625, + "objective/train/value_loss": 0.0002651831309776753, + "objective/train/value_max": -0.0002551078796386719, + "objective/train/value_min": -0.54150390625, + "objective/train/value_reward_corr": 0.6319940300493742, + "objective/train/value_std": 0.01386260986328125, + "objective/train/weight_avg": 1.0012651681900024, + "objective/train/weighted_lm_loss": 1.5327504873275757, + "objective/train/weights_max": 1.5147995948791504, + "objective/train/weights_min": 0.38184595108032227, + "theoretical_loss": 3.743069139013972, + "tokens_seen": 832307200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007553362221152303, + "loss": 0.0752, + "theoretical_loss": 3.743069139013972, + "tokens_seen": 832307200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007552559781736479, + "loss": 0.0763, + "theoretical_loss": 3.742954013375586, + "tokens_seen": 832569344 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007551757342320655, + "loss": 0.0769, + "theoretical_loss": 3.742838934126094, + "tokens_seen": 832831488 + }, + { + "epoch": 0.25, + "learning_rate": 0.000755095490290483, + "loss": 0.0764, + "theoretical_loss": 3.742723901232213, + "tokens_seen": 833093632 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007550152463489006, + "loss": 0.0767, + "theoretical_loss": 3.742608914660692, + "tokens_seen": 833355776 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007549350024073182, + "loss": 0.0781, + "theoretical_loss": 3.742493974378314, + "tokens_seen": 833617920 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007548547584657359, + "loss": 0.0753, + "theoretical_loss": 3.742379080351899, + "tokens_seen": 833880064 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007547745145241535, + "loss": 0.079, + "theoretical_loss": 3.7422642325482975, + "tokens_seen": 834142208 + }, + { + "epoch": 0.25, + "learning_rate": 0.000754694270582571, + "loss": 0.081, + "theoretical_loss": 3.742149430934398, + "tokens_seen": 834404352 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007546140266409887, + "loss": 0.0807, + "theoretical_loss": 3.7420346754771208, + "tokens_seen": 834666496 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007545337826994062, + "loss": 0.0763, + "theoretical_loss": 3.7419199661434197, + "tokens_seen": 834928640 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007544535387578238, + "loss": 0.0782, + "theoretical_loss": 3.7418053029002842, + "tokens_seen": 835190784 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007543732948162414, + "loss": 0.074, + "theoretical_loss": 3.7416906857147367, + "tokens_seen": 835452928 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 4.4281885493546724e-05, + "objective/train/docs_used": 308483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.479179859161377, + "objective/train/original_loss": 1.479179859161377, + "objective/train/theoretical_loss": 3.741633394383263, + "objective/train/tokens_used": 856044000, + "objective/train/value_avg": -0.01134490966796875, + "objective/train/value_loss": 0.0005562845035456121, + "objective/train/value_max": -0.00019562244415283203, + "objective/train/value_min": -0.66552734375, + "objective/train/value_reward_corr": 0.6574823323493614, + "objective/train/value_std": 0.0205535888671875, + "objective/train/weight_avg": 1.0002927780151367, + "objective/train/weighted_lm_loss": 1.477531909942627, + "objective/train/weights_max": 1.606209397315979, + "objective/train/weights_min": 0.37909993529319763, + "theoretical_loss": 3.741633394383263, + "tokens_seen": 835584000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007542930508746589, + "loss": 0.0756, + "theoretical_loss": 3.741576114553835, + "tokens_seen": 835715072 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007542128069330765, + "loss": 0.0778, + "theoretical_loss": 3.7414615893846683, + "tokens_seen": 835977216 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007541325629914942, + "loss": 0.0768, + "theoretical_loss": 3.741347110174362, + "tokens_seen": 836239360 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007540523190499118, + "loss": 0.0776, + "theoretical_loss": 3.741232676890074, + "tokens_seen": 836501504 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007539720751083293, + "loss": 0.0785, + "theoretical_loss": 3.7411182894989965, + "tokens_seen": 836763648 + }, + { + "epoch": 0.25, + "learning_rate": 0.000753891831166747, + "loss": 0.078, + "theoretical_loss": 3.7410039479683546, + "tokens_seen": 837025792 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007538115872251645, + "loss": 0.0758, + "theoretical_loss": 3.740889652265408, + "tokens_seen": 837287936 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007537313432835821, + "loss": 0.0775, + "theoretical_loss": 3.7407754023574507, + "tokens_seen": 837550080 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007536510993419997, + "loss": 0.0768, + "theoretical_loss": 3.7406611982118076, + "tokens_seen": 837812224 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007535708554004172, + "loss": 0.0768, + "theoretical_loss": 3.74054703979584, + "tokens_seen": 838074368 + }, + { + "epoch": 0.25, + "learning_rate": 0.000753490611458835, + "loss": 0.0783, + "theoretical_loss": 3.7404329270769403, + "tokens_seen": 838336512 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007534103675172525, + "loss": 0.076, + "theoretical_loss": 3.740318860022537, + "tokens_seen": 838598656 + }, + { + "epoch": 0.25, + "objective/train/advantage_avg": 0.0013807304203510284, + "objective/train/docs_used": 309674, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6753649711608887, + "objective/train/original_loss": 1.6753649711608887, + "objective/train/theoretical_loss": 3.7402048386000892, + "objective/train/tokens_used": 859320800, + "objective/train/value_avg": -0.01296234130859375, + "objective/train/value_loss": 0.0006226726691238582, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.6162109375, + "objective/train/value_reward_corr": 0.639937529812378, + "objective/train/value_std": 0.019622802734375, + "objective/train/weight_avg": 1.001650094985962, + "objective/train/weighted_lm_loss": 1.6781058311462402, + "objective/train/weights_max": 1.614859700202942, + "objective/train/weights_min": 0.37846994400024414, + "theoretical_loss": 3.7402048386000892, + "tokens_seen": 838860800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007533301235756701, + "loss": 0.0754, + "theoretical_loss": 3.7402048386000892, + "tokens_seen": 838860800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007532498796340877, + "loss": 0.0743, + "theoretical_loss": 3.740090862777091, + "tokens_seen": 839122944 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007531696356925052, + "loss": 0.0779, + "theoretical_loss": 3.7399769325210697, + "tokens_seen": 839385088 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007530893917509228, + "loss": 0.0771, + "theoretical_loss": 3.7398630477995853, + "tokens_seen": 839647232 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007530091478093404, + "loss": 0.0751, + "theoretical_loss": 3.7397492085802315, + "tokens_seen": 839909376 + }, + { + "epoch": 0.25, + "learning_rate": 0.000752928903867758, + "loss": 0.0765, + "theoretical_loss": 3.739635414830635, + "tokens_seen": 840171520 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007528486599261755, + "loss": 0.0763, + "theoretical_loss": 3.7395216665184554, + "tokens_seen": 840433664 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007527684159845932, + "loss": 0.0793, + "theoretical_loss": 3.739407963611386, + "tokens_seen": 840695808 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007526881720430108, + "loss": 0.074, + "theoretical_loss": 3.739294306077152, + "tokens_seen": 840957952 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007526079281014283, + "loss": 0.0785, + "theoretical_loss": 3.7391806938835126, + "tokens_seen": 841220096 + }, + { + "epoch": 0.26, + "learning_rate": 0.000752527684159846, + "loss": 0.0755, + "theoretical_loss": 3.7390671269982603, + "tokens_seen": 841482240 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007524474402182635, + "loss": 0.0764, + "theoretical_loss": 3.7389536053892187, + "tokens_seen": 841744384 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007523671962766812, + "loss": 0.0744, + "theoretical_loss": 3.738840129024246, + "tokens_seen": 842006528 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.002575838938355446, + "objective/train/docs_used": 310992, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4314990043640137, + "objective/train/original_loss": 1.4314990043640137, + "objective/train/theoretical_loss": 3.73878340779825, + "objective/train/tokens_used": 862597600, + "objective/train/value_avg": -0.00510406494140625, + "objective/train/value_loss": 6.991349073359743e-05, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.186279296875, + "objective/train/value_reward_corr": 0.4218440665138546, + "objective/train/value_std": 0.005443572998046875, + "objective/train/weight_avg": 1.0026094913482666, + "objective/train/weighted_lm_loss": 1.4359185695648193, + "objective/train/weights_max": 1.1117740869522095, + "objective/train/weights_min": 0.5176728963851929, + "theoretical_loss": 3.73878340779825, + "tokens_seen": 842137600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007522869523350987, + "loss": 0.0771, + "theoretical_loss": 3.738726697871233, + "tokens_seen": 842268672 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007522067083935163, + "loss": 0.0796, + "theoretical_loss": 3.738613311898103, + "tokens_seen": 842530816 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007521264644519339, + "loss": 0.0764, + "theoretical_loss": 3.7384999710728106, + "tokens_seen": 842792960 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007520462205103514, + "loss": 0.0769, + "theoretical_loss": 3.738386675363346, + "tokens_seen": 843055104 + }, + { + "epoch": 0.26, + "learning_rate": 0.000751965976568769, + "loss": 0.0764, + "theoretical_loss": 3.738273424737729, + "tokens_seen": 843317248 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007518857326271867, + "loss": 0.0792, + "theoretical_loss": 3.7381602191640146, + "tokens_seen": 843579392 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007518054886856043, + "loss": 0.077, + "theoretical_loss": 3.738047058610289, + "tokens_seen": 843841536 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007517252447440218, + "loss": 0.0768, + "theoretical_loss": 3.7379339430446707, + "tokens_seen": 844103680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007516450008024395, + "loss": 0.0752, + "theoretical_loss": 3.7378208724353117, + "tokens_seen": 844365824 + }, + { + "epoch": 0.26, + "learning_rate": 0.000751564756860857, + "loss": 0.0776, + "theoretical_loss": 3.7377078467503955, + "tokens_seen": 844627968 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007514845129192746, + "loss": 0.0782, + "theoretical_loss": 3.737594865958138, + "tokens_seen": 844890112 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007514042689776922, + "loss": 0.076, + "theoretical_loss": 3.7374819300267883, + "tokens_seen": 845152256 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.000701575365383178, + "objective/train/docs_used": 312167, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.512547492980957, + "objective/train/original_loss": 1.512547492980957, + "objective/train/theoretical_loss": 3.7373690389246272, + "objective/train/tokens_used": 865874400, + "objective/train/value_avg": -0.00689697265625, + "objective/train/value_loss": 0.00012069241347489879, + "objective/train/value_max": -0.00014317035675048828, + "objective/train/value_min": -0.318115234375, + "objective/train/value_reward_corr": 0.6602302605762631, + "objective/train/value_std": 0.01007843017578125, + "objective/train/weight_avg": 1.0007611513137817, + "objective/train/weighted_lm_loss": 1.5141348838806152, + "objective/train/weights_max": 1.115106463432312, + "objective/train/weights_min": 0.8200203776359558, + "theoretical_loss": 3.7373690389246272, + "tokens_seen": 845414400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007513240250361097, + "loss": 0.0768, + "theoretical_loss": 3.7373690389246272, + "tokens_seen": 845414400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007512437810945275, + "loss": 0.0783, + "theoretical_loss": 3.737256192619967, + "tokens_seen": 845676544 + }, + { + "epoch": 0.26, + "learning_rate": 0.000751163537152945, + "loss": 0.0753, + "theoretical_loss": 3.737143391081154, + "tokens_seen": 845938688 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007510832932113626, + "loss": 0.0762, + "theoretical_loss": 3.7370306342765653, + "tokens_seen": 846200832 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007510030492697802, + "loss": 0.0737, + "theoretical_loss": 3.73691792217461, + "tokens_seen": 846462976 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007509228053281978, + "loss": 0.0779, + "theoretical_loss": 3.7368052547437305, + "tokens_seen": 846725120 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007508425613866153, + "loss": 0.0772, + "theoretical_loss": 3.7366926319524003, + "tokens_seen": 846987264 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007507623174450329, + "loss": 0.0747, + "theoretical_loss": 3.736580053769125, + "tokens_seen": 847249408 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007506820735034505, + "loss": 0.0728, + "theoretical_loss": 3.736467520162442, + "tokens_seen": 847511552 + }, + { + "epoch": 0.26, + "learning_rate": 0.000750601829561868, + "loss": 0.0772, + "theoretical_loss": 3.736355031100922, + "tokens_seen": 847773696 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007505215856202858, + "loss": 0.0765, + "theoretical_loss": 3.7362425865531654, + "tokens_seen": 848035840 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007504413416787033, + "loss": 0.0773, + "theoretical_loss": 3.736130186487806, + "tokens_seen": 848297984 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007503610977371209, + "loss": 0.0749, + "theoretical_loss": 3.736017830873508, + "tokens_seen": 848560128 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0018955293344333768, + "objective/train/docs_used": 313372, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.561277151107788, + "objective/train/original_loss": 1.561277151107788, + "objective/train/theoretical_loss": 3.7359616697257243, + "objective/train/tokens_used": 869151200, + "objective/train/value_avg": -0.00714111328125, + "objective/train/value_loss": 0.00045251031406223774, + "objective/train/value_max": -0.00013136863708496094, + "objective/train/value_min": -0.984375, + "objective/train/value_reward_corr": 0.6359393678379087, + "objective/train/value_std": 0.0159912109375, + "objective/train/weight_avg": 1.0020655393600464, + "objective/train/weighted_lm_loss": 1.5643846988677979, + "objective/train/weights_max": 2.523839235305786, + "objective/train/weights_min": 0.07859423011541367, + "theoretical_loss": 3.7359616697257243, + "tokens_seen": 848691200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007502808537955385, + "loss": 0.0772, + "theoretical_loss": 3.7359055196789694, + "tokens_seen": 848822272 + }, + { + "epoch": 0.26, + "learning_rate": 0.000750200609853956, + "loss": 0.075, + "theoretical_loss": 3.7357932528729183, + "tokens_seen": 849084416 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007501203659123736, + "loss": 0.0787, + "theoretical_loss": 3.7356810304241144, + "tokens_seen": 849346560 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007500401219707912, + "loss": 0.0783, + "theoretical_loss": 3.73556885230135, + "tokens_seen": 849608704 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007499598780292088, + "loss": 0.0756, + "theoretical_loss": 3.735456718473449, + "tokens_seen": 849870848 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007498796340876264, + "loss": 0.0718, + "theoretical_loss": 3.7353446289092647, + "tokens_seen": 850132992 + }, + { + "epoch": 0.26, + "learning_rate": 0.000749799390146044, + "loss": 0.0782, + "theoretical_loss": 3.7352325835776856, + "tokens_seen": 850395136 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007497191462044616, + "loss": 0.0773, + "theoretical_loss": 3.7351205824476277, + "tokens_seen": 850657280 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007496389022628792, + "loss": 0.0791, + "theoretical_loss": 3.7350086254880415, + "tokens_seen": 850919424 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007495586583212968, + "loss": 0.0743, + "theoretical_loss": 3.734896712667907, + "tokens_seen": 851181568 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007494784143797143, + "loss": 0.0754, + "theoretical_loss": 3.734784843956236, + "tokens_seen": 851443712 + }, + { + "epoch": 0.26, + "learning_rate": 0.000749398170438132, + "loss": 0.0747, + "theoretical_loss": 3.7346730193220727, + "tokens_seen": 851705856 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0007218181854113936, + "objective/train/docs_used": 314587, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5503199100494385, + "objective/train/original_loss": 1.5503199100494385, + "objective/train/theoretical_loss": 3.7345612387344906, + "objective/train/tokens_used": 872428000, + "objective/train/value_avg": -0.007656097412109375, + "objective/train/value_loss": 0.00033693682053126395, + "objective/train/value_max": -0.00014650821685791016, + "objective/train/value_min": -0.345947265625, + "objective/train/value_reward_corr": 0.6609246880890598, + "objective/train/value_std": 0.01441192626953125, + "objective/train/weight_avg": 1.0008713006973267, + "objective/train/weighted_lm_loss": 1.550735354423523, + "objective/train/weights_max": 1.2893656492233276, + "objective/train/weights_min": 0.3895317018032074, + "theoretical_loss": 3.7345612387344906, + "tokens_seen": 851968000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007493179264965495, + "loss": 0.0758, + "theoretical_loss": 3.7345612387344906, + "tokens_seen": 851968000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007492376825549671, + "loss": 0.0782, + "theoretical_loss": 3.734449502162596, + "tokens_seen": 852230144 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007491574386133847, + "loss": 0.0773, + "theoretical_loss": 3.7343378095755257, + "tokens_seen": 852492288 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007490771946718022, + "loss": 0.0745, + "theoretical_loss": 3.7342261609424483, + "tokens_seen": 852754432 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007489969507302198, + "loss": 0.0788, + "theoretical_loss": 3.7341145562325613, + "tokens_seen": 853016576 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007489167067886375, + "loss": 0.0761, + "theoretical_loss": 3.734002995415096, + "tokens_seen": 853278720 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007488364628470551, + "loss": 0.0772, + "theoretical_loss": 3.7338914784593134, + "tokens_seen": 853540864 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007487562189054726, + "loss": 0.0755, + "theoretical_loss": 3.733780005334505, + "tokens_seen": 853803008 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007486759749638903, + "loss": 0.0789, + "theoretical_loss": 3.733668576009995, + "tokens_seen": 854065152 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007485957310223078, + "loss": 0.0768, + "theoretical_loss": 3.733557190455136, + "tokens_seen": 854327296 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007485154870807254, + "loss": 0.0742, + "theoretical_loss": 3.733445848639313, + "tokens_seen": 854589440 + }, + { + "epoch": 0.26, + "learning_rate": 0.000748435243139143, + "loss": 0.0754, + "theoretical_loss": 3.733334550531942, + "tokens_seen": 854851584 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007483549991975605, + "loss": 0.0776, + "theoretical_loss": 3.7332232961024694, + "tokens_seen": 855113728 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.001274409587495029, + "objective/train/docs_used": 315806, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4173961877822876, + "objective/train/original_loss": 1.417395830154419, + "objective/train/theoretical_loss": 3.733167685257405, + "objective/train/tokens_used": 875704800, + "objective/train/value_avg": -0.006504058837890625, + "objective/train/value_loss": 0.00017858008504845202, + "objective/train/value_max": -0.0001647472381591797, + "objective/train/value_min": -0.2578125, + "objective/train/value_reward_corr": 0.5306423754138953, + "objective/train/value_std": 0.00859832763671875, + "objective/train/weight_avg": 1.001355767250061, + "objective/train/weighted_lm_loss": 1.420598030090332, + "objective/train/weights_max": 1.1989800930023193, + "objective/train/weights_min": 0.38221901655197144, + "theoretical_loss": 3.733167685257405, + "tokens_seen": 855244800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007482747552559783, + "loss": 0.0759, + "theoretical_loss": 3.7331120853203714, + "tokens_seen": 855375872 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007481945113143958, + "loss": 0.0752, + "theoretical_loss": 3.733000918155156, + "tokens_seen": 855638016 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007481142673728134, + "loss": 0.0726, + "theoretical_loss": 3.7328897945763617, + "tokens_seen": 855900160 + }, + { + "epoch": 0.26, + "learning_rate": 0.000748034023431231, + "loss": 0.0781, + "theoretical_loss": 3.7327787145535574, + "tokens_seen": 856162304 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007479537794896486, + "loss": 0.0738, + "theoretical_loss": 3.732667678056342, + "tokens_seen": 856424448 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007478735355480661, + "loss": 0.073, + "theoretical_loss": 3.732556685054346, + "tokens_seen": 856686592 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007477932916064837, + "loss": 0.0745, + "theoretical_loss": 3.7324457355172296, + "tokens_seen": 856948736 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007477130476649013, + "loss": 0.0748, + "theoretical_loss": 3.7323348294146843, + "tokens_seen": 857210880 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007476328037233188, + "loss": 0.0758, + "theoretical_loss": 3.73222396671643, + "tokens_seen": 857473024 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007475525597817365, + "loss": 0.0766, + "theoretical_loss": 3.73211314739222, + "tokens_seen": 857735168 + }, + { + "epoch": 0.26, + "learning_rate": 0.000747472315840154, + "loss": 0.0748, + "theoretical_loss": 3.732002371411835, + "tokens_seen": 857997312 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007473920718985718, + "loss": 0.0738, + "theoretical_loss": 3.7318916387450876, + "tokens_seen": 858259456 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.00041493060416541994, + "objective/train/docs_used": 317142, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5313217639923096, + "objective/train/original_loss": 1.5313217639923096, + "objective/train/theoretical_loss": 3.7317809493618204, + "objective/train/tokens_used": 878981600, + "objective/train/value_avg": -0.007602691650390625, + "objective/train/value_loss": 0.00031841802410781384, + "objective/train/value_max": -0.0001596212387084961, + "objective/train/value_min": -0.71923828125, + "objective/train/value_reward_corr": 0.6602214596414195, + "objective/train/value_std": 0.0145111083984375, + "objective/train/weight_avg": 1.0005568265914917, + "objective/train/weighted_lm_loss": 1.5318763256072998, + "objective/train/weights_max": 1.2434570789337158, + "objective/train/weights_min": 0.3680877387523651, + "theoretical_loss": 3.7317809493618204, + "tokens_seen": 858521600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007473118279569893, + "loss": 0.0776, + "theoretical_loss": 3.7317809493618204, + "tokens_seen": 858521600 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007472315840154068, + "loss": 0.0752, + "theoretical_loss": 3.7316703032319056, + "tokens_seen": 858783744 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007471513400738245, + "loss": 0.0787, + "theoretical_loss": 3.7315597003252474, + "tokens_seen": 859045888 + }, + { + "epoch": 0.26, + "learning_rate": 0.000747071096132242, + "loss": 0.0784, + "theoretical_loss": 3.731449140611777, + "tokens_seen": 859308032 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007469908521906596, + "loss": 0.0747, + "theoretical_loss": 3.7313386240614577, + "tokens_seen": 859570176 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007469106082490772, + "loss": 0.0761, + "theoretical_loss": 3.7312281506442835, + "tokens_seen": 859832320 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007468303643074948, + "loss": 0.0758, + "theoretical_loss": 3.7311177203302766, + "tokens_seen": 860094464 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007467501203659124, + "loss": 0.0769, + "theoretical_loss": 3.7310073330894906, + "tokens_seen": 860356608 + }, + { + "epoch": 0.26, + "learning_rate": 0.00074666987642433, + "loss": 0.074, + "theoretical_loss": 3.730896988892008, + "tokens_seen": 860618752 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007465896324827476, + "loss": 0.0809, + "theoretical_loss": 3.7307866877079414, + "tokens_seen": 860880896 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007465093885411651, + "loss": 0.0758, + "theoretical_loss": 3.730676429507435, + "tokens_seen": 861143040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007464291445995828, + "loss": 0.0736, + "theoretical_loss": 3.730566214260659, + "tokens_seen": 861405184 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007463489006580003, + "loss": 0.0762, + "theoretical_loss": 3.730456041937817, + "tokens_seen": 861667328 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0012228551786392927, + "objective/train/docs_used": 318349, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4355342388153076, + "objective/train/original_loss": 1.4355342388153076, + "objective/train/theoretical_loss": 3.730400971863568, + "objective/train/tokens_used": 882258400, + "objective/train/value_avg": -0.00885772705078125, + "objective/train/value_loss": 0.0004532379098236561, + "objective/train/value_max": -0.00011235475540161133, + "objective/train/value_min": -0.8740234375, + "objective/train/value_reward_corr": 0.5636366361372568, + "objective/train/value_std": 0.015533447265625, + "objective/train/weight_avg": 1.001416563987732, + "objective/train/weighted_lm_loss": 1.4377250671386719, + "objective/train/weights_max": 1.8811969757080078, + "objective/train/weights_min": 0.22597576677799225, + "theoretical_loss": 3.730400971863568, + "tokens_seen": 861798400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007462686567164179, + "loss": 0.0767, + "theoretical_loss": 3.730345912509141, + "tokens_seen": 861929472 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007461884127748355, + "loss": 0.0742, + "theoretical_loss": 3.7302358259448924, + "tokens_seen": 862191616 + }, + { + "epoch": 0.26, + "learning_rate": 0.000746108168833253, + "loss": 0.0778, + "theoretical_loss": 3.730125782215362, + "tokens_seen": 862453760 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007460279248916708, + "loss": 0.0766, + "theoretical_loss": 3.730015781290872, + "tokens_seen": 862715904 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007459476809500883, + "loss": 0.0784, + "theoretical_loss": 3.729905823141771, + "tokens_seen": 862978048 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007458674370085059, + "loss": 0.0739, + "theoretical_loss": 3.729795907738441, + "tokens_seen": 863240192 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007457871930669235, + "loss": 0.0748, + "theoretical_loss": 3.729686035051291, + "tokens_seen": 863502336 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007457069491253411, + "loss": 0.0762, + "theoretical_loss": 3.7295762050507593, + "tokens_seen": 863764480 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007456267051837586, + "loss": 0.0754, + "theoretical_loss": 3.7294664177073145, + "tokens_seen": 864026624 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007455464612421762, + "loss": 0.0773, + "theoretical_loss": 3.7293566729914547, + "tokens_seen": 864288768 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007454662173005938, + "loss": 0.0756, + "theoretical_loss": 3.7292469708737066, + "tokens_seen": 864550912 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007453859733590113, + "loss": 0.0767, + "theoretical_loss": 3.729137311324627, + "tokens_seen": 864813056 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 6.832103099441156e-05, + "objective/train/docs_used": 319421, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.434070348739624, + "objective/train/original_loss": 1.4340702295303345, + "objective/train/theoretical_loss": 3.7290276943148015, + "objective/train/tokens_used": 885535200, + "objective/train/value_avg": -0.00914764404296875, + "objective/train/value_loss": 0.0007328266510739923, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.91455078125, + "objective/train/value_reward_corr": 0.5348672218461172, + "objective/train/value_std": 0.0240936279296875, + "objective/train/weight_avg": 1.0004147291183472, + "objective/train/weighted_lm_loss": 1.43430757522583, + "objective/train/weights_max": 2.415832042694092, + "objective/train/weights_min": 0.3684886395931244, + "theoretical_loss": 3.7290276943148015, + "tokens_seen": 865075200 + }, + { + "epoch": 0.26, + "learning_rate": 0.000745305729417429, + "loss": 0.073, + "theoretical_loss": 3.7290276943148015, + "tokens_seen": 865075200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007452254854758466, + "loss": 0.0777, + "theoretical_loss": 3.7289181198148458, + "tokens_seen": 865337344 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007451452415342642, + "loss": 0.0774, + "theoretical_loss": 3.7288085877954025, + "tokens_seen": 865599488 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007450649975926818, + "loss": 0.0766, + "theoretical_loss": 3.728699098227146, + "tokens_seen": 865861632 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007449847536510994, + "loss": 0.0757, + "theoretical_loss": 3.728589651080779, + "tokens_seen": 866123776 + }, + { + "epoch": 0.26, + "learning_rate": 0.000744904509709517, + "loss": 0.0754, + "theoretical_loss": 3.728480246327032, + "tokens_seen": 866385920 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007448242657679345, + "loss": 0.0745, + "theoretical_loss": 3.7283708839366656, + "tokens_seen": 866648064 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007447440218263521, + "loss": 0.0766, + "theoretical_loss": 3.72826156388047, + "tokens_seen": 866910208 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007446637778847697, + "loss": 0.0763, + "theoretical_loss": 3.728152286129263, + "tokens_seen": 867172352 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007445835339431873, + "loss": 0.0747, + "theoretical_loss": 3.728043050653893, + "tokens_seen": 867434496 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007445032900016049, + "loss": 0.0798, + "theoretical_loss": 3.7279338574252354, + "tokens_seen": 867696640 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007444230460600226, + "loss": 0.0782, + "theoretical_loss": 3.7278247064141956, + "tokens_seen": 867958784 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007443428021184401, + "loss": 0.0772, + "theoretical_loss": 3.7277155975917076, + "tokens_seen": 868220928 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0010427895467728376, + "objective/train/docs_used": 320668, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5272066593170166, + "objective/train/original_loss": 1.5272066593170166, + "objective/train/theoretical_loss": 3.727661058992095, + "objective/train/tokens_used": 888812000, + "objective/train/value_avg": -0.0088348388671875, + "objective/train/value_loss": 0.00025395132252015173, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.421630859375, + "objective/train/value_reward_corr": 0.7319601341651985, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.0011564493179321, + "objective/train/weighted_lm_loss": 1.5287580490112305, + "objective/train/weights_max": 1.296626091003418, + "objective/train/weights_min": 0.3688490390777588, + "theoretical_loss": 3.727661058992095, + "tokens_seen": 868352000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007442625581768576, + "loss": 0.0782, + "theoretical_loss": 3.7276065309287345, + "tokens_seen": 868483072 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007441823142352753, + "loss": 0.0775, + "theoretical_loss": 3.727497506396267, + "tokens_seen": 868745216 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007441020702936928, + "loss": 0.0752, + "theoretical_loss": 3.7273885239653266, + "tokens_seen": 869007360 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007440218263521104, + "loss": 0.0753, + "theoretical_loss": 3.727279583606961, + "tokens_seen": 869269504 + }, + { + "epoch": 0.26, + "learning_rate": 0.000743941582410528, + "loss": 0.0763, + "theoretical_loss": 3.727170685292248, + "tokens_seen": 869531648 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007438613384689456, + "loss": 0.077, + "theoretical_loss": 3.7270618289922943, + "tokens_seen": 869793792 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007437810945273631, + "loss": 0.0753, + "theoretical_loss": 3.7269530146782337, + "tokens_seen": 870055936 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007437008505857808, + "loss": 0.0809, + "theoretical_loss": 3.72684424232123, + "tokens_seen": 870318080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007436206066441984, + "loss": 0.0772, + "theoretical_loss": 3.7267355118924748, + "tokens_seen": 870580224 + }, + { + "epoch": 0.26, + "learning_rate": 0.000743540362702616, + "loss": 0.0753, + "theoretical_loss": 3.726626823363188, + "tokens_seen": 870842368 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007434601187610336, + "loss": 0.0754, + "theoretical_loss": 3.7265181767046176, + "tokens_seen": 871104512 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007433798748194511, + "loss": 0.0773, + "theoretical_loss": 3.726409571888042, + "tokens_seen": 871366656 + }, + { + "epoch": 0.26, + "objective/train/advantage_avg": 0.0008427270222455263, + "objective/train/docs_used": 321822, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6887977123260498, + "objective/train/original_loss": 1.6887975931167603, + "objective/train/theoretical_loss": 3.7263010088847652, + "objective/train/tokens_used": 892088800, + "objective/train/value_avg": -0.00997161865234375, + "objective/train/value_loss": 0.00024173302517738193, + "objective/train/value_max": -0.00015115737915039062, + "objective/train/value_min": -0.336181640625, + "objective/train/value_reward_corr": 0.7177472974340894, + "objective/train/value_std": 0.01708984375, + "objective/train/weight_avg": 1.000956654548645, + "objective/train/weighted_lm_loss": 1.689243197441101, + "objective/train/weights_max": 1.1999152898788452, + "objective/train/weights_min": 0.40030547976493835, + "theoretical_loss": 3.7263010088847652, + "tokens_seen": 871628800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007432996308778688, + "loss": 0.0784, + "theoretical_loss": 3.7263010088847652, + "tokens_seen": 871628800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007432193869362863, + "loss": 0.0753, + "theoretical_loss": 3.726192487666121, + "tokens_seen": 871890944 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007431391429947038, + "loss": 0.0765, + "theoretical_loss": 3.7260840082034714, + "tokens_seen": 872153088 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007430588990531216, + "loss": 0.0759, + "theoretical_loss": 3.7259755704682065, + "tokens_seen": 872415232 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007429786551115391, + "loss": 0.0766, + "theoretical_loss": 3.7258671744317446, + "tokens_seen": 872677376 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007428984111699567, + "loss": 0.0768, + "theoretical_loss": 3.725758820065531, + "tokens_seen": 872939520 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007428181672283743, + "loss": 0.0761, + "theoretical_loss": 3.725650507341042, + "tokens_seen": 873201664 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007427379232867919, + "loss": 0.0774, + "theoretical_loss": 3.7255422362297788, + "tokens_seen": 873463808 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007426576793452094, + "loss": 0.0759, + "theoretical_loss": 3.7254340067032725, + "tokens_seen": 873725952 + }, + { + "epoch": 0.26, + "learning_rate": 0.000742577435403627, + "loss": 0.0766, + "theoretical_loss": 3.7253258187330816, + "tokens_seen": 873988096 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007424971914620446, + "loss": 0.0763, + "theoretical_loss": 3.7252176722907926, + "tokens_seen": 874250240 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007424169475204621, + "loss": 0.0788, + "theoretical_loss": 3.725109567348021, + "tokens_seen": 874512384 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007423367035788799, + "loss": 0.0778, + "theoretical_loss": 3.725001503876408, + "tokens_seen": 874774528 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.00147084123454988, + "objective/train/docs_used": 323008, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5208227634429932, + "objective/train/original_loss": 1.520822525024414, + "objective/train/theoretical_loss": 3.7249474876834325, + "objective/train/tokens_used": 895365600, + "objective/train/value_avg": -0.007843017578125, + "objective/train/value_loss": 0.00020401620713528246, + "objective/train/value_max": -0.00010311603546142578, + "objective/train/value_min": -0.30029296875, + "objective/train/value_reward_corr": 0.789625392294421, + "objective/train/value_std": 0.017059326171875, + "objective/train/weight_avg": 1.0015629529953003, + "objective/train/weighted_lm_loss": 1.5230070352554321, + "objective/train/weights_max": 1.1601619720458984, + "objective/train/weights_min": 0.37002718448638916, + "theoretical_loss": 3.7249474876834325, + "tokens_seen": 874905600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007422564596372974, + "loss": 0.0756, + "theoretical_loss": 3.7248934818476247, + "tokens_seen": 875036672 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007421762156957151, + "loss": 0.0757, + "theoretical_loss": 3.7247855012333693, + "tokens_seen": 875298816 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007420959717541326, + "loss": 0.0759, + "theoretical_loss": 3.7246775620053665, + "tokens_seen": 875560960 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007420157278125502, + "loss": 0.0768, + "theoretical_loss": 3.724569664135372, + "tokens_seen": 875823104 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007419354838709678, + "loss": 0.0763, + "theoretical_loss": 3.7244618075951657, + "tokens_seen": 876085248 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007418552399293853, + "loss": 0.0782, + "theoretical_loss": 3.7243539923565576, + "tokens_seen": 876347392 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007417749959878029, + "loss": 0.0773, + "theoretical_loss": 3.724246218391384, + "tokens_seen": 876609536 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007416947520462205, + "loss": 0.0778, + "theoretical_loss": 3.7241384856715096, + "tokens_seen": 876871680 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007416145081046381, + "loss": 0.0754, + "theoretical_loss": 3.724030794168826, + "tokens_seen": 877133824 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007415342641630557, + "loss": 0.0752, + "theoretical_loss": 3.723923143855253, + "tokens_seen": 877395968 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007414540202214734, + "loss": 0.0746, + "theoretical_loss": 3.723815534702738, + "tokens_seen": 877658112 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007413737762798909, + "loss": 0.0785, + "theoretical_loss": 3.7237079666832553, + "tokens_seen": 877920256 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.0005045576835982502, + "objective/train/docs_used": 324331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4083878993988037, + "objective/train/original_loss": 1.4083881378173828, + "objective/train/theoretical_loss": 3.7236004397688065, + "objective/train/tokens_used": 898642400, + "objective/train/value_avg": -0.01111602783203125, + "objective/train/value_loss": 0.0004869569675065577, + "objective/train/value_max": -0.00018668174743652344, + "objective/train/value_min": -0.7138671875, + "objective/train/value_reward_corr": 0.598061434276012, + "objective/train/value_std": 0.0183563232421875, + "objective/train/weight_avg": 0.9997174143791199, + "objective/train/weighted_lm_loss": 1.406802773475647, + "objective/train/weights_max": 1.8978021144866943, + "objective/train/weights_min": 0.38027602434158325, + "theoretical_loss": 3.7236004397688065, + "tokens_seen": 878182400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007412935323383084, + "loss": 0.078, + "theoretical_loss": 3.7236004397688065, + "tokens_seen": 878182400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007412132883967261, + "loss": 0.0754, + "theoretical_loss": 3.723492953931421, + "tokens_seen": 878444544 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007411330444551436, + "loss": 0.0759, + "theoretical_loss": 3.7233855091431565, + "tokens_seen": 878706688 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007410528005135613, + "loss": 0.0759, + "theoretical_loss": 3.723278105376096, + "tokens_seen": 878968832 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007409725565719788, + "loss": 0.0748, + "theoretical_loss": 3.723170742602351, + "tokens_seen": 879230976 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007408923126303964, + "loss": 0.0775, + "theoretical_loss": 3.7230634207940607, + "tokens_seen": 879493120 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007408120686888141, + "loss": 0.0773, + "theoretical_loss": 3.7229561399233906, + "tokens_seen": 879755264 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007407318247472316, + "loss": 0.0791, + "theoretical_loss": 3.7228488999625338, + "tokens_seen": 880017408 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007406515808056492, + "loss": 0.0775, + "theoretical_loss": 3.722741700883711, + "tokens_seen": 880279552 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007405713368640668, + "loss": 0.0755, + "theoretical_loss": 3.7226345426591694, + "tokens_seen": 880541696 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007404910929224844, + "loss": 0.0732, + "theoretical_loss": 3.722527425261183, + "tokens_seen": 880803840 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007404108489809019, + "loss": 0.0783, + "theoretical_loss": 3.7224203486620535, + "tokens_seen": 881065984 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007403306050393196, + "loss": 0.0777, + "theoretical_loss": 3.7223133128341104, + "tokens_seen": 881328128 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0017001149244606495, + "objective/train/docs_used": 325483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4574103355407715, + "objective/train/original_loss": 1.4574103355407715, + "objective/train/theoretical_loss": 3.722259810200693, + "objective/train/tokens_used": 901919200, + "objective/train/value_avg": -0.007709503173828125, + "objective/train/value_loss": 0.00018067251949105412, + "objective/train/value_max": -0.00011593103408813477, + "objective/train/value_min": -0.2108154296875, + "objective/train/value_reward_corr": 0.625343074103676, + "objective/train/value_std": 0.011138916015625, + "objective/train/weight_avg": 1.0017832517623901, + "objective/train/weighted_lm_loss": 1.4603242874145508, + "objective/train/weights_max": 1.2097421884536743, + "objective/train/weights_min": 0.36907634139060974, + "theoretical_loss": 3.722259810200693, + "tokens_seen": 881459200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007402503610977371, + "loss": 0.0752, + "theoretical_loss": 3.722206317749708, + "tokens_seen": 881590272 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007401701171561546, + "loss": 0.0778, + "theoretical_loss": 3.722099363381229, + "tokens_seen": 881852416 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007400898732145724, + "loss": 0.0769, + "theoretical_loss": 3.7219924497010837, + "tokens_seen": 882114560 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007400096292729899, + "loss": 0.0766, + "theoretical_loss": 3.721885576681708, + "tokens_seen": 882376704 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007399293853314075, + "loss": 0.078, + "theoretical_loss": 3.7217787442955643, + "tokens_seen": 882638848 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007398491413898251, + "loss": 0.0767, + "theoretical_loss": 3.721671952515144, + "tokens_seen": 882900992 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007397688974482427, + "loss": 0.0762, + "theoretical_loss": 3.7215652013129628, + "tokens_seen": 883163136 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007396886535066603, + "loss": 0.0757, + "theoretical_loss": 3.7214584906615644, + "tokens_seen": 883425280 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007396084095650778, + "loss": 0.0732, + "theoretical_loss": 3.7213518205335196, + "tokens_seen": 883687424 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007395281656234954, + "loss": 0.0764, + "theoretical_loss": 3.721245190901425, + "tokens_seen": 883949568 + }, + { + "epoch": 0.27, + "learning_rate": 0.000739447921681913, + "loss": 0.077, + "theoretical_loss": 3.721138601737904, + "tokens_seen": 884211712 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007393676777403307, + "loss": 0.0755, + "theoretical_loss": 3.721032053015607, + "tokens_seen": 884473856 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0006489785737358034, + "objective/train/docs_used": 326745, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.7073997259140015, + "objective/train/original_loss": 1.707399606704712, + "objective/train/theoretical_loss": 3.720925544707211, + "objective/train/tokens_used": 905196000, + "objective/train/value_avg": -0.0076141357421875, + "objective/train/value_loss": 0.00017974516958929598, + "objective/train/value_max": -0.00010389089584350586, + "objective/train/value_min": -0.5849609375, + "objective/train/value_reward_corr": 0.7308202130586332, + "objective/train/value_std": 0.0149078369140625, + "objective/train/weight_avg": 1.0007365942001343, + "objective/train/weighted_lm_loss": 1.7088409662246704, + "objective/train/weights_max": 1.28644597530365, + "objective/train/weights_min": 0.5981242656707764, + "theoretical_loss": 3.720925544707211, + "tokens_seen": 884736000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007392874337987482, + "loss": 0.0761, + "theoretical_loss": 3.720925544707211, + "tokens_seen": 884736000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007392071898571659, + "loss": 0.078, + "theoretical_loss": 3.720819076785419, + "tokens_seen": 884998144 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007391269459155834, + "loss": 0.0741, + "theoretical_loss": 3.720712649222961, + "tokens_seen": 885260288 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007390467019740009, + "loss": 0.0752, + "theoretical_loss": 3.720606261992593, + "tokens_seen": 885522432 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007389664580324186, + "loss": 0.0777, + "theoretical_loss": 3.7204999150670988, + "tokens_seen": 885784576 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007388862140908361, + "loss": 0.0753, + "theoretical_loss": 3.7203936084192866, + "tokens_seen": 886046720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007388059701492537, + "loss": 0.0775, + "theoretical_loss": 3.720287342021992, + "tokens_seen": 886308864 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007387257262076713, + "loss": 0.0748, + "theoretical_loss": 3.720181115848078, + "tokens_seen": 886571008 + }, + { + "epoch": 0.27, + "learning_rate": 0.000738645482266089, + "loss": 0.0742, + "theoretical_loss": 3.7200749298704316, + "tokens_seen": 886833152 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007385652383245065, + "loss": 0.0778, + "theoretical_loss": 3.7199687840619675, + "tokens_seen": 887095296 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007384849943829242, + "loss": 0.0767, + "theoretical_loss": 3.719862678395627, + "tokens_seen": 887357440 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007384047504413417, + "loss": 0.0755, + "theoretical_loss": 3.719756612844377, + "tokens_seen": 887619584 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007383245064997593, + "loss": 0.0738, + "theoretical_loss": 3.7196505873812105, + "tokens_seen": 887881728 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0009603702928870916, + "objective/train/docs_used": 327973, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3757928609848022, + "objective/train/original_loss": 1.3757928609848022, + "objective/train/theoretical_loss": 3.719597589674226, + "objective/train/tokens_used": 908472800, + "objective/train/value_avg": -0.006237030029296875, + "objective/train/value_loss": 0.00027475733077153563, + "objective/train/value_max": -0.00013554096221923828, + "objective/train/value_min": -0.5302734375, + "objective/train/value_reward_corr": 0.5065483364707875, + "objective/train/value_std": 0.0123748779296875, + "objective/train/weight_avg": 1.0010859966278076, + "objective/train/weighted_lm_loss": 1.3766732215881348, + "objective/train/weights_max": 1.5861471891403198, + "objective/train/weights_min": 0.37259092926979065, + "theoretical_loss": 3.719597589674226, + "tokens_seen": 888012800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007382442625581769, + "loss": 0.0765, + "theoretical_loss": 3.7195446019791465, + "tokens_seen": 888143872 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007381640186165944, + "loss": 0.0757, + "theoretical_loss": 3.7194386566112314, + "tokens_seen": 888406016 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007380837746750121, + "loss": 0.0739, + "theoretical_loss": 3.7193327512505356, + "tokens_seen": 888668160 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007380035307334296, + "loss": 0.0773, + "theoretical_loss": 3.7192268858701576, + "tokens_seen": 888930304 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007379232867918472, + "loss": 0.0743, + "theoretical_loss": 3.7191210604432205, + "tokens_seen": 889192448 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007378430428502649, + "loss": 0.0776, + "theoretical_loss": 3.7190152749428735, + "tokens_seen": 889454592 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007377627989086824, + "loss": 0.0758, + "theoretical_loss": 3.7189095293422927, + "tokens_seen": 889716736 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007376825549671, + "loss": 0.0774, + "theoretical_loss": 3.71880382361468, + "tokens_seen": 889978880 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007376023110255176, + "loss": 0.0768, + "theoretical_loss": 3.7186981577332614, + "tokens_seen": 890241024 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007375220670839352, + "loss": 0.0737, + "theoretical_loss": 3.718592531671291, + "tokens_seen": 890503168 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007374418231423527, + "loss": 0.0761, + "theoretical_loss": 3.7184869454020477, + "tokens_seen": 890765312 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007373615792007704, + "loss": 0.075, + "theoretical_loss": 3.7183813988988357, + "tokens_seen": 891027456 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0005341742653399706, + "objective/train/docs_used": 329220, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4387054443359375, + "objective/train/original_loss": 1.4387052059173584, + "objective/train/theoretical_loss": 3.7182758921349865, + "objective/train/tokens_used": 911749600, + "objective/train/value_avg": -0.00934600830078125, + "objective/train/value_loss": 0.0006961746839806437, + "objective/train/value_max": -0.0002532005310058594, + "objective/train/value_min": -0.74853515625, + "objective/train/value_reward_corr": 0.5610245926975079, + "objective/train/value_std": 0.018096923828125, + "objective/train/weight_avg": 1.0008351802825928, + "objective/train/weighted_lm_loss": 1.439525842666626, + "objective/train/weights_max": 1.8936995267868042, + "objective/train/weights_min": 0.36917349696159363, + "theoretical_loss": 3.7182758921349865, + "tokens_seen": 891289600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007372813352591879, + "loss": 0.0782, + "theoretical_loss": 3.7182758921349865, + "tokens_seen": 891289600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007372010913176055, + "loss": 0.0762, + "theoretical_loss": 3.718170425083856, + "tokens_seen": 891551744 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007371208473760232, + "loss": 0.0755, + "theoretical_loss": 3.718064997718826, + "tokens_seen": 891813888 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007370406034344407, + "loss": 0.0755, + "theoretical_loss": 3.7179596100133034, + "tokens_seen": 892076032 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007369603594928584, + "loss": 0.0782, + "theoretical_loss": 3.7178542619407233, + "tokens_seen": 892338176 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007368801155512759, + "loss": 0.0786, + "theoretical_loss": 3.7177489534745427, + "tokens_seen": 892600320 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007367998716096935, + "loss": 0.0789, + "theoretical_loss": 3.717643684588247, + "tokens_seen": 892862464 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007367196276681111, + "loss": 0.077, + "theoretical_loss": 3.7175384552553457, + "tokens_seen": 893124608 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007366393837265286, + "loss": 0.0743, + "theoretical_loss": 3.7174332654493742, + "tokens_seen": 893386752 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007365591397849462, + "loss": 0.0768, + "theoretical_loss": 3.717328115143894, + "tokens_seen": 893648896 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007364788958433638, + "loss": 0.0768, + "theoretical_loss": 3.717223004312491, + "tokens_seen": 893911040 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007363986519017815, + "loss": 0.077, + "theoretical_loss": 3.717117932928777, + "tokens_seen": 894173184 + }, + { + "epoch": 0.27, + "learning_rate": 0.000736318407960199, + "loss": 0.0754, + "theoretical_loss": 3.7170129009663886, + "tokens_seen": 894435328 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0014942382695153356, + "objective/train/docs_used": 330336, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3993629217147827, + "objective/train/original_loss": 1.3993628025054932, + "objective/train/theoretical_loss": 3.7169603997599605, + "objective/train/tokens_used": 915026400, + "objective/train/value_avg": -0.006420135498046875, + "objective/train/value_loss": 0.00015116189024411142, + "objective/train/value_max": -0.00015473365783691406, + "objective/train/value_min": -0.1751708984375, + "objective/train/value_reward_corr": 0.49777587233171894, + "objective/train/value_std": 0.0080718994140625, + "objective/train/weight_avg": 1.0015615224838257, + "objective/train/weighted_lm_loss": 1.4017528295516968, + "objective/train/weights_max": 1.12247896194458, + "objective/train/weights_min": 0.36870554089546204, + "theoretical_loss": 3.7169603997599605, + "tokens_seen": 894566400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007362381640186167, + "loss": 0.0778, + "theoretical_loss": 3.716907908398989, + "tokens_seen": 894697472 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007361579200770342, + "loss": 0.0757, + "theoretical_loss": 3.7168029552002655, + "tokens_seen": 894959616 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007360776761354517, + "loss": 0.0766, + "theoretical_loss": 3.716698041343931, + "tokens_seen": 895221760 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007359974321938694, + "loss": 0.078, + "theoretical_loss": 3.716593166803724, + "tokens_seen": 895483904 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007359171882522869, + "loss": 0.0735, + "theoretical_loss": 3.7164883315534087, + "tokens_seen": 895746048 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007358369443107046, + "loss": 0.0775, + "theoretical_loss": 3.7163835355667723, + "tokens_seen": 896008192 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007357567003691221, + "loss": 0.074, + "theoretical_loss": 3.7162787788176295, + "tokens_seen": 896270336 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007356764564275397, + "loss": 0.0763, + "theoretical_loss": 3.716174061279819, + "tokens_seen": 896532480 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007355962124859574, + "loss": 0.0741, + "theoretical_loss": 3.7160693829272047, + "tokens_seen": 896794624 + }, + { + "epoch": 0.27, + "learning_rate": 0.000735515968544375, + "loss": 0.0747, + "theoretical_loss": 3.715964743733676, + "tokens_seen": 897056768 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007354357246027925, + "loss": 0.0779, + "theoretical_loss": 3.7158601436731464, + "tokens_seen": 897318912 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007353554806612101, + "loss": 0.0751, + "theoretical_loss": 3.715755582719556, + "tokens_seen": 897581056 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0011574587551876903, + "objective/train/docs_used": 331577, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4930155277252197, + "objective/train/original_loss": 1.4930155277252197, + "objective/train/theoretical_loss": 3.7156510608468674, + "objective/train/tokens_used": 918303200, + "objective/train/value_avg": -0.007770538330078125, + "objective/train/value_loss": 0.0005507190944626927, + "objective/train/value_max": -7.843971252441406e-05, + "objective/train/value_min": -0.568359375, + "objective/train/value_reward_corr": 0.6120014802789004, + "objective/train/value_std": 0.01477813720703125, + "objective/train/weight_avg": 1.0013858079910278, + "objective/train/weighted_lm_loss": 1.4944040775299072, + "objective/train/weights_max": 1.3329410552978516, + "objective/train/weights_min": 0.22743035852909088, + "theoretical_loss": 3.7156510608468674, + "tokens_seen": 897843200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007352752367196277, + "loss": 0.0722, + "theoretical_loss": 3.7156510608468674, + "tokens_seen": 897843200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007351949927780452, + "loss": 0.0771, + "theoretical_loss": 3.7155465780290706, + "tokens_seen": 898105344 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007351147488364629, + "loss": 0.0756, + "theoretical_loss": 3.7154421342401793, + "tokens_seen": 898367488 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007350345048948804, + "loss": 0.076, + "theoretical_loss": 3.7153377294542325, + "tokens_seen": 898629632 + }, + { + "epoch": 0.27, + "learning_rate": 0.000734954260953298, + "loss": 0.0757, + "theoretical_loss": 3.715233363645293, + "tokens_seen": 898891776 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007348740170117157, + "loss": 0.0759, + "theoretical_loss": 3.7151290367874497, + "tokens_seen": 899153920 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007347937730701332, + "loss": 0.0747, + "theoretical_loss": 3.715024748854815, + "tokens_seen": 899416064 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007347135291285509, + "loss": 0.0754, + "theoretical_loss": 3.714920499821528, + "tokens_seen": 899678208 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007346332851869684, + "loss": 0.0746, + "theoretical_loss": 3.7148162896617505, + "tokens_seen": 899940352 + }, + { + "epoch": 0.27, + "learning_rate": 0.000734553041245386, + "loss": 0.0742, + "theoretical_loss": 3.714712118349669, + "tokens_seen": 900202496 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007344727973038036, + "loss": 0.0789, + "theoretical_loss": 3.7146079858594976, + "tokens_seen": 900464640 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007343925533622212, + "loss": 0.0753, + "theoretical_loss": 3.714503892165471, + "tokens_seen": 900726784 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007343123094206387, + "loss": 0.0761, + "theoretical_loss": 3.714399837241851, + "tokens_seen": 900988928 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": 0.0022575294133275747, + "objective/train/docs_used": 332819, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6288766860961914, + "objective/train/original_loss": 1.6288766860961914, + "objective/train/theoretical_loss": 3.714347824310907, + "objective/train/tokens_used": 921580000, + "objective/train/value_avg": -0.00806427001953125, + "objective/train/value_loss": 0.00021155290596652776, + "objective/train/value_max": -0.00020182132720947266, + "objective/train/value_min": -0.2548828125, + "objective/train/value_reward_corr": 0.5714205851186827, + "objective/train/value_std": 0.01141357421875, + "objective/train/weight_avg": 1.0023540258407593, + "objective/train/weighted_lm_loss": 1.632721185684204, + "objective/train/weights_max": 1.173399567604065, + "objective/train/weights_min": 0.3694700598716736, + "theoretical_loss": 3.714347824310907, + "tokens_seen": 901120000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007342320654790563, + "loss": 0.0751, + "theoretical_loss": 3.7142958210629233, + "tokens_seen": 901251072 + }, + { + "epoch": 0.27, + "learning_rate": 0.000734151821537474, + "loss": 0.0769, + "theoretical_loss": 3.714191843602998, + "tokens_seen": 901513216 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007340715775958915, + "loss": 0.077, + "theoretical_loss": 3.7140879048364104, + "tokens_seen": 901775360 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007339913336543092, + "loss": 0.0756, + "theoretical_loss": 3.7139840047375183, + "tokens_seen": 902037504 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007339110897127267, + "loss": 0.0761, + "theoretical_loss": 3.713880143280707, + "tokens_seen": 902299648 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007338308457711443, + "loss": 0.076, + "theoretical_loss": 3.713776320440383, + "tokens_seen": 902561792 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007337506018295619, + "loss": 0.0757, + "theoretical_loss": 3.7136725361909795, + "tokens_seen": 902823936 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007336703578879794, + "loss": 0.0765, + "theoretical_loss": 3.713568790506953, + "tokens_seen": 903086080 + }, + { + "epoch": 0.27, + "learning_rate": 0.000733590113946397, + "loss": 0.0747, + "theoretical_loss": 3.7134650833627854, + "tokens_seen": 903348224 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007335098700048146, + "loss": 0.0736, + "theoretical_loss": 3.7133614147329808, + "tokens_seen": 903610368 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007334296260632322, + "loss": 0.0777, + "theoretical_loss": 3.7132577845920696, + "tokens_seen": 903872512 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007333493821216499, + "loss": 0.0761, + "theoretical_loss": 3.7131541929146055, + "tokens_seen": 904134656 + }, + { + "epoch": 0.27, + "objective/train/advantage_avg": -0.001184366992674768, + "objective/train/docs_used": 333943, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4446207284927368, + "objective/train/original_loss": 1.4446208477020264, + "objective/train/theoretical_loss": 3.713050639675166, + "objective/train/tokens_used": 924856800, + "objective/train/value_avg": -0.00861358642578125, + "objective/train/value_loss": 0.00047513179015368223, + "objective/train/value_max": -0.00012826919555664062, + "objective/train/value_min": -0.440185546875, + "objective/train/value_reward_corr": 0.6907593542665094, + "objective/train/value_std": 0.01509857177734375, + "objective/train/weight_avg": 0.9990274310112, + "objective/train/weighted_lm_loss": 1.4427376985549927, + "objective/train/weights_max": 1.3462886810302734, + "objective/train/weights_min": 0.37275585532188416, + "theoretical_loss": 3.713050639675166, + "tokens_seen": 904396800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007332691381800675, + "loss": 0.073, + "theoretical_loss": 3.713050639675166, + "tokens_seen": 904396800 + }, + { + "epoch": 0.27, + "learning_rate": 0.000733188894238485, + "loss": 0.0732, + "theoretical_loss": 3.712947124848354, + "tokens_seen": 904658944 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007331086502969026, + "loss": 0.0772, + "theoretical_loss": 3.7128436484087954, + "tokens_seen": 904921088 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007330284063553202, + "loss": 0.0751, + "theoretical_loss": 3.712740210331141, + "tokens_seen": 905183232 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007329481624137377, + "loss": 0.0769, + "theoretical_loss": 3.712636810590065, + "tokens_seen": 905445376 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007328679184721554, + "loss": 0.0737, + "theoretical_loss": 3.7125334491602664, + "tokens_seen": 905707520 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007327876745305729, + "loss": 0.0745, + "theoretical_loss": 3.712430126016467, + "tokens_seen": 905969664 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007327074305889905, + "loss": 0.0752, + "theoretical_loss": 3.7123268411334136, + "tokens_seen": 906231808 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007326271866474082, + "loss": 0.0738, + "theoretical_loss": 3.7122235944858772, + "tokens_seen": 906493952 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007325469427058258, + "loss": 0.0736, + "theoretical_loss": 3.712120386048652, + "tokens_seen": 906756096 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007324666987642433, + "loss": 0.0761, + "theoretical_loss": 3.712017215796556, + "tokens_seen": 907018240 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007323864548226609, + "loss": 0.0744, + "theoretical_loss": 3.7119140837044315, + "tokens_seen": 907280384 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007323062108810785, + "loss": 0.0736, + "theoretical_loss": 3.7118109897471445, + "tokens_seen": 907542528 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.00023626594338566065, + "objective/train/docs_used": 335091, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4100664854049683, + "objective/train/original_loss": 1.4100666046142578, + "objective/train/theoretical_loss": 3.7117594570612176, + "objective/train/tokens_used": 928133600, + "objective/train/value_avg": -0.006671905517578125, + "objective/train/value_loss": 0.0001805531937861815, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.2276611328125, + "objective/train/value_reward_corr": 0.687647519301903, + "objective/train/value_std": 0.0102081298828125, + "objective/train/weight_avg": 0.9998487234115601, + "objective/train/weighted_lm_loss": 1.4099451303482056, + "objective/train/weights_max": 1.0809279680252075, + "objective/train/weights_min": 0.37694263458251953, + "theoretical_loss": 3.7117594570612176, + "tokens_seen": 907673600 + }, + { + "epoch": 0.28, + "learning_rate": 0.000732225966939496, + "loss": 0.0734, + "theoretical_loss": 3.7117079338995858, + "tokens_seen": 907804672 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007321457229979137, + "loss": 0.0769, + "theoretical_loss": 3.7116049161366673, + "tokens_seen": 908066816 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007320654790563312, + "loss": 0.0729, + "theoretical_loss": 3.7115019364333275, + "tokens_seen": 908328960 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731985235114749, + "loss": 0.0762, + "theoretical_loss": 3.7113989947645276, + "tokens_seen": 908591104 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007319049911731665, + "loss": 0.0758, + "theoretical_loss": 3.711296091105252, + "tokens_seen": 908853248 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731824747231584, + "loss": 0.0724, + "theoretical_loss": 3.7111932254305096, + "tokens_seen": 909115392 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007317445032900017, + "loss": 0.075, + "theoretical_loss": 3.7110903977153313, + "tokens_seen": 909377536 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007316642593484192, + "loss": 0.0759, + "theoretical_loss": 3.710987607934774, + "tokens_seen": 909639680 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007315840154068368, + "loss": 0.0759, + "theoretical_loss": 3.7108848560639167, + "tokens_seen": 909901824 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007315037714652544, + "loss": 0.0752, + "theoretical_loss": 3.7107821420778615, + "tokens_seen": 910163968 + }, + { + "epoch": 0.28, + "learning_rate": 0.000731423527523672, + "loss": 0.0747, + "theoretical_loss": 3.7106794659517357, + "tokens_seen": 910426112 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007313432835820895, + "loss": 0.073, + "theoretical_loss": 3.7105768276606885, + "tokens_seen": 910688256 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0003594663867261261, + "objective/train/docs_used": 336141, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.548851728439331, + "objective/train/original_loss": 1.548851490020752, + "objective/train/theoretical_loss": 3.710474227179893, + "objective/train/tokens_used": 931410400, + "objective/train/value_avg": -0.005245208740234375, + "objective/train/value_loss": 0.00010473921429365873, + "objective/train/value_max": -6.973743438720703e-05, + "objective/train/value_min": -0.466552734375, + "objective/train/value_reward_corr": 0.6725354994272675, + "objective/train/value_std": 0.0080413818359375, + "objective/train/weight_avg": 1.0004104375839233, + "objective/train/weighted_lm_loss": 1.550046443939209, + "objective/train/weights_max": 1.089442253112793, + "objective/train/weights_min": 0.6256026029586792, + "theoretical_loss": 3.710474227179893, + "tokens_seen": 910950400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007312630396405071, + "loss": 0.0742, + "theoretical_loss": 3.710474227179893, + "tokens_seen": 910950400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007311827956989248, + "loss": 0.0742, + "theoretical_loss": 3.710371664484547, + "tokens_seen": 911212544 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007311025517573423, + "loss": 0.0715, + "theoretical_loss": 3.710269139549869, + "tokens_seen": 911474688 + }, + { + "epoch": 0.28, + "learning_rate": 0.00073102230781576, + "loss": 0.0725, + "theoretical_loss": 3.7101666523511034, + "tokens_seen": 911736832 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007309420638741775, + "loss": 0.0749, + "theoretical_loss": 3.710064202863517, + "tokens_seen": 911998976 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007308618199325952, + "loss": 0.077, + "theoretical_loss": 3.7099617910623994, + "tokens_seen": 912261120 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007307815759910127, + "loss": 0.0755, + "theoretical_loss": 3.7098594169230648, + "tokens_seen": 912523264 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007307013320494302, + "loss": 0.0731, + "theoretical_loss": 3.7097570804208497, + "tokens_seen": 912785408 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007306210881078479, + "loss": 0.0741, + "theoretical_loss": 3.709654781531113, + "tokens_seen": 913047552 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007305408441662654, + "loss": 0.0752, + "theoretical_loss": 3.709552520229239, + "tokens_seen": 913309696 + }, + { + "epoch": 0.28, + "learning_rate": 0.000730460600224683, + "loss": 0.0742, + "theoretical_loss": 3.7094502964906337, + "tokens_seen": 913571840 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007303803562831007, + "loss": 0.0742, + "theoretical_loss": 3.709348110290726, + "tokens_seen": 913833984 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007303001123415183, + "loss": 0.0744, + "theoretical_loss": 3.7092459616049682, + "tokens_seen": 914096128 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0010315380059182644, + "objective/train/docs_used": 337261, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.495456576347351, + "objective/train/original_loss": 1.4954564571380615, + "objective/train/theoretical_loss": 3.709194901322231, + "objective/train/tokens_used": 934687200, + "objective/train/value_avg": -0.01190185546875, + "objective/train/value_loss": 0.00031099331681616604, + "objective/train/value_max": -0.0002453327178955078, + "objective/train/value_min": -0.70556640625, + "objective/train/value_reward_corr": 0.7287494601771468, + "objective/train/value_std": 0.0191650390625, + "objective/train/weight_avg": 1.0011770725250244, + "objective/train/weighted_lm_loss": 1.498216986656189, + "objective/train/weights_max": 1.606416940689087, + "objective/train/weights_min": 0.3689524829387665, + "theoretical_loss": 3.709194901322231, + "tokens_seen": 914227200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007302198683999358, + "loss": 0.0747, + "theoretical_loss": 3.709143850408837, + "tokens_seen": 914358272 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007301396244583534, + "loss": 0.0742, + "theoretical_loss": 3.7090417766778305, + "tokens_seen": 914620416 + }, + { + "epoch": 0.28, + "learning_rate": 0.000730059380516771, + "loss": 0.0738, + "theoretical_loss": 3.7089397403874704, + "tokens_seen": 914882560 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007299791365751885, + "loss": 0.0742, + "theoretical_loss": 3.708837741513301, + "tokens_seen": 915144704 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007298988926336062, + "loss": 0.0752, + "theoretical_loss": 3.7087357800308904, + "tokens_seen": 915406848 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007298186486920237, + "loss": 0.0735, + "theoretical_loss": 3.708633855915829, + "tokens_seen": 915668992 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007297384047504413, + "loss": 0.0742, + "theoretical_loss": 3.708531969143731, + "tokens_seen": 915931136 + }, + { + "epoch": 0.28, + "learning_rate": 0.000729658160808859, + "loss": 0.0769, + "theoretical_loss": 3.708430119690232, + "tokens_seen": 916193280 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007295779168672765, + "loss": 0.075, + "theoretical_loss": 3.708328307530991, + "tokens_seen": 916455424 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007294976729256942, + "loss": 0.0736, + "theoretical_loss": 3.7082265326416914, + "tokens_seen": 916717568 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007294174289841117, + "loss": 0.0733, + "theoretical_loss": 3.708124794998037, + "tokens_seen": 916979712 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007293371850425293, + "loss": 0.0724, + "theoretical_loss": 3.708023094575756, + "tokens_seen": 917241856 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0011083170538768172, + "objective/train/docs_used": 338425, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5612324476242065, + "objective/train/original_loss": 1.561232328414917, + "objective/train/theoretical_loss": 3.7079214313505986, + "objective/train/tokens_used": 937964000, + "objective/train/value_avg": -0.0082550048828125, + "objective/train/value_loss": 0.0003109085373580456, + "objective/train/value_max": -0.00016868114471435547, + "objective/train/value_min": -0.955078125, + "objective/train/value_reward_corr": 0.687470457987905, + "objective/train/value_std": 0.0196990966796875, + "objective/train/weight_avg": 1.0012561082839966, + "objective/train/weighted_lm_loss": 1.562901258468628, + "objective/train/weights_max": 2.4972524642944336, + "objective/train/weights_min": 0.38558143377304077, + "theoretical_loss": 3.7079214313505986, + "tokens_seen": 917504000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007292569411009469, + "loss": 0.0757, + "theoretical_loss": 3.7079214313505986, + "tokens_seen": 917504000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007291766971593645, + "loss": 0.073, + "theoretical_loss": 3.707819805298338, + "tokens_seen": 917766144 + }, + { + "epoch": 0.28, + "learning_rate": 0.000729096453217782, + "loss": 0.0754, + "theoretical_loss": 3.7077182163947704, + "tokens_seen": 918028288 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007290162092761998, + "loss": 0.0749, + "theoretical_loss": 3.7076166646157134, + "tokens_seen": 918290432 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007289359653346173, + "loss": 0.075, + "theoretical_loss": 3.7075151499370094, + "tokens_seen": 918552576 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007288557213930348, + "loss": 0.0769, + "theoretical_loss": 3.7074136723345212, + "tokens_seen": 918814720 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007287754774514525, + "loss": 0.0771, + "theoretical_loss": 3.707312231784136, + "tokens_seen": 919076864 + }, + { + "epoch": 0.28, + "learning_rate": 0.00072869523350987, + "loss": 0.0746, + "theoretical_loss": 3.7072108282617617, + "tokens_seen": 919339008 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007286149895682876, + "loss": 0.0757, + "theoretical_loss": 3.7071094617433307, + "tokens_seen": 919601152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007285347456267052, + "loss": 0.075, + "theoretical_loss": 3.707008132204796, + "tokens_seen": 919863296 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007284545016851228, + "loss": 0.0733, + "theoretical_loss": 3.7069068396221345, + "tokens_seen": 920125440 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007283742577435403, + "loss": 0.0733, + "theoretical_loss": 3.706805583971345, + "tokens_seen": 920387584 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007282940138019579, + "loss": 0.0757, + "theoretical_loss": 3.7067043652284495, + "tokens_seen": 920649728 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.00018246793479193002, + "objective/train/docs_used": 339576, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.468998908996582, + "objective/train/original_loss": 1.468998908996582, + "objective/train/theoretical_loss": 3.706653769689974, + "objective/train/tokens_used": 941240800, + "objective/train/value_avg": -0.0093231201171875, + "objective/train/value_loss": 0.0003314831992611289, + "objective/train/value_max": -0.00017261505126953125, + "objective/train/value_min": -0.313720703125, + "objective/train/value_reward_corr": 0.6134654432248161, + "objective/train/value_std": 0.0128936767578125, + "objective/train/weight_avg": 0.9999735355377197, + "objective/train/weighted_lm_loss": 1.4704101085662842, + "objective/train/weights_max": 1.2405765056610107, + "objective/train/weights_min": 0.38339895009994507, + "theoretical_loss": 3.706653769689974, + "tokens_seen": 920780800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007282137698603756, + "loss": 0.0812, + "theoretical_loss": 3.7066031833694906, + "tokens_seen": 920911872 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007281335259187932, + "loss": 0.0766, + "theoretical_loss": 3.7065020383705347, + "tokens_seen": 921174016 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007280532819772108, + "loss": 0.072, + "theoretical_loss": 3.70640093020767, + "tokens_seen": 921436160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007279730380356283, + "loss": 0.0763, + "theoretical_loss": 3.7062998588570073, + "tokens_seen": 921698304 + }, + { + "epoch": 0.28, + "learning_rate": 0.000727892794094046, + "loss": 0.0741, + "theoretical_loss": 3.7061988242946793, + "tokens_seen": 921960448 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007278125501524635, + "loss": 0.0745, + "theoretical_loss": 3.7060978264968423, + "tokens_seen": 922222592 + }, + { + "epoch": 0.28, + "learning_rate": 0.000727732306210881, + "loss": 0.0754, + "theoretical_loss": 3.705996865439672, + "tokens_seen": 922484736 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007276520622692987, + "loss": 0.0728, + "theoretical_loss": 3.7058959410993695, + "tokens_seen": 922746880 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007275718183277162, + "loss": 0.0744, + "theoretical_loss": 3.7057950534521558, + "tokens_seen": 923009024 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007274915743861338, + "loss": 0.0764, + "theoretical_loss": 3.705694202474275, + "tokens_seen": 923271168 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007274113304445515, + "loss": 0.0743, + "theoretical_loss": 3.7055933881419936, + "tokens_seen": 923533312 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007273310865029691, + "loss": 0.0708, + "theoretical_loss": 3.7054926104315995, + "tokens_seen": 923795456 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0013274255907163024, + "objective/train/docs_used": 340768, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6183089017868042, + "objective/train/original_loss": 1.6183090209960938, + "objective/train/theoretical_loss": 3.705391869319403, + "objective/train/tokens_used": 944517600, + "objective/train/value_avg": -0.00782012939453125, + "objective/train/value_loss": 0.00026030809385702014, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.3779296875, + "objective/train/value_reward_corr": 0.6056620533963706, + "objective/train/value_std": 0.0130767822265625, + "objective/train/weight_avg": 1.0014489889144897, + "objective/train/weighted_lm_loss": 1.620178461074829, + "objective/train/weights_max": 1.4343163967132568, + "objective/train/weights_min": 0.37169942259788513, + "theoretical_loss": 3.705391869319403, + "tokens_seen": 924057600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007272508425613866, + "loss": 0.0733, + "theoretical_loss": 3.705391869319403, + "tokens_seen": 924057600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007271705986198042, + "loss": 0.0717, + "theoretical_loss": 3.7052911647817357, + "tokens_seen": 924319744 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007270903546782218, + "loss": 0.0754, + "theoretical_loss": 3.7051904967949527, + "tokens_seen": 924581888 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007270101107366394, + "loss": 0.0712, + "theoretical_loss": 3.7050898653354296, + "tokens_seen": 924844032 + }, + { + "epoch": 0.28, + "learning_rate": 0.000726929866795057, + "loss": 0.0747, + "theoretical_loss": 3.7049892703795653, + "tokens_seen": 925106176 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007268496228534745, + "loss": 0.0742, + "theoretical_loss": 3.70488871190378, + "tokens_seen": 925368320 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007267693789118923, + "loss": 0.0738, + "theoretical_loss": 3.704788189884515, + "tokens_seen": 925630464 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007266891349703098, + "loss": 0.0755, + "theoretical_loss": 3.7046877042982347, + "tokens_seen": 925892608 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007266088910287273, + "loss": 0.076, + "theoretical_loss": 3.7045872551214254, + "tokens_seen": 926154752 + }, + { + "epoch": 0.28, + "learning_rate": 0.000726528647087145, + "loss": 0.072, + "theoretical_loss": 3.704486842330594, + "tokens_seen": 926416896 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007264484031455625, + "loss": 0.074, + "theoretical_loss": 3.7043864659022696, + "tokens_seen": 926679040 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007263681592039801, + "loss": 0.0717, + "theoretical_loss": 3.704286125813004, + "tokens_seen": 926941184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007262879152623977, + "loss": 0.074, + "theoretical_loss": 3.7041858220393706, + "tokens_seen": 927203328 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": -0.0009031386580318213, + "objective/train/docs_used": 341815, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.391129970550537, + "objective/train/original_loss": 1.391129970550537, + "objective/train/theoretical_loss": 3.7041356837636013, + "objective/train/tokens_used": 947794400, + "objective/train/value_avg": -0.0078125, + "objective/train/value_loss": 0.0002524466544855386, + "objective/train/value_max": -8.481740951538086e-05, + "objective/train/value_min": -0.256591796875, + "objective/train/value_reward_corr": 0.7173105114970126, + "objective/train/value_std": 0.012786865234375, + "objective/train/weight_avg": 0.9992140531539917, + "objective/train/weighted_lm_loss": 1.3906961679458618, + "objective/train/weights_max": 1.1406426429748535, + "objective/train/weights_min": 0.3694460988044739, + "theoretical_loss": 3.7041356837636013, + "tokens_seen": 927334400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007262076713208153, + "loss": 0.0731, + "theoretical_loss": 3.704085554557964, + "tokens_seen": 927465472 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007261274273792328, + "loss": 0.0729, + "theoretical_loss": 3.703985323345399, + "tokens_seen": 927727616 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007260471834376505, + "loss": 0.0707, + "theoretical_loss": 3.7038851283783156, + "tokens_seen": 927989760 + }, + { + "epoch": 0.28, + "learning_rate": 0.000725966939496068, + "loss": 0.0748, + "theoretical_loss": 3.7037849696333724, + "tokens_seen": 928251904 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007258866955544856, + "loss": 0.0767, + "theoretical_loss": 3.703684847087251, + "tokens_seen": 928514048 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007258064516129033, + "loss": 0.0762, + "theoretical_loss": 3.7035847607166534, + "tokens_seen": 928776192 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007257262076713208, + "loss": 0.0735, + "theoretical_loss": 3.703484710498306, + "tokens_seen": 929038336 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007256459637297385, + "loss": 0.0802, + "theoretical_loss": 3.703384696408953, + "tokens_seen": 929300480 + }, + { + "epoch": 0.28, + "learning_rate": 0.000725565719788156, + "loss": 0.0735, + "theoretical_loss": 3.7032847184253628, + "tokens_seen": 929562624 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007254854758465736, + "loss": 0.0732, + "theoretical_loss": 3.7031847765243233, + "tokens_seen": 929824768 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007254052319049912, + "loss": 0.0713, + "theoretical_loss": 3.7030848706826465, + "tokens_seen": 930086912 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007253249879634087, + "loss": 0.0751, + "theoretical_loss": 3.702985000877163, + "tokens_seen": 930349056 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.002034248784184456, + "objective/train/docs_used": 343003, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.581547737121582, + "objective/train/original_loss": 1.581547737121582, + "objective/train/theoretical_loss": 3.7028851670847267, + "objective/train/tokens_used": 951071200, + "objective/train/value_avg": -0.007381439208984375, + "objective/train/value_loss": 0.0002921258274000138, + "objective/train/value_max": -0.00011324882507324219, + "objective/train/value_min": -0.54833984375, + "objective/train/value_reward_corr": 0.569236282685358, + "objective/train/value_std": 0.01422882080078125, + "objective/train/weight_avg": 1.002168893814087, + "objective/train/weighted_lm_loss": 1.5849435329437256, + "objective/train/weights_max": 1.5144890546798706, + "objective/train/weights_min": 0.36928337812423706, + "theoretical_loss": 3.7028851670847267, + "tokens_seen": 930611200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007252447440218264, + "loss": 0.0746, + "theoretical_loss": 3.7028851670847267, + "tokens_seen": 930611200 + }, + { + "epoch": 0.28, + "learning_rate": 0.000725164500080244, + "loss": 0.0729, + "theoretical_loss": 3.7027853692822124, + "tokens_seen": 930873344 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007250842561386616, + "loss": 0.076, + "theoretical_loss": 3.702685607446516, + "tokens_seen": 931135488 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007250040121970791, + "loss": 0.0761, + "theoretical_loss": 3.7025858815545543, + "tokens_seen": 931397632 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007249237682554968, + "loss": 0.0746, + "theoretical_loss": 3.7024861915832665, + "tokens_seen": 931659776 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007248435243139143, + "loss": 0.0739, + "theoretical_loss": 3.7023865375096126, + "tokens_seen": 931921920 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007247632803723318, + "loss": 0.0758, + "theoretical_loss": 3.7022869193105734, + "tokens_seen": 932184064 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007246830364307495, + "loss": 0.073, + "theoretical_loss": 3.702187336963151, + "tokens_seen": 932446208 + }, + { + "epoch": 0.28, + "learning_rate": 0.000724602792489167, + "loss": 0.0742, + "theoretical_loss": 3.70208779044437, + "tokens_seen": 932708352 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007245225485475848, + "loss": 0.0719, + "theoretical_loss": 3.7019882797312746, + "tokens_seen": 932970496 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007244423046060023, + "loss": 0.0795, + "theoretical_loss": 3.701888804800931, + "tokens_seen": 933232640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007243620606644199, + "loss": 0.0709, + "theoretical_loss": 3.701789365630426, + "tokens_seen": 933494784 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007242818167228375, + "loss": 0.0728, + "theoretical_loss": 3.701689962196868, + "tokens_seen": 933756928 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0019412703113630414, + "objective/train/docs_used": 344002, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6019786596298218, + "objective/train/original_loss": 1.6019787788391113, + "objective/train/theoretical_loss": 3.7016402738742964, + "objective/train/tokens_used": 954348000, + "objective/train/value_avg": -0.00753021240234375, + "objective/train/value_loss": 0.00015285314293578267, + "objective/train/value_max": -0.00015234947204589844, + "objective/train/value_min": -0.267822265625, + "objective/train/value_reward_corr": 0.5820011602921469, + "objective/train/value_std": 0.0103759765625, + "objective/train/weight_avg": 1.0020129680633545, + "objective/train/weighted_lm_loss": 1.6059612035751343, + "objective/train/weights_max": 1.2121343612670898, + "objective/train/weights_min": 0.3773167133331299, + "theoretical_loss": 3.7016402738742964, + "tokens_seen": 933888000 + }, + { + "epoch": 0.28, + "learning_rate": 0.000724201572781255, + "loss": 0.0768, + "theoretical_loss": 3.701590594477387, + "tokens_seen": 934019072 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007241213288396726, + "loss": 0.0769, + "theoretical_loss": 3.701491262449131, + "tokens_seen": 934281216 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007240410848980902, + "loss": 0.0765, + "theoretical_loss": 3.7013919660892736, + "tokens_seen": 934543360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007239608409565078, + "loss": 0.0742, + "theoretical_loss": 3.701292705375006, + "tokens_seen": 934805504 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007238805970149253, + "loss": 0.0755, + "theoretical_loss": 3.701193480283542, + "tokens_seen": 935067648 + }, + { + "epoch": 0.28, + "learning_rate": 0.000723800353073343, + "loss": 0.0718, + "theoretical_loss": 3.701094290792116, + "tokens_seen": 935329792 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007237201091317606, + "loss": 0.0754, + "theoretical_loss": 3.7009951368779825, + "tokens_seen": 935591936 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007236398651901781, + "loss": 0.073, + "theoretical_loss": 3.700896018518418, + "tokens_seen": 935854080 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007235596212485958, + "loss": 0.0756, + "theoretical_loss": 3.700796935690719, + "tokens_seen": 936116224 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007234793773070133, + "loss": 0.0737, + "theoretical_loss": 3.700697888372204, + "tokens_seen": 936378368 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007233991333654309, + "loss": 0.0746, + "theoretical_loss": 3.7005988765402114, + "tokens_seen": 936640512 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007233188894238485, + "loss": 0.0737, + "theoretical_loss": 3.700499900172101, + "tokens_seen": 936902656 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0009387860773131251, + "objective/train/docs_used": 344763, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4261451959609985, + "objective/train/original_loss": 1.426145315170288, + "objective/train/theoretical_loss": 3.700400959245252, + "objective/train/tokens_used": 957624800, + "objective/train/value_avg": -0.01166534423828125, + "objective/train/value_loss": 0.0004122898681089282, + "objective/train/value_max": -0.0001398324966430664, + "objective/train/value_min": -0.81689453125, + "objective/train/value_reward_corr": 0.8483639387700215, + "objective/train/value_std": 0.03436279296875, + "objective/train/weight_avg": 1.0011343955993652, + "objective/train/weighted_lm_loss": 1.4269537925720215, + "objective/train/weights_max": 1.7235952615737915, + "objective/train/weights_min": 0.3695659041404724, + "theoretical_loss": 3.700400959245252, + "tokens_seen": 937164800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007232386454822661, + "loss": 0.0716, + "theoretical_loss": 3.700400959245252, + "tokens_seen": 937164800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007231584015406837, + "loss": 0.0732, + "theoretical_loss": 3.7003020537370657, + "tokens_seen": 937426944 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007230781575991013, + "loss": 0.0736, + "theoretical_loss": 3.7002031836249643, + "tokens_seen": 937689088 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007229979136575189, + "loss": 0.0756, + "theoretical_loss": 3.7001043488863896, + "tokens_seen": 937951232 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007229176697159365, + "loss": 0.0753, + "theoretical_loss": 3.7000055494988047, + "tokens_seen": 938213376 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007228374257743541, + "loss": 0.0772, + "theoretical_loss": 3.6999067854396936, + "tokens_seen": 938475520 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007227571818327716, + "loss": 0.0715, + "theoretical_loss": 3.6998080566865608, + "tokens_seen": 938737664 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007226769378911893, + "loss": 0.0738, + "theoretical_loss": 3.6997093632169307, + "tokens_seen": 938999808 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007225966939496068, + "loss": 0.0768, + "theoretical_loss": 3.699610705008349, + "tokens_seen": 939261952 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007225164500080244, + "loss": 0.0707, + "theoretical_loss": 3.6995120820383818, + "tokens_seen": 939524096 + }, + { + "epoch": 0.28, + "learning_rate": 0.000722436206066442, + "loss": 0.074, + "theoretical_loss": 3.6994134942846157, + "tokens_seen": 939786240 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007223559621248595, + "loss": 0.0758, + "theoretical_loss": 3.6993149417246576, + "tokens_seen": 940048384 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007222757181832772, + "loss": 0.0737, + "theoretical_loss": 3.699216424336135, + "tokens_seen": 940310528 + }, + { + "epoch": 0.28, + "objective/train/advantage_avg": 0.0008543147123418748, + "objective/train/docs_used": 345855, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.430222988128662, + "objective/train/original_loss": 1.430222988128662, + "objective/train/theoretical_loss": 3.6991671788241764, + "objective/train/tokens_used": 960901600, + "objective/train/value_avg": -0.00884246826171875, + "objective/train/value_loss": 0.00023561967827845365, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.6904296875, + "objective/train/value_reward_corr": 0.6824104864233338, + "objective/train/value_std": 0.0145416259765625, + "objective/train/weight_avg": 1.0009649991989136, + "objective/train/weighted_lm_loss": 1.4308393001556396, + "objective/train/weights_max": 1.3335462808609009, + "objective/train/weights_min": 0.3730488717556, + "theoretical_loss": 3.6991671788241764, + "tokens_seen": 940441600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007221954742416948, + "loss": 0.0746, + "theoretical_loss": 3.6991179420966964, + "tokens_seen": 940572672 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007221152303001124, + "loss": 0.0729, + "theoretical_loss": 3.69901949498401, + "tokens_seen": 940834816 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007220349863585299, + "loss": 0.0759, + "theoretical_loss": 3.6989210829757644, + "tokens_seen": 941096960 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007219547424169476, + "loss": 0.0758, + "theoretical_loss": 3.6988227060496692, + "tokens_seen": 941359104 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007218744984753651, + "loss": 0.0761, + "theoretical_loss": 3.6987243641834535, + "tokens_seen": 941621248 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007217942545337827, + "loss": 0.0743, + "theoretical_loss": 3.6986260573548675, + "tokens_seen": 941883392 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007217140105922003, + "loss": 0.0746, + "theoretical_loss": 3.698527785541682, + "tokens_seen": 942145536 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007216337666506178, + "loss": 0.0743, + "theoretical_loss": 3.6984295487216867, + "tokens_seen": 942407680 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007215535227090356, + "loss": 0.0746, + "theoretical_loss": 3.6983313468726924, + "tokens_seen": 942669824 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007214732787674531, + "loss": 0.0773, + "theoretical_loss": 3.6982331799725303, + "tokens_seen": 942931968 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007213930348258707, + "loss": 0.0734, + "theoretical_loss": 3.6981350479990525, + "tokens_seen": 943194112 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007213127908842883, + "loss": 0.0781, + "theoretical_loss": 3.6980369509301285, + "tokens_seen": 943456256 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0011019601952284575, + "objective/train/docs_used": 346959, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3961070775985718, + "objective/train/original_loss": 1.3961069583892822, + "objective/train/theoretical_loss": 3.6979388887436517, + "objective/train/tokens_used": 964178400, + "objective/train/value_avg": -0.00862884521484375, + "objective/train/value_loss": 0.0001508643908891827, + "objective/train/value_max": -0.00017535686492919922, + "objective/train/value_min": -0.24072265625, + "objective/train/value_reward_corr": 0.7373741548355937, + "objective/train/value_std": 0.012786865234375, + "objective/train/weight_avg": 1.0011765956878662, + "objective/train/weighted_lm_loss": 1.3976231813430786, + "objective/train/weights_max": 1.2201510667800903, + "objective/train/weights_min": 0.8218066692352295, + "theoretical_loss": 3.6979388887436517, + "tokens_seen": 943718400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007212325469427058, + "loss": 0.0758, + "theoretical_loss": 3.6979388887436517, + "tokens_seen": 943718400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007211523030011234, + "loss": 0.0759, + "theoretical_loss": 3.697840861417533, + "tokens_seen": 943980544 + }, + { + "epoch": 0.29, + "learning_rate": 0.000721072059059541, + "loss": 0.0755, + "theoretical_loss": 3.697742868929704, + "tokens_seen": 944242688 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007209918151179586, + "loss": 0.0745, + "theoretical_loss": 3.6976449112581173, + "tokens_seen": 944504832 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007209115711763761, + "loss": 0.0746, + "theoretical_loss": 3.697546988380744, + "tokens_seen": 944766976 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007208313272347939, + "loss": 0.076, + "theoretical_loss": 3.697449100275577, + "tokens_seen": 945029120 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007207510832932114, + "loss": 0.0726, + "theoretical_loss": 3.6973512469206278, + "tokens_seen": 945291264 + }, + { + "epoch": 0.29, + "learning_rate": 0.000720670839351629, + "loss": 0.0733, + "theoretical_loss": 3.6972534282939282, + "tokens_seen": 945553408 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007205905954100466, + "loss": 0.0726, + "theoretical_loss": 3.6971556443735314, + "tokens_seen": 945815552 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007205103514684641, + "loss": 0.0711, + "theoretical_loss": 3.697057895137508, + "tokens_seen": 946077696 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007204301075268818, + "loss": 0.0772, + "theoretical_loss": 3.696960180563951, + "tokens_seen": 946339840 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007203498635852993, + "loss": 0.0752, + "theoretical_loss": 3.6968625006309717, + "tokens_seen": 946601984 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007202696196437169, + "loss": 0.0746, + "theoretical_loss": 3.6967648553167014, + "tokens_seen": 946864128 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": -8.704046194907278e-05, + "objective/train/docs_used": 348186, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.480729579925537, + "objective/train/original_loss": 1.4807298183441162, + "objective/train/theoretical_loss": 3.696716045634754, + "objective/train/tokens_used": 967455200, + "objective/train/value_avg": -0.0102691650390625, + "objective/train/value_loss": 0.0004358619044069201, + "objective/train/value_max": -0.00021660327911376953, + "objective/train/value_min": -0.783203125, + "objective/train/value_reward_corr": 0.7756683391643069, + "objective/train/value_std": 0.0248260498046875, + "objective/train/weight_avg": 1.0001178979873657, + "objective/train/weighted_lm_loss": 1.4807584285736084, + "objective/train/weights_max": 2.014146089553833, + "objective/train/weights_min": 0.3976996839046478, + "theoretical_loss": 3.696716045634754, + "tokens_seen": 946995200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007201893757021345, + "loss": 0.0738, + "theoretical_loss": 3.696667244599292, + "tokens_seen": 947126272 + }, + { + "epoch": 0.29, + "learning_rate": 0.000720109131760552, + "loss": 0.0759, + "theoretical_loss": 3.6965696684569154, + "tokens_seen": 947388416 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007200288878189697, + "loss": 0.0763, + "theoretical_loss": 3.6964721268677616, + "tokens_seen": 947650560 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007199486438773873, + "loss": 0.0744, + "theoretical_loss": 3.696374619810043, + "tokens_seen": 947912704 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007198683999358049, + "loss": 0.0715, + "theoretical_loss": 3.6962771472619886, + "tokens_seen": 948174848 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007197881559942224, + "loss": 0.0743, + "theoretical_loss": 3.69617970920185, + "tokens_seen": 948436992 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007197079120526401, + "loss": 0.0766, + "theoretical_loss": 3.6960823056078973, + "tokens_seen": 948699136 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007196276681110576, + "loss": 0.0765, + "theoretical_loss": 3.6959849364584203, + "tokens_seen": 948961280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007195474241694752, + "loss": 0.0751, + "theoretical_loss": 3.695887601731728, + "tokens_seen": 949223424 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007194671802278928, + "loss": 0.0765, + "theoretical_loss": 3.69579030140615, + "tokens_seen": 949485568 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007193869362863103, + "loss": 0.0751, + "theoretical_loss": 3.6956930354600352, + "tokens_seen": 949747712 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007193066923447281, + "loss": 0.075, + "theoretical_loss": 3.6955958038717522, + "tokens_seen": 950009856 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0015586912631988525, + "objective/train/docs_used": 349336, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4084975719451904, + "objective/train/original_loss": 1.4084975719451904, + "objective/train/theoretical_loss": 3.695498606619688, + "objective/train/tokens_used": 970732000, + "objective/train/value_avg": -0.00797271728515625, + "objective/train/value_loss": 0.0002061313862213865, + "objective/train/value_max": -0.00015115737915039062, + "objective/train/value_min": -0.53125, + "objective/train/value_reward_corr": 0.6561778993169798, + "objective/train/value_std": 0.0132598876953125, + "objective/train/weight_avg": 1.0016520023345947, + "objective/train/weighted_lm_loss": 1.411009669303894, + "objective/train/weights_max": 1.2708643674850464, + "objective/train/weights_min": 0.36876535415649414, + "theoretical_loss": 3.695498606619688, + "tokens_seen": 950272000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007192264484031456, + "loss": 0.0747, + "theoretical_loss": 3.695498606619688, + "tokens_seen": 950272000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007191462044615632, + "loss": 0.0728, + "theoretical_loss": 3.6954014436822513, + "tokens_seen": 950534144 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007190659605199808, + "loss": 0.0749, + "theoretical_loss": 3.695304315037868, + "tokens_seen": 950796288 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007189857165783984, + "loss": 0.0747, + "theoretical_loss": 3.6952072206649857, + "tokens_seen": 951058432 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007189054726368159, + "loss": 0.075, + "theoretical_loss": 3.695110160542069, + "tokens_seen": 951320576 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007188252286952335, + "loss": 0.0744, + "theoretical_loss": 3.6950131346476054, + "tokens_seen": 951582720 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007187449847536511, + "loss": 0.0755, + "theoretical_loss": 3.694916142960098, + "tokens_seen": 951844864 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007186647408120686, + "loss": 0.0723, + "theoretical_loss": 3.6948191854580728, + "tokens_seen": 952107008 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007185844968704864, + "loss": 0.0738, + "theoretical_loss": 3.694722262120072, + "tokens_seen": 952369152 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007185042529289039, + "loss": 0.0752, + "theoretical_loss": 3.6946253729246594, + "tokens_seen": 952631296 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007184240089873215, + "loss": 0.0731, + "theoretical_loss": 3.6945285178504172, + "tokens_seen": 952893440 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007183437650457391, + "loss": 0.0729, + "theoretical_loss": 3.694431696875948, + "tokens_seen": 953155584 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007182635211041566, + "loss": 0.0778, + "theoretical_loss": 3.6943349099798715, + "tokens_seen": 953417728 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0013693609507754445, + "objective/train/docs_used": 350619, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3620078563690186, + "objective/train/original_loss": 1.3620076179504395, + "objective/train/theoretical_loss": 3.694286529304555, + "objective/train/tokens_used": 974008800, + "objective/train/value_avg": -0.005817413330078125, + "objective/train/value_loss": 0.00011372018343536183, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.55126953125, + "objective/train/value_reward_corr": 0.6999231970698898, + "objective/train/value_std": 0.009918212890625, + "objective/train/weight_avg": 1.0014216899871826, + "objective/train/weighted_lm_loss": 1.3646951913833618, + "objective/train/weights_max": 1.1521174907684326, + "objective/train/weights_min": 0.3827209174633026, + "theoretical_loss": 3.694286529304555, + "tokens_seen": 953548800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007181832771625743, + "loss": 0.0708, + "theoretical_loss": 3.6942381571408287, + "tokens_seen": 953679872 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007181030332209918, + "loss": 0.0761, + "theoretical_loss": 3.6941414383374793, + "tokens_seen": 953942016 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007180227892794094, + "loss": 0.0726, + "theoretical_loss": 3.6940447535485026, + "tokens_seen": 954204160 + }, + { + "epoch": 0.29, + "learning_rate": 0.000717942545337827, + "loss": 0.0736, + "theoretical_loss": 3.6939481027525956, + "tokens_seen": 954466304 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007178623013962447, + "loss": 0.074, + "theoretical_loss": 3.6938514859284766, + "tokens_seen": 954728448 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007177820574546622, + "loss": 0.0741, + "theoretical_loss": 3.6937549030548813, + "tokens_seen": 954990592 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007177018135130798, + "loss": 0.0749, + "theoretical_loss": 3.693658354110565, + "tokens_seen": 955252736 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007176215695714974, + "loss": 0.0737, + "theoretical_loss": 3.6935618390743032, + "tokens_seen": 955514880 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007175413256299149, + "loss": 0.0743, + "theoretical_loss": 3.6934653579248886, + "tokens_seen": 955777024 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007174610816883326, + "loss": 0.0729, + "theoretical_loss": 3.693368910641135, + "tokens_seen": 956039168 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007173808377467501, + "loss": 0.0761, + "theoretical_loss": 3.693272497201874, + "tokens_seen": 956301312 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007173005938051677, + "loss": 0.0777, + "theoretical_loss": 3.6931761175859554, + "tokens_seen": 956563456 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0013215347426012158, + "objective/train/docs_used": 351696, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4953937530517578, + "objective/train/original_loss": 1.4953936338424683, + "objective/train/theoretical_loss": 3.693079771772251, + "objective/train/tokens_used": 977285600, + "objective/train/value_avg": -0.005706787109375, + "objective/train/value_loss": 0.00014413167082238942, + "objective/train/value_max": -0.00010889768600463867, + "objective/train/value_min": -0.2381591796875, + "objective/train/value_reward_corr": 0.5217617356406479, + "objective/train/value_std": 0.00839996337890625, + "objective/train/weight_avg": 1.0013859272003174, + "objective/train/weighted_lm_loss": 1.4972920417785645, + "objective/train/weights_max": 1.1233699321746826, + "objective/train/weights_min": 0.3683270514011383, + "theoretical_loss": 3.693079771772251, + "tokens_seen": 956825600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007172203498635853, + "loss": 0.0747, + "theoretical_loss": 3.693079771772251, + "tokens_seen": 956825600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007171401059220028, + "loss": 0.0763, + "theoretical_loss": 3.692983459739649, + "tokens_seen": 957087744 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007170598619804205, + "loss": 0.0776, + "theoretical_loss": 3.6928871814670563, + "tokens_seen": 957349888 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007169796180388381, + "loss": 0.0763, + "theoretical_loss": 3.6927909369334007, + "tokens_seen": 957612032 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007168993740972557, + "loss": 0.0725, + "theoretical_loss": 3.6926947261176277, + "tokens_seen": 957874176 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007168191301556733, + "loss": 0.0762, + "theoretical_loss": 3.692598548998702, + "tokens_seen": 958136320 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007167388862140909, + "loss": 0.0773, + "theoretical_loss": 3.692502405555606, + "tokens_seen": 958398464 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007166586422725084, + "loss": 0.077, + "theoretical_loss": 3.692406295767344, + "tokens_seen": 958660608 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007165783983309261, + "loss": 0.0753, + "theoretical_loss": 3.692310219612936, + "tokens_seen": 958922752 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007164981543893436, + "loss": 0.0743, + "theoretical_loss": 3.6922141770714214, + "tokens_seen": 959184896 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007164179104477611, + "loss": 0.0737, + "theoretical_loss": 3.6921181681218602, + "tokens_seen": 959447040 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007163376665061789, + "loss": 0.0755, + "theoretical_loss": 3.6920221927433294, + "tokens_seen": 959709184 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007162574225645964, + "loss": 0.0772, + "theoretical_loss": 3.691926250914925, + "tokens_seen": 959971328 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0015116332797333598, + "objective/train/docs_used": 352874, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4265995025634766, + "objective/train/original_loss": 1.4265992641448975, + "objective/train/theoretical_loss": 3.6918782925754936, + "objective/train/tokens_used": 980562400, + "objective/train/value_avg": -0.0121917724609375, + "objective/train/value_loss": 0.00023754734138492495, + "objective/train/value_max": -0.00017130374908447266, + "objective/train/value_min": -0.31884765625, + "objective/train/value_reward_corr": 0.9000245293805224, + "objective/train/value_std": 0.03515625, + "objective/train/weight_avg": 1.0016270875930786, + "objective/train/weighted_lm_loss": 1.4295847415924072, + "objective/train/weights_max": 1.215097427368164, + "objective/train/weights_min": 0.3761727213859558, + "theoretical_loss": 3.6918782925754936, + "tokens_seen": 960102400 + }, + { + "epoch": 0.29, + "learning_rate": 0.000716177178623014, + "loss": 0.0754, + "theoretical_loss": 3.691830342615763, + "tokens_seen": 960233472 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007160969346814316, + "loss": 0.0744, + "theoretical_loss": 3.6917344678249755, + "tokens_seen": 960495616 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007160166907398492, + "loss": 0.0734, + "theoretical_loss": 3.6916386265217156, + "tokens_seen": 960757760 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007159364467982667, + "loss": 0.0726, + "theoretical_loss": 3.6915428186851553, + "tokens_seen": 961019904 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007158562028566843, + "loss": 0.075, + "theoretical_loss": 3.6914470442944824, + "tokens_seen": 961282048 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007157759589151019, + "loss": 0.0722, + "theoretical_loss": 3.691351303328907, + "tokens_seen": 961544192 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007156957149735194, + "loss": 0.0754, + "theoretical_loss": 3.691255595767654, + "tokens_seen": 961806336 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007156154710319372, + "loss": 0.0766, + "theoretical_loss": 3.6911599215899704, + "tokens_seen": 962068480 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007155352270903547, + "loss": 0.0759, + "theoretical_loss": 3.6910642807751195, + "tokens_seen": 962330624 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007154549831487724, + "loss": 0.0749, + "theoretical_loss": 3.6909686733023843, + "tokens_seen": 962592768 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007153747392071899, + "loss": 0.0722, + "theoretical_loss": 3.690873099151065, + "tokens_seen": 962854912 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007152944952656074, + "loss": 0.0729, + "theoretical_loss": 3.690777558300482, + "tokens_seen": 963117056 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.000455680739833042, + "objective/train/docs_used": 353969, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4282293319702148, + "objective/train/original_loss": 1.428229570388794, + "objective/train/theoretical_loss": 3.690682050729972, + "objective/train/tokens_used": 983839200, + "objective/train/value_avg": -0.010162353515625, + "objective/train/value_loss": 0.0002770853752736002, + "objective/train/value_max": -0.00014543533325195312, + "objective/train/value_min": -0.2489013671875, + "objective/train/value_reward_corr": 0.6144828349212833, + "objective/train/value_std": 0.01318359375, + "objective/train/weight_avg": 1.0005788803100586, + "objective/train/weighted_lm_loss": 1.4285677671432495, + "objective/train/weights_max": 1.145114541053772, + "objective/train/weights_min": 0.37042829394340515, + "theoretical_loss": 3.690682050729972, + "tokens_seen": 963379200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007152142513240251, + "loss": 0.0734, + "theoretical_loss": 3.690682050729972, + "tokens_seen": 963379200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007151340073824426, + "loss": 0.0756, + "theoretical_loss": 3.6905865764188923, + "tokens_seen": 963641344 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007150537634408602, + "loss": 0.0722, + "theoretical_loss": 3.6904911353466177, + "tokens_seen": 963903488 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007149735194992778, + "loss": 0.0754, + "theoretical_loss": 3.690395727492541, + "tokens_seen": 964165632 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007148932755576955, + "loss": 0.0733, + "theoretical_loss": 3.690300352836074, + "tokens_seen": 964427776 + }, + { + "epoch": 0.29, + "learning_rate": 0.000714813031616113, + "loss": 0.077, + "theoretical_loss": 3.690205011356646, + "tokens_seen": 964689920 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007147327876745306, + "loss": 0.0755, + "theoretical_loss": 3.6901097030337056, + "tokens_seen": 964952064 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007146525437329482, + "loss": 0.0727, + "theoretical_loss": 3.6900144278467204, + "tokens_seen": 965214208 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007145722997913657, + "loss": 0.0755, + "theoretical_loss": 3.6899191857751736, + "tokens_seen": 965476352 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007144920558497834, + "loss": 0.0723, + "theoretical_loss": 3.6898239767985688, + "tokens_seen": 965738496 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007144118119082009, + "loss": 0.0754, + "theoretical_loss": 3.689728800896428, + "tokens_seen": 966000640 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007143315679666186, + "loss": 0.0735, + "theoretical_loss": 3.68963365804829, + "tokens_seen": 966262784 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007142513240250361, + "loss": 0.076, + "theoretical_loss": 3.689538548233713, + "tokens_seen": 966524928 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.00043955291039310396, + "objective/train/docs_used": 355190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5631569623947144, + "objective/train/original_loss": 1.5631568431854248, + "objective/train/theoretical_loss": 3.6894910057076267, + "objective/train/tokens_used": 987116000, + "objective/train/value_avg": -0.00716400146484375, + "objective/train/value_loss": 0.0002913126372732222, + "objective/train/value_max": -0.00010150671005249023, + "objective/train/value_min": -0.6953125, + "objective/train/value_reward_corr": 0.7267076806923944, + "objective/train/value_std": 0.0150299072265625, + "objective/train/weight_avg": 1.0005711317062378, + "objective/train/weighted_lm_loss": 1.5634570121765137, + "objective/train/weights_max": 1.3262733221054077, + "objective/train/weights_min": 0.3728724420070648, + "theoretical_loss": 3.6894910057076267, + "tokens_seen": 966656000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007141710800834536, + "loss": 0.0774, + "theoretical_loss": 3.6894434714322726, + "tokens_seen": 966787072 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007140908361418714, + "loss": 0.0731, + "theoretical_loss": 3.689348427623563, + "tokens_seen": 967049216 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007140105922002889, + "loss": 0.0726, + "theoretical_loss": 3.689253416787197, + "tokens_seen": 967311360 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007139303482587065, + "loss": 0.0725, + "theoretical_loss": 3.6891584389028047, + "tokens_seen": 967573504 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007138501043171241, + "loss": 0.0746, + "theoretical_loss": 3.689063493950034, + "tokens_seen": 967835648 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007137698603755417, + "loss": 0.0736, + "theoretical_loss": 3.6889685819085525, + "tokens_seen": 968097792 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007136896164339592, + "loss": 0.0744, + "theoretical_loss": 3.688873702758044, + "tokens_seen": 968359936 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007136093724923768, + "loss": 0.073, + "theoretical_loss": 3.688778856478211, + "tokens_seen": 968622080 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007135291285507944, + "loss": 0.0737, + "theoretical_loss": 3.6886840430487746, + "tokens_seen": 968884224 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007134488846092119, + "loss": 0.0726, + "theoretical_loss": 3.688589262449474, + "tokens_seen": 969146368 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007133686406676297, + "loss": 0.0754, + "theoretical_loss": 3.6884945146600643, + "tokens_seen": 969408512 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007132883967260472, + "loss": 0.0739, + "theoretical_loss": 3.6883997996603215, + "tokens_seen": 969670656 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.0019111091969534755, + "objective/train/docs_used": 356486, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4429503679275513, + "objective/train/original_loss": 1.4429501295089722, + "objective/train/theoretical_loss": 3.688305117430038, + "objective/train/tokens_used": 990392800, + "objective/train/value_avg": -0.0103302001953125, + "objective/train/value_loss": 0.0004549300647340715, + "objective/train/value_max": -8.749961853027344e-05, + "objective/train/value_min": -0.64794921875, + "objective/train/value_reward_corr": 0.6670057748618838, + "objective/train/value_std": 0.0211181640625, + "objective/train/weight_avg": 1.0021229982376099, + "objective/train/weighted_lm_loss": 1.446812391281128, + "objective/train/weights_max": 1.8136069774627686, + "objective/train/weights_min": 0.3739864230155945, + "theoretical_loss": 3.688305117430038, + "tokens_seen": 969932800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007132081527844648, + "loss": 0.0744, + "theoretical_loss": 3.688305117430038, + "tokens_seen": 969932800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007131279088428824, + "loss": 0.0744, + "theoretical_loss": 3.688210467949023, + "tokens_seen": 970194944 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007130476649013, + "loss": 0.0741, + "theoretical_loss": 3.6881158511971055, + "tokens_seen": 970457088 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007129674209597176, + "loss": 0.0746, + "theoretical_loss": 3.6880212671541326, + "tokens_seen": 970719232 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007128871770181351, + "loss": 0.0752, + "theoretical_loss": 3.687926715799967, + "tokens_seen": 970981376 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007128069330765527, + "loss": 0.0746, + "theoretical_loss": 3.687832197114491, + "tokens_seen": 971243520 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007127266891349703, + "loss": 0.0762, + "theoretical_loss": 3.687737711077605, + "tokens_seen": 971505664 + }, + { + "epoch": 0.29, + "learning_rate": 0.000712646445193388, + "loss": 0.0739, + "theoretical_loss": 3.687643257669225, + "tokens_seen": 971767808 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007125662012518055, + "loss": 0.0747, + "theoretical_loss": 3.6875488368692877, + "tokens_seen": 972029952 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007124859573102232, + "loss": 0.0736, + "theoretical_loss": 3.687454448657745, + "tokens_seen": 972292096 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007124057133686407, + "loss": 0.0741, + "theoretical_loss": 3.687360093014568, + "tokens_seen": 972554240 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007123254694270582, + "loss": 0.0744, + "theoretical_loss": 3.687265769919745, + "tokens_seen": 972816384 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007122452254854759, + "loss": 0.0753, + "theoretical_loss": 3.6871714793532826, + "tokens_seen": 973078528 + }, + { + "epoch": 0.29, + "objective/train/advantage_avg": 0.000875156547408551, + "objective/train/docs_used": 357717, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.565974235534668, + "objective/train/original_loss": 1.5659743547439575, + "objective/train/theoretical_loss": 3.6871243462619425, + "objective/train/tokens_used": 993669600, + "objective/train/value_avg": -0.00701141357421875, + "objective/train/value_loss": 0.000197466419194825, + "objective/train/value_max": -8.958578109741211e-05, + "objective/train/value_min": -0.229248046875, + "objective/train/value_reward_corr": 0.6185040343790946, + "objective/train/value_std": 0.0106201171875, + "objective/train/weight_avg": 1.0009616613388062, + "objective/train/weighted_lm_loss": 1.5670831203460693, + "objective/train/weights_max": 1.1158901453018188, + "objective/train/weights_min": 0.3683288097381592, + "theoretical_loss": 3.6871243462619425, + "tokens_seen": 973209600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007121649815438934, + "loss": 0.0768, + "theoretical_loss": 3.687077221295203, + "tokens_seen": 973340672 + }, + { + "epoch": 0.3, + "learning_rate": 0.000712084737602311, + "loss": 0.0725, + "theoretical_loss": 3.6869829957255496, + "tokens_seen": 973602816 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007120044936607286, + "loss": 0.0734, + "theoretical_loss": 3.68688880262438, + "tokens_seen": 973864960 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007119242497191462, + "loss": 0.0729, + "theoretical_loss": 3.6867946419717716, + "tokens_seen": 974127104 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007118440057775638, + "loss": 0.0726, + "theoretical_loss": 3.6867005137478177, + "tokens_seen": 974389248 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007117637618359814, + "loss": 0.0768, + "theoretical_loss": 3.686606417932631, + "tokens_seen": 974651392 + }, + { + "epoch": 0.3, + "learning_rate": 0.000711683517894399, + "loss": 0.0728, + "theoretical_loss": 3.6865123545063403, + "tokens_seen": 974913536 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007116032739528166, + "loss": 0.0768, + "theoretical_loss": 3.686418323449093, + "tokens_seen": 975175680 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007115230300112342, + "loss": 0.0765, + "theoretical_loss": 3.6863243247410526, + "tokens_seen": 975437824 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007114427860696517, + "loss": 0.0735, + "theoretical_loss": 3.686230358362401, + "tokens_seen": 975699968 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007113625421280694, + "loss": 0.0739, + "theoretical_loss": 3.686136424293338, + "tokens_seen": 975962112 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007112822981864869, + "loss": 0.074, + "theoretical_loss": 3.68604252251408, + "tokens_seen": 976224256 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.001195325399748981, + "objective/train/docs_used": 358882, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3014249801635742, + "objective/train/original_loss": 1.3014247417449951, + "objective/train/theoretical_loss": 3.6859486530048615, + "objective/train/tokens_used": 996946400, + "objective/train/value_avg": -0.00737762451171875, + "objective/train/value_loss": 0.00010647853196132928, + "objective/train/value_max": -0.00010889768600463867, + "objective/train/value_min": -0.2327880859375, + "objective/train/value_reward_corr": 0.708244856375411, + "objective/train/value_std": 0.009979248046875, + "objective/train/weight_avg": 1.0012476444244385, + "objective/train/weighted_lm_loss": 1.3023656606674194, + "objective/train/weights_max": 1.2456159591674805, + "objective/train/weights_min": 0.611983060836792, + "theoretical_loss": 3.6859486530048615, + "tokens_seen": 976486400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007112020542449044, + "loss": 0.0734, + "theoretical_loss": 3.6859486530048615, + "tokens_seen": 976486400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007111218103033222, + "loss": 0.0736, + "theoretical_loss": 3.685854815745933, + "tokens_seen": 976748544 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007110415663617397, + "loss": 0.0745, + "theoretical_loss": 3.6857610107175645, + "tokens_seen": 977010688 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007109613224201573, + "loss": 0.0771, + "theoretical_loss": 3.6856672379000415, + "tokens_seen": 977272832 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007108810784785749, + "loss": 0.0763, + "theoretical_loss": 3.6855734972736682, + "tokens_seen": 977534976 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007108008345369925, + "loss": 0.0721, + "theoretical_loss": 3.685479788818766, + "tokens_seen": 977797120 + }, + { + "epoch": 0.3, + "learning_rate": 0.00071072059059541, + "loss": 0.0757, + "theoretical_loss": 3.6853861125156717, + "tokens_seen": 978059264 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007106403466538276, + "loss": 0.0743, + "theoretical_loss": 3.6852924683447412, + "tokens_seen": 978321408 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007105601027122452, + "loss": 0.0725, + "theoretical_loss": 3.6851988562863482, + "tokens_seen": 978583552 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007104798587706628, + "loss": 0.0749, + "theoretical_loss": 3.6851052763208823, + "tokens_seen": 978845696 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007103996148290805, + "loss": 0.0719, + "theoretical_loss": 3.6850117284287505, + "tokens_seen": 979107840 + }, + { + "epoch": 0.3, + "learning_rate": 0.000710319370887498, + "loss": 0.0708, + "theoretical_loss": 3.6849182125903774, + "tokens_seen": 979369984 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007102391269459157, + "loss": 0.0766, + "theoretical_loss": 3.6848247287862046, + "tokens_seen": 979632128 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0012934327824041247, + "objective/train/docs_used": 360128, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5772310495376587, + "objective/train/original_loss": 1.5772309303283691, + "objective/train/theoretical_loss": 3.6847779988908362, + "objective/train/tokens_used": 1000223200, + "objective/train/value_avg": -0.00775909423828125, + "objective/train/value_loss": 0.0002463175624143332, + "objective/train/value_max": -0.0002065896987915039, + "objective/train/value_min": -0.6826171875, + "objective/train/value_reward_corr": 0.5791152585073471, + "objective/train/value_std": 0.01091766357421875, + "objective/train/weight_avg": 1.0014005899429321, + "objective/train/weighted_lm_loss": 1.5800875425338745, + "objective/train/weights_max": 1.209107518196106, + "objective/train/weights_min": 0.36841312050819397, + "theoretical_loss": 3.6847779988908362, + "tokens_seen": 979763200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007101588830043332, + "loss": 0.0759, + "theoretical_loss": 3.684731276996691, + "tokens_seen": 979894272 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007100786390627508, + "loss": 0.0749, + "theoretical_loss": 3.684637857202312, + "tokens_seen": 980156416 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007099983951211684, + "loss": 0.0742, + "theoretical_loss": 3.684544469383562, + "tokens_seen": 980418560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007099181511795859, + "loss": 0.0726, + "theoretical_loss": 3.6844511135209497, + "tokens_seen": 980680704 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007098379072380035, + "loss": 0.0733, + "theoretical_loss": 3.684357789595003, + "tokens_seen": 980942848 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007097576632964211, + "loss": 0.0756, + "theoretical_loss": 3.684264497586266, + "tokens_seen": 981204992 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007096774193548388, + "loss": 0.0751, + "theoretical_loss": 3.684171237475301, + "tokens_seen": 981467136 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007095971754132563, + "loss": 0.0755, + "theoretical_loss": 3.6840780092426852, + "tokens_seen": 981729280 + }, + { + "epoch": 0.3, + "learning_rate": 0.000709516931471674, + "loss": 0.0738, + "theoretical_loss": 3.6839848128690145, + "tokens_seen": 981991424 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007094366875300915, + "loss": 0.0762, + "theoretical_loss": 3.683891648334901, + "tokens_seen": 982253568 + }, + { + "epoch": 0.3, + "learning_rate": 0.000709356443588509, + "loss": 0.0751, + "theoretical_loss": 3.6837985156209743, + "tokens_seen": 982515712 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007092761996469267, + "loss": 0.0774, + "theoretical_loss": 3.683705414707881, + "tokens_seen": 982777856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7488293247094114, + "debugging/entropy-1-grams": 5.349564433264487, + "debugging/length": 462.46153846153845, + "debugging/num_segments": 13, + "debugging/raw_token_scores_avg": 0.00979544036090374, + "debugging/raw_token_scores_std": 0.022818773984909058, + "debugging/score": 0.0073770168052465215, + "debugging/score_std": 0.0054551366535427, + "epoch": 0.3, + "objective/train/advantage_avg": 0.0004159576492384076, + "objective/train/docs_used": 361383, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.380717158317566, + "objective/train/original_loss": 1.3807172775268555, + "objective/train/theoretical_loss": 3.6836123455762837, + "objective/train/tokens_used": 1003500000, + "objective/train/value_avg": -0.0102081298828125, + "objective/train/value_loss": 0.00025552461738698184, + "objective/train/value_max": -0.00016605854034423828, + "objective/train/value_min": -0.74853515625, + "objective/train/value_reward_corr": 0.7139572076837593, + "objective/train/value_std": 0.0162353515625, + "objective/train/weight_avg": 1.000531554222107, + "objective/train/weighted_lm_loss": 1.3806174993515015, + "objective/train/weights_max": 1.1966832876205444, + "objective/train/weights_min": 0.3853108584880829, + "theoretical_loss": 3.6836123455762837, + "tokens_seen": 983040000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007091959557053442, + "loss": 0.0735, + "theoretical_loss": 3.6836123455762837, + "tokens_seen": 983040000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007091157117637619, + "loss": 0.075, + "theoretical_loss": 3.683519308206863, + "tokens_seen": 983302144 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007090354678221794, + "loss": 0.0749, + "theoretical_loss": 3.683426302580316, + "tokens_seen": 983564288 + }, + { + "epoch": 0.3, + "learning_rate": 0.000708955223880597, + "loss": 0.0761, + "theoretical_loss": 3.683333328677356, + "tokens_seen": 983826432 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007088749799390147, + "loss": 0.0707, + "theoretical_loss": 3.6832403864787144, + "tokens_seen": 984088576 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007087947359974322, + "loss": 0.0749, + "theoretical_loss": 3.683147475965139, + "tokens_seen": 984350720 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007087144920558498, + "loss": 0.076, + "theoretical_loss": 3.683054597117393, + "tokens_seen": 984612864 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007086342481142674, + "loss": 0.0756, + "theoretical_loss": 3.6829617499162595, + "tokens_seen": 984875008 + }, + { + "epoch": 0.3, + "learning_rate": 0.000708554004172685, + "loss": 0.0751, + "theoretical_loss": 3.6828689343425345, + "tokens_seen": 985137152 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007084737602311025, + "loss": 0.0741, + "theoretical_loss": 3.682776150377034, + "tokens_seen": 985399296 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007083935162895202, + "loss": 0.0765, + "theoretical_loss": 3.682683398000589, + "tokens_seen": 985661440 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007083132723479377, + "loss": 0.0773, + "theoretical_loss": 3.6825906771940478, + "tokens_seen": 985923584 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007082330284063552, + "loss": 0.0738, + "theoretical_loss": 3.682497987938275, + "tokens_seen": 986185728 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.002384291496127844, + "objective/train/docs_used": 362627, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.497067928314209, + "objective/train/original_loss": 1.4970680475234985, + "objective/train/theoretical_loss": 3.682451655135952, + "objective/train/tokens_used": 1006776800, + "objective/train/value_avg": -0.007381439208984375, + "objective/train/value_loss": 0.00016254279762506485, + "objective/train/value_max": -0.0001442432403564453, + "objective/train/value_min": -0.274169921875, + "objective/train/value_reward_corr": 0.5673249772676263, + "objective/train/value_std": 0.008941650390625, + "objective/train/weight_avg": 1.00246000289917, + "objective/train/weighted_lm_loss": 1.5009002685546875, + "objective/train/weights_max": 1.1678251028060913, + "objective/train/weights_min": 0.3721363842487335, + "theoretical_loss": 3.682451655135952, + "tokens_seen": 986316800 + }, + { + "epoch": 0.3, + "learning_rate": 0.000708152784464773, + "loss": 0.0731, + "theoretical_loss": 3.682405330214153, + "tokens_seen": 986447872 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007080725405231905, + "loss": 0.0717, + "theoretical_loss": 3.682312704002579, + "tokens_seen": 986710016 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007079922965816082, + "loss": 0.0762, + "theoretical_loss": 3.6822201092844686, + "tokens_seen": 986972160 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007079120526400257, + "loss": 0.0735, + "theoretical_loss": 3.682127546040753, + "tokens_seen": 987234304 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007078318086984433, + "loss": 0.0753, + "theoretical_loss": 3.6820350142523806, + "tokens_seen": 987496448 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007077515647568609, + "loss": 0.0749, + "theoretical_loss": 3.6819425139003155, + "tokens_seen": 987758592 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007076713208152784, + "loss": 0.0763, + "theoretical_loss": 3.6818500449655396, + "tokens_seen": 988020736 + }, + { + "epoch": 0.3, + "learning_rate": 0.000707591076873696, + "loss": 0.0724, + "theoretical_loss": 3.6817576074290503, + "tokens_seen": 988282880 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007075108329321136, + "loss": 0.0724, + "theoretical_loss": 3.681665201271862, + "tokens_seen": 988545024 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007074305889905313, + "loss": 0.0774, + "theoretical_loss": 3.681572826475006, + "tokens_seen": 988807168 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007073503450489488, + "loss": 0.0752, + "theoretical_loss": 3.681480483019529, + "tokens_seen": 989069312 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007072701011073665, + "loss": 0.0755, + "theoretical_loss": 3.6813881708864953, + "tokens_seen": 989331456 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.000814878090750426, + "objective/train/docs_used": 363941, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4229824542999268, + "objective/train/original_loss": 1.4229824542999268, + "objective/train/theoretical_loss": 3.681295890056985, + "objective/train/tokens_used": 1010053600, + "objective/train/value_avg": -0.007781982421875, + "objective/train/value_loss": 0.00012201992649352178, + "objective/train/value_max": -0.0001080632209777832, + "objective/train/value_min": -0.1939697265625, + "objective/train/value_reward_corr": 0.701736020129122, + "objective/train/value_std": 0.01049041748046875, + "objective/train/weight_avg": 1.000870704650879, + "objective/train/weighted_lm_loss": 1.4249759912490845, + "objective/train/weights_max": 1.2072614431381226, + "objective/train/weights_min": 0.3682592809200287, + "theoretical_loss": 3.681295890056985, + "tokens_seen": 989593600 + }, + { + "epoch": 0.3, + "learning_rate": 0.000707189857165784, + "loss": 0.0741, + "theoretical_loss": 3.681295890056985, + "tokens_seen": 989593600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007071096132242016, + "loss": 0.0718, + "theoretical_loss": 3.681203640512095, + "tokens_seen": 989855744 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007070293692826192, + "loss": 0.076, + "theoretical_loss": 3.681111422232937, + "tokens_seen": 990117888 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007069491253410367, + "loss": 0.0765, + "theoretical_loss": 3.681019235200643, + "tokens_seen": 990380032 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007068688813994543, + "loss": 0.0747, + "theoretical_loss": 3.680927079396357, + "tokens_seen": 990642176 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007067886374578719, + "loss": 0.0736, + "theoretical_loss": 3.680834954801242, + "tokens_seen": 990904320 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007067083935162896, + "loss": 0.0737, + "theoretical_loss": 3.6807428613964763, + "tokens_seen": 991166464 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007066281495747072, + "loss": 0.0778, + "theoretical_loss": 3.6806507991632555, + "tokens_seen": 991428608 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007065479056331248, + "loss": 0.0746, + "theoretical_loss": 3.68055876808279, + "tokens_seen": 991690752 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007064676616915423, + "loss": 0.0791, + "theoretical_loss": 3.680466768136308, + "tokens_seen": 991952896 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007063874177499599, + "loss": 0.0758, + "theoretical_loss": 3.680374799305053, + "tokens_seen": 992215040 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007063071738083775, + "loss": 0.0744, + "theoretical_loss": 3.6802828615702845, + "tokens_seen": 992477184 + }, + { + "epoch": 0.3, + "learning_rate": 0.000706226929866795, + "loss": 0.0765, + "theoretical_loss": 3.6801909549132796, + "tokens_seen": 992739328 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.001581284566782415, + "objective/train/docs_used": 365105, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4962949752807617, + "objective/train/original_loss": 1.4962949752807617, + "objective/train/theoretical_loss": 3.6801450132330915, + "objective/train/tokens_used": 1013330400, + "objective/train/value_avg": -0.00997161865234375, + "objective/train/value_loss": 0.00031275878427550197, + "objective/train/value_max": -7.486343383789062e-05, + "objective/train/value_min": -0.4453125, + "objective/train/value_reward_corr": 0.7353213483699139, + "objective/train/value_std": 0.020538330078125, + "objective/train/weight_avg": 1.0017253160476685, + "objective/train/weighted_lm_loss": 1.498146414756775, + "objective/train/weights_max": 1.244823932647705, + "objective/train/weights_min": 0.4185446798801422, + "theoretical_loss": 3.6801450132330915, + "tokens_seen": 992870400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007061466859252127, + "loss": 0.0758, + "theoretical_loss": 3.6800990793153305, + "tokens_seen": 993001472 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007060664419836302, + "loss": 0.075, + "theoretical_loss": 3.6800072347577455, + "tokens_seen": 993263616 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007059861980420478, + "loss": 0.0745, + "theoretical_loss": 3.6799154212218506, + "tokens_seen": 993525760 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007059059541004655, + "loss": 0.0726, + "theoretical_loss": 3.679823638688985, + "tokens_seen": 993787904 + }, + { + "epoch": 0.3, + "learning_rate": 0.000705825710158883, + "loss": 0.0742, + "theoretical_loss": 3.679731887140508, + "tokens_seen": 994050048 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007057454662173006, + "loss": 0.0712, + "theoretical_loss": 3.6796401665577916, + "tokens_seen": 994312192 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007056652222757182, + "loss": 0.0768, + "theoretical_loss": 3.679548476922225, + "tokens_seen": 994574336 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007055849783341358, + "loss": 0.0749, + "theoretical_loss": 3.6794568182152143, + "tokens_seen": 994836480 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007055047343925533, + "loss": 0.0756, + "theoretical_loss": 3.6793651904181806, + "tokens_seen": 995098624 + }, + { + "epoch": 0.3, + "learning_rate": 0.000705424490450971, + "loss": 0.0749, + "theoretical_loss": 3.679273593512563, + "tokens_seen": 995360768 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007053442465093885, + "loss": 0.0767, + "theoretical_loss": 3.679182027479812, + "tokens_seen": 995622912 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007052640025678061, + "loss": 0.0765, + "theoretical_loss": 3.6790904923014005, + "tokens_seen": 995885056 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0009565390646457672, + "objective/train/docs_used": 366329, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5922820568084717, + "objective/train/original_loss": 1.5922820568084717, + "objective/train/theoretical_loss": 3.6789989879588125, + "objective/train/tokens_used": 1016607200, + "objective/train/value_avg": -0.010162353515625, + "objective/train/value_loss": 0.0004036914324387908, + "objective/train/value_max": -8.028745651245117e-05, + "objective/train/value_min": -0.489990234375, + "objective/train/value_reward_corr": 0.6579596576861813, + "objective/train/value_std": 0.017059326171875, + "objective/train/weight_avg": 1.0011204481124878, + "objective/train/weighted_lm_loss": 1.594104528427124, + "objective/train/weights_max": 1.1680172681808472, + "objective/train/weights_min": 0.2262655794620514, + "theoretical_loss": 3.6789989879588125, + "tokens_seen": 996147200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007051837586262238, + "loss": 0.074, + "theoretical_loss": 3.6789989879588125, + "tokens_seen": 996147200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007051035146846413, + "loss": 0.0744, + "theoretical_loss": 3.6789075144335497, + "tokens_seen": 996409344 + }, + { + "epoch": 0.3, + "learning_rate": 0.000705023270743059, + "loss": 0.0729, + "theoretical_loss": 3.6788160717071303, + "tokens_seen": 996671488 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007049430268014765, + "loss": 0.0771, + "theoretical_loss": 3.678724659761087, + "tokens_seen": 996933632 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007048627828598941, + "loss": 0.0735, + "theoretical_loss": 3.6786332785769695, + "tokens_seen": 997195776 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007047825389183117, + "loss": 0.0736, + "theoretical_loss": 3.678541928136344, + "tokens_seen": 997457920 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007047022949767292, + "loss": 0.0755, + "theoretical_loss": 3.6784506084207904, + "tokens_seen": 997720064 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007046220510351468, + "loss": 0.0779, + "theoretical_loss": 3.6783593194119066, + "tokens_seen": 997982208 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007045418070935644, + "loss": 0.0761, + "theoretical_loss": 3.6782680610913054, + "tokens_seen": 998244352 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007044615631519821, + "loss": 0.079, + "theoretical_loss": 3.6781768334406157, + "tokens_seen": 998506496 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007043813192103996, + "loss": 0.0736, + "theoretical_loss": 3.678085636441482, + "tokens_seen": 998768640 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007043010752688173, + "loss": 0.0783, + "theoretical_loss": 3.677994470075565, + "tokens_seen": 999030784 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007042208313272348, + "loss": 0.0749, + "theoretical_loss": 3.6779033343245406, + "tokens_seen": 999292928 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0017308670794591308, + "objective/train/docs_used": 367579, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6493194103240967, + "objective/train/original_loss": 1.6493196487426758, + "objective/train/theoretical_loss": 3.677857777923891, + "objective/train/tokens_used": 1019884000, + "objective/train/value_avg": -0.006999969482421875, + "objective/train/value_loss": 0.00011768531840061769, + "objective/train/value_max": -0.00017130374908447266, + "objective/train/value_min": -0.298828125, + "objective/train/value_reward_corr": 0.617122584170849, + "objective/train/value_std": 0.00861358642578125, + "objective/train/weight_avg": 1.0017890930175781, + "objective/train/weighted_lm_loss": 1.6527791023254395, + "objective/train/weights_max": 1.1782879829406738, + "objective/train/weights_min": 0.7371587753295898, + "theoretical_loss": 3.677857777923891, + "tokens_seen": 999424000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007041405873856524, + "loss": 0.0756, + "theoretical_loss": 3.677812229170101, + "tokens_seen": 999555072 + }, + { + "epoch": 0.3, + "learning_rate": 0.00070406034344407, + "loss": 0.0732, + "theoretical_loss": 3.677721154593953, + "tokens_seen": 999817216 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007039800995024875, + "loss": 0.0756, + "theoretical_loss": 3.6776301105778213, + "tokens_seen": 1000079360 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007038998555609052, + "loss": 0.076, + "theoretical_loss": 3.6775390971034447, + "tokens_seen": 1000341504 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007038196116193227, + "loss": 0.0742, + "theoretical_loss": 3.6774481141525777, + "tokens_seen": 1000603648 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007037393676777404, + "loss": 0.075, + "theoretical_loss": 3.6773571617069907, + "tokens_seen": 1000865792 + }, + { + "epoch": 0.3, + "learning_rate": 0.000703659123736158, + "loss": 0.0731, + "theoretical_loss": 3.6772662397484703, + "tokens_seen": 1001127936 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007035788797945756, + "loss": 0.0704, + "theoretical_loss": 3.6771753482588183, + "tokens_seen": 1001390080 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007034986358529931, + "loss": 0.0741, + "theoretical_loss": 3.6770844872198523, + "tokens_seen": 1001652224 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007034183919114107, + "loss": 0.0733, + "theoretical_loss": 3.6769936566134045, + "tokens_seen": 1001914368 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007033381479698283, + "loss": 0.0755, + "theoretical_loss": 3.676902856421324, + "tokens_seen": 1002176512 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007032579040282458, + "loss": 0.076, + "theoretical_loss": 3.6768120866254757, + "tokens_seen": 1002438656 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": -0.0004565780109260231, + "objective/train/docs_used": 368716, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.602346420288086, + "objective/train/original_loss": 1.6023463010787964, + "objective/train/theoretical_loss": 3.6767213472077387, + "objective/train/tokens_used": 1023160800, + "objective/train/value_avg": -0.0089874267578125, + "objective/train/value_loss": 0.0004929814604111016, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.70947265625, + "objective/train/value_reward_corr": 0.6152421290672049, + "objective/train/value_std": 0.01763916015625, + "objective/train/weight_avg": 0.9997656345367432, + "objective/train/weighted_lm_loss": 1.6013120412826538, + "objective/train/weights_max": 1.700226902961731, + "objective/train/weights_min": 0.3712657690048218, + "theoretical_loss": 3.6767213472077387, + "tokens_seen": 1002700800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007031776600866635, + "loss": 0.0745, + "theoretical_loss": 3.6767213472077387, + "tokens_seen": 1002700800 + }, + { + "epoch": 0.3, + "learning_rate": 0.000703097416145081, + "loss": 0.073, + "theoretical_loss": 3.676630638150008, + "tokens_seen": 1002962944 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007030171722034986, + "loss": 0.0722, + "theoretical_loss": 3.6765399594341943, + "tokens_seen": 1003225088 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007029369282619163, + "loss": 0.0758, + "theoretical_loss": 3.676449311042225, + "tokens_seen": 1003487232 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007028566843203338, + "loss": 0.0789, + "theoretical_loss": 3.6763586929560415, + "tokens_seen": 1003749376 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007027764403787515, + "loss": 0.0758, + "theoretical_loss": 3.6762681051576003, + "tokens_seen": 1004011520 + }, + { + "epoch": 0.3, + "learning_rate": 0.000702696196437169, + "loss": 0.0738, + "theoretical_loss": 3.6761775476288747, + "tokens_seen": 1004273664 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007026159524955866, + "loss": 0.075, + "theoretical_loss": 3.6760870203518525, + "tokens_seen": 1004535808 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007025357085540042, + "loss": 0.0715, + "theoretical_loss": 3.6759965233085383, + "tokens_seen": 1004797952 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007024554646124218, + "loss": 0.0748, + "theoretical_loss": 3.6759060564809496, + "tokens_seen": 1005060096 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007023752206708393, + "loss": 0.0748, + "theoretical_loss": 3.6758156198511216, + "tokens_seen": 1005322240 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007022949767292569, + "loss": 0.076, + "theoretical_loss": 3.675725213401104, + "tokens_seen": 1005584384 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007022147327876746, + "loss": 0.0757, + "theoretical_loss": 3.6756348371129617, + "tokens_seen": 1005846528 + }, + { + "epoch": 0.3, + "objective/train/advantage_avg": 0.0010321858571842313, + "objective/train/docs_used": 369909, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.614028811454773, + "objective/train/original_loss": 1.6140289306640625, + "objective/train/theoretical_loss": 3.6755896602739933, + "objective/train/tokens_used": 1026437600, + "objective/train/value_avg": -0.00745391845703125, + "objective/train/value_loss": 0.00014535474474541843, + "objective/train/value_max": -8.153915405273438e-05, + "objective/train/value_min": -0.311767578125, + "objective/train/value_reward_corr": 0.76924190262928, + "objective/train/value_std": 0.01385498046875, + "objective/train/weight_avg": 1.001103401184082, + "objective/train/weighted_lm_loss": 1.6157536506652832, + "objective/train/weights_max": 1.166409969329834, + "objective/train/weights_min": 0.6640326976776123, + "theoretical_loss": 3.6755896602739933, + "tokens_seen": 1005977600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007021344888460921, + "loss": 0.0776, + "theoretical_loss": 3.6755444909687744, + "tokens_seen": 1006108672 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007020542449045098, + "loss": 0.0745, + "theoretical_loss": 3.675454174950639, + "tokens_seen": 1006370816 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007019740009629273, + "loss": 0.0734, + "theoretical_loss": 3.675363889040666, + "tokens_seen": 1006632960 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007018937570213449, + "loss": 0.0748, + "theoretical_loss": 3.675273633220981, + "tokens_seen": 1006895104 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007018135130797625, + "loss": 0.0746, + "theoretical_loss": 3.6751834074737264, + "tokens_seen": 1007157248 + }, + { + "epoch": 0.31, + "learning_rate": 0.00070173326913818, + "loss": 0.0762, + "theoretical_loss": 3.675093211781059, + "tokens_seen": 1007419392 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007016530251965977, + "loss": 0.0773, + "theoretical_loss": 3.67500304612515, + "tokens_seen": 1007681536 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007015727812550152, + "loss": 0.0756, + "theoretical_loss": 3.674912910488187, + "tokens_seen": 1007943680 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007014925373134329, + "loss": 0.0759, + "theoretical_loss": 3.6748228048523726, + "tokens_seen": 1008205824 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007014122933718505, + "loss": 0.0783, + "theoretical_loss": 3.674732729199924, + "tokens_seen": 1008467968 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007013320494302681, + "loss": 0.0742, + "theoretical_loss": 3.674642683513074, + "tokens_seen": 1008730112 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007012518054886856, + "loss": 0.0765, + "theoretical_loss": 3.674552667774071, + "tokens_seen": 1008992256 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0011902566766366363, + "objective/train/docs_used": 370943, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5907751321792603, + "objective/train/original_loss": 1.5907747745513916, + "objective/train/theoretical_loss": 3.6744626819651773, + "objective/train/tokens_used": 1029714400, + "objective/train/value_avg": -0.00921630859375, + "objective/train/value_loss": 0.0002929775801021606, + "objective/train/value_max": -0.00011324882507324219, + "objective/train/value_min": -0.263427734375, + "objective/train/value_reward_corr": 0.6733462873979555, + "objective/train/value_std": 0.0144805908203125, + "objective/train/weight_avg": 1.0013235807418823, + "objective/train/weighted_lm_loss": 1.5915753841400146, + "objective/train/weights_max": 1.3013832569122314, + "objective/train/weights_min": 0.39872050285339355, + "theoretical_loss": 3.6744626819651773, + "tokens_seen": 1009254400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007011715615471032, + "loss": 0.0732, + "theoretical_loss": 3.6744626819651773, + "tokens_seen": 1009254400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007010913176055208, + "loss": 0.0748, + "theoretical_loss": 3.674372726068671, + "tokens_seen": 1009516544 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007010110736639383, + "loss": 0.0738, + "theoretical_loss": 3.6742828000668464, + "tokens_seen": 1009778688 + }, + { + "epoch": 0.31, + "learning_rate": 0.000700930829722356, + "loss": 0.0746, + "theoretical_loss": 3.6741929039420103, + "tokens_seen": 1010040832 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007008505857807735, + "loss": 0.0738, + "theoretical_loss": 3.6741030376764865, + "tokens_seen": 1010302976 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007007703418391912, + "loss": 0.0764, + "theoretical_loss": 3.674013201252614, + "tokens_seen": 1010565120 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007006900978976088, + "loss": 0.0751, + "theoretical_loss": 3.6739233946527454, + "tokens_seen": 1010827264 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007006098539560264, + "loss": 0.0728, + "theoretical_loss": 3.6738336178592492, + "tokens_seen": 1011089408 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007005296100144439, + "loss": 0.0791, + "theoretical_loss": 3.6737438708545094, + "tokens_seen": 1011351552 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007004493660728615, + "loss": 0.0751, + "theoretical_loss": 3.673654153620924, + "tokens_seen": 1011613696 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007003691221312791, + "loss": 0.0784, + "theoretical_loss": 3.673564466140906, + "tokens_seen": 1011875840 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007002888781896967, + "loss": 0.0747, + "theoretical_loss": 3.6734748083968842, + "tokens_seen": 1012137984 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007002086342481143, + "loss": 0.0759, + "theoretical_loss": 3.6733851803713016, + "tokens_seen": 1012400128 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.00044222682481631637, + "objective/train/docs_used": 372166, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5056043863296509, + "objective/train/original_loss": 1.5056045055389404, + "objective/train/theoretical_loss": 3.6733403774974427, + "objective/train/tokens_used": 1032991200, + "objective/train/value_avg": -0.01678466796875, + "objective/train/value_loss": 0.00021295166516210884, + "objective/train/value_max": -0.00011146068572998047, + "objective/train/value_min": -0.6337890625, + "objective/train/value_reward_corr": 0.9449292467966458, + "objective/train/value_std": 0.041168212890625, + "objective/train/weight_avg": 0.9996622800827026, + "objective/train/weighted_lm_loss": 1.5041799545288086, + "objective/train/weights_max": 1.2211940288543701, + "objective/train/weights_min": 0.6597907543182373, + "theoretical_loss": 3.6733403774974427, + "tokens_seen": 1012531200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007001283903065318, + "loss": 0.076, + "theoretical_loss": 3.673295582046616, + "tokens_seen": 1012662272 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007000481463649496, + "loss": 0.0755, + "theoretical_loss": 3.6732060134053013, + "tokens_seen": 1012924416 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006999679024233671, + "loss": 0.0761, + "theoretical_loss": 3.673116474429844, + "tokens_seen": 1013186560 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006998876584817846, + "loss": 0.0775, + "theoretical_loss": 3.673026965102748, + "tokens_seen": 1013448704 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006998074145402023, + "loss": 0.0739, + "theoretical_loss": 3.67293748540653, + "tokens_seen": 1013710848 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006997271705986198, + "loss": 0.0738, + "theoretical_loss": 3.672848035323723, + "tokens_seen": 1013972992 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006996469266570374, + "loss": 0.0746, + "theoretical_loss": 3.6727586148368743, + "tokens_seen": 1014235136 + }, + { + "epoch": 0.31, + "learning_rate": 0.000699566682715455, + "loss": 0.0755, + "theoretical_loss": 3.672669223928545, + "tokens_seen": 1014497280 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006994864387738726, + "loss": 0.0745, + "theoretical_loss": 3.672579862581313, + "tokens_seen": 1014759424 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006994061948322901, + "loss": 0.0746, + "theoretical_loss": 3.672490530777769, + "tokens_seen": 1015021568 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006993259508907077, + "loss": 0.0752, + "theoretical_loss": 3.6724012285005196, + "tokens_seen": 1015283712 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006992457069491254, + "loss": 0.0762, + "theoretical_loss": 3.6723119557321864, + "tokens_seen": 1015545856 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.000864784000441432, + "objective/train/docs_used": 372984, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4644211530685425, + "objective/train/original_loss": 1.4644211530685425, + "objective/train/theoretical_loss": 3.6722227124554045, + "objective/train/tokens_used": 1036268000, + "objective/train/value_avg": -0.00815582275390625, + "objective/train/value_loss": 0.0006307647563517094, + "objective/train/value_max": -0.00012433528900146484, + "objective/train/value_min": -0.7392578125, + "objective/train/value_reward_corr": 0.6358148568839063, + "objective/train/value_std": 0.01560211181640625, + "objective/train/weight_avg": 0.999402642250061, + "objective/train/weighted_lm_loss": 1.4642397165298462, + "objective/train/weights_max": 2.048352003097534, + "objective/train/weights_min": 0.39648786187171936, + "theoretical_loss": 3.6722227124554045, + "tokens_seen": 1015808000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006991654630075429, + "loss": 0.0757, + "theoretical_loss": 3.6722227124554045, + "tokens_seen": 1015808000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006990852190659606, + "loss": 0.0728, + "theoretical_loss": 3.6721334986528236, + "tokens_seen": 1016070144 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006990049751243781, + "loss": 0.0767, + "theoretical_loss": 3.6720443143071106, + "tokens_seen": 1016332288 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006989247311827958, + "loss": 0.0744, + "theoretical_loss": 3.671955159400943, + "tokens_seen": 1016594432 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006988444872412133, + "loss": 0.0764, + "theoretical_loss": 3.6718660339170173, + "tokens_seen": 1016856576 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006987642432996308, + "loss": 0.076, + "theoretical_loss": 3.6717769378380414, + "tokens_seen": 1017118720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006986839993580485, + "loss": 0.075, + "theoretical_loss": 3.671687871146739, + "tokens_seen": 1017380864 + }, + { + "epoch": 0.31, + "learning_rate": 0.000698603755416466, + "loss": 0.0732, + "theoretical_loss": 3.6715988338258487, + "tokens_seen": 1017643008 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006985235114748837, + "loss": 0.0745, + "theoretical_loss": 3.6715098258581236, + "tokens_seen": 1017905152 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006984432675333013, + "loss": 0.0762, + "theoretical_loss": 3.6714208472263303, + "tokens_seen": 1018167296 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006983630235917189, + "loss": 0.0739, + "theoretical_loss": 3.6713318979132517, + "tokens_seen": 1018429440 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006982827796501364, + "loss": 0.0755, + "theoretical_loss": 3.671242977901683, + "tokens_seen": 1018691584 + }, + { + "epoch": 0.31, + "learning_rate": 0.000698202535708554, + "loss": 0.0761, + "theoretical_loss": 3.671154087174436, + "tokens_seen": 1018953728 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0015296280616894364, + "objective/train/docs_used": 374190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4388104677200317, + "objective/train/original_loss": 1.4388103485107422, + "objective/train/theoretical_loss": 3.6711096527870657, + "objective/train/tokens_used": 1039544800, + "objective/train/value_avg": -0.005893707275390625, + "objective/train/value_loss": 0.00029690880910493433, + "objective/train/value_max": -4.297494888305664e-05, + "objective/train/value_min": -0.744140625, + "objective/train/value_reward_corr": 0.5134253858490913, + "objective/train/value_std": 0.0106964111328125, + "objective/train/weight_avg": 1.0016546249389648, + "objective/train/weighted_lm_loss": 1.4419684410095215, + "objective/train/weights_max": 1.183031678199768, + "objective/train/weights_min": 0.3685206472873688, + "theoretical_loss": 3.6711096527870657, + "tokens_seen": 1019084800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006981222917669716, + "loss": 0.0761, + "theoretical_loss": 3.6710652257143366, + "tokens_seen": 1019215872 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006980420478253891, + "loss": 0.0756, + "theoretical_loss": 3.6709763935042243, + "tokens_seen": 1019478016 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006979618038838068, + "loss": 0.0759, + "theoretical_loss": 3.670887590526953, + "tokens_seen": 1019740160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006978815599422243, + "loss": 0.0755, + "theoretical_loss": 3.6707988167653927, + "tokens_seen": 1020002304 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006978013160006421, + "loss": 0.075, + "theoretical_loss": 3.670710072202426, + "tokens_seen": 1020264448 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006977210720590596, + "loss": 0.0763, + "theoretical_loss": 3.670621356820951, + "tokens_seen": 1020526592 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006976408281174772, + "loss": 0.0737, + "theoretical_loss": 3.67053267060388, + "tokens_seen": 1020788736 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006975605841758948, + "loss": 0.0761, + "theoretical_loss": 3.6704440135341394, + "tokens_seen": 1021050880 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006974803402343123, + "loss": 0.0755, + "theoretical_loss": 3.6703553855946702, + "tokens_seen": 1021313024 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006974000962927299, + "loss": 0.0734, + "theoretical_loss": 3.6702667867684275, + "tokens_seen": 1021575168 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006973198523511475, + "loss": 0.0727, + "theoretical_loss": 3.670178217038381, + "tokens_seen": 1021837312 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006972396084095651, + "loss": 0.0739, + "theoretical_loss": 3.670089676387515, + "tokens_seen": 1022099456 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0019567436538636684, + "objective/train/docs_used": 375283, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4512615203857422, + "objective/train/original_loss": 1.451261281967163, + "objective/train/theoretical_loss": 3.6700011647988275, + "objective/train/tokens_used": 1042821600, + "objective/train/value_avg": -0.01641845703125, + "objective/train/value_loss": 0.00019754536333493888, + "objective/train/value_max": -9.5367431640625e-05, + "objective/train/value_min": -0.307861328125, + "objective/train/value_reward_corr": 0.9487556089623391, + "objective/train/value_std": 0.040863037109375, + "objective/train/weight_avg": 1.0020544528961182, + "objective/train/weighted_lm_loss": 1.4532920122146606, + "objective/train/weights_max": 1.1703647375106812, + "objective/train/weights_min": 0.7569119334220886, + "theoretical_loss": 3.6700011647988275, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006971593644679826, + "loss": 0.0772, + "theoretical_loss": 3.6700011647988275, + "tokens_seen": 1022361600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006970791205264004, + "loss": 0.0726, + "theoretical_loss": 3.6699126822553314, + "tokens_seen": 1022623744 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006969988765848179, + "loss": 0.0735, + "theoretical_loss": 3.669824228740053, + "tokens_seen": 1022885888 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006969186326432354, + "loss": 0.0772, + "theoretical_loss": 3.6697358042360344, + "tokens_seen": 1023148032 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006968383887016531, + "loss": 0.0756, + "theoretical_loss": 3.6696474087263296, + "tokens_seen": 1023410176 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006967581447600706, + "loss": 0.0774, + "theoretical_loss": 3.6695590421940096, + "tokens_seen": 1023672320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006966779008184882, + "loss": 0.0762, + "theoretical_loss": 3.6694707046221575, + "tokens_seen": 1023934464 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006965976568769058, + "loss": 0.0742, + "theoretical_loss": 3.669382395993871, + "tokens_seen": 1024196608 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006965174129353234, + "loss": 0.0754, + "theoretical_loss": 3.669294116292263, + "tokens_seen": 1024458752 + }, + { + "epoch": 0.31, + "learning_rate": 0.000696437168993741, + "loss": 0.075, + "theoretical_loss": 3.6692058655004605, + "tokens_seen": 1024720896 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006963569250521585, + "loss": 0.0766, + "theoretical_loss": 3.669117643601602, + "tokens_seen": 1024983040 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006962766811105762, + "loss": 0.0718, + "theoretical_loss": 3.6690294505788446, + "tokens_seen": 1025245184 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006961964371689938, + "loss": 0.0738, + "theoretical_loss": 3.668941286415355, + "tokens_seen": 1025507328 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.001169724389910698, + "objective/train/docs_used": 376516, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2908331155776978, + "objective/train/original_loss": 1.2908332347869873, + "objective/train/theoretical_loss": 3.6688972151505803, + "objective/train/tokens_used": 1046098400, + "objective/train/value_avg": -0.006412506103515625, + "objective/train/value_loss": 0.00014659865701105446, + "objective/train/value_max": -6.204843521118164e-05, + "objective/train/value_min": -0.332275390625, + "objective/train/value_reward_corr": 0.7111525298676997, + "objective/train/value_std": 0.012237548828125, + "objective/train/weight_avg": 1.0012383460998535, + "objective/train/weighted_lm_loss": 1.2924283742904663, + "objective/train/weights_max": 1.180111289024353, + "objective/train/weights_min": 0.3988421857357025, + "theoretical_loss": 3.6688972151505803, + "tokens_seen": 1025638400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006961161932274114, + "loss": 0.0731, + "theoretical_loss": 3.668853151094318, + "tokens_seen": 1025769472 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006960359492858289, + "loss": 0.0728, + "theoretical_loss": 3.6687650445989295, + "tokens_seen": 1026031616 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006959557053442466, + "loss": 0.0732, + "theoretical_loss": 3.6686769669124004, + "tokens_seen": 1026293760 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006958754614026641, + "loss": 0.0744, + "theoretical_loss": 3.6685889180179565, + "tokens_seen": 1026555904 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006957952174610816, + "loss": 0.0741, + "theoretical_loss": 3.668500897898837, + "tokens_seen": 1026818048 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006957149735194993, + "loss": 0.0754, + "theoretical_loss": 3.668412906538295, + "tokens_seen": 1027080192 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006956347295779168, + "loss": 0.0759, + "theoretical_loss": 3.6683249439195977, + "tokens_seen": 1027342336 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006955544856363345, + "loss": 0.0751, + "theoretical_loss": 3.668237010026026, + "tokens_seen": 1027604480 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006954742416947521, + "loss": 0.0754, + "theoretical_loss": 3.668149104840876, + "tokens_seen": 1027866624 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006953939977531697, + "loss": 0.0749, + "theoretical_loss": 3.6680612283474567, + "tokens_seen": 1028128768 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006953137538115872, + "loss": 0.0743, + "theoretical_loss": 3.667973380529091, + "tokens_seen": 1028390912 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006952335098700048, + "loss": 0.0741, + "theoretical_loss": 3.6678855613691157, + "tokens_seen": 1028653056 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0010213935747742653, + "objective/train/docs_used": 377682, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4129959344863892, + "objective/train/original_loss": 1.4129958152770996, + "objective/train/theoretical_loss": 3.667797770850883, + "objective/train/tokens_used": 1049375200, + "objective/train/value_avg": -0.007488250732421875, + "objective/train/value_loss": 0.00019148027058690786, + "objective/train/value_max": -0.0001055598258972168, + "objective/train/value_min": -0.326904296875, + "objective/train/value_reward_corr": 0.5655283244055973, + "objective/train/value_std": 0.01061248779296875, + "objective/train/weight_avg": 1.0011082887649536, + "objective/train/weighted_lm_loss": 1.4145474433898926, + "objective/train/weights_max": 1.0799387693405151, + "objective/train/weights_min": 0.3737297058105469, + "theoretical_loss": 3.667797770850883, + "tokens_seen": 1028915200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006951532659284224, + "loss": 0.0734, + "theoretical_loss": 3.667797770850883, + "tokens_seen": 1028915200 + }, + { + "epoch": 0.31, + "learning_rate": 0.00069507302198684, + "loss": 0.0748, + "theoretical_loss": 3.667710008957756, + "tokens_seen": 1029177344 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006949927780452576, + "loss": 0.0776, + "theoretical_loss": 3.667622275673115, + "tokens_seen": 1029439488 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006949125341036751, + "loss": 0.0747, + "theoretical_loss": 3.667534570980353, + "tokens_seen": 1029701632 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006948322901620929, + "loss": 0.076, + "theoretical_loss": 3.667446894862876, + "tokens_seen": 1029963776 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006947520462205104, + "loss": 0.0731, + "theoretical_loss": 3.667359247304104, + "tokens_seen": 1030225920 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006946718022789279, + "loss": 0.074, + "theoretical_loss": 3.667271628287472, + "tokens_seen": 1030488064 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006945915583373456, + "loss": 0.0754, + "theoretical_loss": 3.6671840377964275, + "tokens_seen": 1030750208 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006945113143957631, + "loss": 0.0756, + "theoretical_loss": 3.667096475814433, + "tokens_seen": 1031012352 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006944310704541807, + "loss": 0.0751, + "theoretical_loss": 3.6670089423249643, + "tokens_seen": 1031274496 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006943508265125983, + "loss": 0.0785, + "theoretical_loss": 3.6669214373115104, + "tokens_seen": 1031536640 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006942705825710159, + "loss": 0.0746, + "theoretical_loss": 3.6668339607575744, + "tokens_seen": 1031798784 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006941903386294334, + "loss": 0.0726, + "theoretical_loss": 3.6667465126466743, + "tokens_seen": 1032060928 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.0018603794742375612, + "objective/train/docs_used": 378946, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4675285816192627, + "objective/train/original_loss": 1.4675285816192627, + "objective/train/theoretical_loss": 3.6667027992522145, + "objective/train/tokens_used": 1052652000, + "objective/train/value_avg": -0.00775909423828125, + "objective/train/value_loss": 0.00012807230814360082, + "objective/train/value_max": -0.0001150369644165039, + "objective/train/value_min": -0.2120361328125, + "objective/train/value_reward_corr": 0.680010560022861, + "objective/train/value_std": 0.01026153564453125, + "objective/train/weight_avg": 1.0019227266311646, + "objective/train/weighted_lm_loss": 1.4709603786468506, + "objective/train/weights_max": 1.1424355506896973, + "objective/train/weights_min": 0.5569349527359009, + "theoretical_loss": 3.6667027992522145, + "tokens_seen": 1032192000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006941100946878512, + "loss": 0.0707, + "theoretical_loss": 3.6666590929623393, + "tokens_seen": 1032323072 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006940298507462687, + "loss": 0.0717, + "theoretical_loss": 3.666571701688115, + "tokens_seen": 1032585216 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006939496068046863, + "loss": 0.0769, + "theoretical_loss": 3.6664843388075594, + "tokens_seen": 1032847360 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006938693628631039, + "loss": 0.0707, + "theoretical_loss": 3.6663970043042435, + "tokens_seen": 1033109504 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006937891189215214, + "loss": 0.0716, + "theoretical_loss": 3.6663096981617533, + "tokens_seen": 1033371648 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006937088749799391, + "loss": 0.0717, + "theoretical_loss": 3.6662224203636886, + "tokens_seen": 1033633792 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006936286310383566, + "loss": 0.0744, + "theoretical_loss": 3.6661351708936616, + "tokens_seen": 1033895936 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006935483870967742, + "loss": 0.0694, + "theoretical_loss": 3.6660479497352982, + "tokens_seen": 1034158080 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006934681431551918, + "loss": 0.0754, + "theoretical_loss": 3.665960756872239, + "tokens_seen": 1034420224 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006933878992136093, + "loss": 0.0745, + "theoretical_loss": 3.6658735922881376, + "tokens_seen": 1034682368 + }, + { + "epoch": 0.31, + "learning_rate": 0.000693307655272027, + "loss": 0.0745, + "theoretical_loss": 3.665786455966661, + "tokens_seen": 1034944512 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006932274113304446, + "loss": 0.0747, + "theoretical_loss": 3.6656993478914903, + "tokens_seen": 1035206656 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": 0.00034525172668509185, + "objective/train/docs_used": 379908, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.507829189300537, + "objective/train/original_loss": 1.507829189300537, + "objective/train/theoretical_loss": 3.6656122680463197, + "objective/train/tokens_used": 1055928800, + "objective/train/value_avg": -0.00595855712890625, + "objective/train/value_loss": 9.688719001132995e-05, + "objective/train/value_max": -0.00011682510375976562, + "objective/train/value_min": -0.22119140625, + "objective/train/value_reward_corr": 0.7160243405342503, + "objective/train/value_std": 0.00960540771484375, + "objective/train/weight_avg": 1.000393033027649, + "objective/train/weighted_lm_loss": 1.5080686807632446, + "objective/train/weights_max": 1.0766479969024658, + "objective/train/weights_min": 0.820656955242157, + "theoretical_loss": 3.6656122680463197, + "tokens_seen": 1035468800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006931471673888622, + "loss": 0.0755, + "theoretical_loss": 3.6656122680463197, + "tokens_seen": 1035468800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006930669234472797, + "loss": 0.0731, + "theoretical_loss": 3.6655252164148564, + "tokens_seen": 1035730944 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006929866795056974, + "loss": 0.0761, + "theoretical_loss": 3.6654381929808233, + "tokens_seen": 1035993088 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006929064355641149, + "loss": 0.0735, + "theoretical_loss": 3.6653511977279534, + "tokens_seen": 1036255232 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006928261916225324, + "loss": 0.0746, + "theoretical_loss": 3.6652642306399965, + "tokens_seen": 1036517376 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006927459476809501, + "loss": 0.075, + "theoretical_loss": 3.6651772917007137, + "tokens_seen": 1036779520 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006926657037393676, + "loss": 0.0756, + "theoretical_loss": 3.6650903808938806, + "tokens_seen": 1037041664 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006925854597977854, + "loss": 0.0752, + "theoretical_loss": 3.6650034982032857, + "tokens_seen": 1037303808 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006925052158562029, + "loss": 0.0735, + "theoretical_loss": 3.664916643612732, + "tokens_seen": 1037565952 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006924249719146205, + "loss": 0.0729, + "theoretical_loss": 3.6648298171060345, + "tokens_seen": 1037828096 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006923447279730381, + "loss": 0.0729, + "theoretical_loss": 3.6647430186670222, + "tokens_seen": 1038090240 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006922644840314556, + "loss": 0.0733, + "theoretical_loss": 3.6646562482795373, + "tokens_seen": 1038352384 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006921842400898732, + "loss": 0.0722, + "theoretical_loss": 3.664569505927436, + "tokens_seen": 1038614528 + }, + { + "epoch": 0.31, + "objective/train/advantage_avg": -0.00035626679891720414, + "objective/train/docs_used": 381080, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3043032884597778, + "objective/train/original_loss": 1.3043031692504883, + "objective/train/theoretical_loss": 3.6645261452596136, + "objective/train/tokens_used": 1059205600, + "objective/train/value_avg": -0.00853729248046875, + "objective/train/value_loss": 0.00025545063544996083, + "objective/train/value_max": -7.724761962890625e-05, + "objective/train/value_min": -0.296142578125, + "objective/train/value_reward_corr": 0.7234297240140802, + "objective/train/value_std": 0.01416015625, + "objective/train/weight_avg": 0.9997609257698059, + "objective/train/weighted_lm_loss": 1.3044672012329102, + "objective/train/weights_max": 1.3083919286727905, + "objective/train/weights_min": 0.3739407956600189, + "theoretical_loss": 3.6645261452596136, + "tokens_seen": 1038745600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006921039961482908, + "loss": 0.0737, + "theoretical_loss": 3.664482791594588, + "tokens_seen": 1038876672 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006920237522067084, + "loss": 0.0738, + "theoretical_loss": 3.664396105264875, + "tokens_seen": 1039138816 + }, + { + "epoch": 0.31, + "learning_rate": 0.0006919435082651259, + "loss": 0.0736, + "theoretical_loss": 3.6643094469221933, + "tokens_seen": 1039400960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006918632643235437, + "loss": 0.0734, + "theoretical_loss": 3.664222816550452, + "tokens_seen": 1039663104 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006917830203819612, + "loss": 0.0722, + "theoretical_loss": 3.6641362141335727, + "tokens_seen": 1039925248 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006917027764403787, + "loss": 0.0733, + "theoretical_loss": 3.6640496396554925, + "tokens_seen": 1040187392 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006916225324987964, + "loss": 0.0753, + "theoretical_loss": 3.6639630931001594, + "tokens_seen": 1040449536 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006915422885572139, + "loss": 0.0737, + "theoretical_loss": 3.6638765744515367, + "tokens_seen": 1040711680 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006914620446156316, + "loss": 0.0761, + "theoretical_loss": 3.6637900836935993, + "tokens_seen": 1040973824 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006913818006740491, + "loss": 0.0736, + "theoretical_loss": 3.6637036208103364, + "tokens_seen": 1041235968 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006913015567324667, + "loss": 0.0712, + "theoretical_loss": 3.663617185785749, + "tokens_seen": 1041498112 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006912213127908843, + "loss": 0.0725, + "theoretical_loss": 3.6635307786038536, + "tokens_seen": 1041760256 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0007461439236067235, + "objective/train/docs_used": 382133, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4432910680770874, + "objective/train/original_loss": 1.4432913064956665, + "objective/train/theoretical_loss": 3.663444399248678, + "objective/train/tokens_used": 1062482400, + "objective/train/value_avg": -0.006084442138671875, + "objective/train/value_loss": 0.00022422554320655763, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.318603515625, + "objective/train/value_reward_corr": 0.5543261672765586, + "objective/train/value_std": 0.00926971435546875, + "objective/train/weight_avg": 1.0008444786071777, + "objective/train/weighted_lm_loss": 1.443787693977356, + "objective/train/weights_max": 1.2419401407241821, + "objective/train/weights_min": 0.3867599070072174, + "theoretical_loss": 3.663444399248678, + "tokens_seen": 1042022400 + }, + { + "epoch": 0.32, + "learning_rate": 0.000691141068849302, + "loss": 0.072, + "theoretical_loss": 3.663444399248678, + "tokens_seen": 1042022400 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006910608249077195, + "loss": 0.0754, + "theoretical_loss": 3.6633580477042633, + "tokens_seen": 1042284544 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006909805809661371, + "loss": 0.0734, + "theoretical_loss": 3.663271723954665, + "tokens_seen": 1042546688 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006909003370245547, + "loss": 0.074, + "theoretical_loss": 3.6631854279839513, + "tokens_seen": 1042808832 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006908200930829722, + "loss": 0.0703, + "theoretical_loss": 3.6630991597762024, + "tokens_seen": 1043070976 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006907398491413899, + "loss": 0.0744, + "theoretical_loss": 3.6630129193155128, + "tokens_seen": 1043333120 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006906596051998074, + "loss": 0.0735, + "theoretical_loss": 3.6629267065859894, + "tokens_seen": 1043595264 + }, + { + "epoch": 0.32, + "learning_rate": 0.000690579361258225, + "loss": 0.0756, + "theoretical_loss": 3.662840521571753, + "tokens_seen": 1043857408 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006904991173166426, + "loss": 0.0741, + "theoretical_loss": 3.662754364256937, + "tokens_seen": 1044119552 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006904188733750601, + "loss": 0.0773, + "theoretical_loss": 3.662668234625688, + "tokens_seen": 1044381696 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006903386294334778, + "loss": 0.072, + "theoretical_loss": 3.6625821326621653, + "tokens_seen": 1044643840 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006902583854918954, + "loss": 0.0761, + "theoretical_loss": 3.6624960583505404, + "tokens_seen": 1044905984 + }, + { + "epoch": 0.32, + "learning_rate": 0.000690178141550313, + "loss": 0.0717, + "theoretical_loss": 3.662410011675001, + "tokens_seen": 1045168128 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0007527482812292874, + "objective/train/docs_used": 383342, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4821735620498657, + "objective/train/original_loss": 1.4821735620498657, + "objective/train/theoretical_loss": 3.6623669986958247, + "objective/train/tokens_used": 1065759200, + "objective/train/value_avg": -0.008392333984375, + "objective/train/value_loss": 0.00048586874618195, + "objective/train/value_max": -0.00010722875595092773, + "objective/train/value_min": -0.705078125, + "objective/train/value_reward_corr": 0.709452779874576, + "objective/train/value_std": 0.0186614990234375, + "objective/train/weight_avg": 1.000962257385254, + "objective/train/weighted_lm_loss": 1.482825517654419, + "objective/train/weights_max": 1.404705286026001, + "objective/train/weights_min": 0.36978310346603394, + "theoretical_loss": 3.6623669986958247, + "tokens_seen": 1045299200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006900978976087306, + "loss": 0.0748, + "theoretical_loss": 3.6623239926197444, + "tokens_seen": 1045430272 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006900176536671482, + "loss": 0.0741, + "theoretical_loss": 3.6622380011689826, + "tokens_seen": 1045692416 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006899374097255657, + "loss": 0.0736, + "theoretical_loss": 3.66215203730694, + "tokens_seen": 1045954560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006898571657839833, + "loss": 0.0728, + "theoretical_loss": 3.6620661010178543, + "tokens_seen": 1046216704 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006897769218424009, + "loss": 0.0738, + "theoretical_loss": 3.6619801922859763, + "tokens_seen": 1046478848 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006896966779008184, + "loss": 0.0728, + "theoretical_loss": 3.661894311095568, + "tokens_seen": 1046740992 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006896164339592362, + "loss": 0.0752, + "theoretical_loss": 3.6618084574309075, + "tokens_seen": 1047003136 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006895361900176537, + "loss": 0.0727, + "theoretical_loss": 3.6617226312762834, + "tokens_seen": 1047265280 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006894559460760713, + "loss": 0.0724, + "theoretical_loss": 3.6616368326159976, + "tokens_seen": 1047527424 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006893757021344889, + "loss": 0.0748, + "theoretical_loss": 3.6615510614343654, + "tokens_seen": 1047789568 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006892954581929064, + "loss": 0.0714, + "theoretical_loss": 3.661465317715715, + "tokens_seen": 1048051712 + }, + { + "epoch": 0.32, + "learning_rate": 0.000689215214251324, + "loss": 0.073, + "theoretical_loss": 3.6613796014443865, + "tokens_seen": 1048313856 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0009373921202495694, + "objective/train/docs_used": 384505, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5765043497085571, + "objective/train/original_loss": 1.5765044689178467, + "objective/train/theoretical_loss": 3.661293912604734, + "objective/train/tokens_used": 1069036000, + "objective/train/value_avg": -0.007293701171875, + "objective/train/value_loss": 0.00017708793166093528, + "objective/train/value_max": -5.561113357543945e-05, + "objective/train/value_min": -0.2237548828125, + "objective/train/value_reward_corr": 0.6360596224023429, + "objective/train/value_std": 0.0101165771484375, + "objective/train/weight_avg": 1.001020908355713, + "objective/train/weighted_lm_loss": 1.5784999132156372, + "objective/train/weights_max": 1.112996220588684, + "objective/train/weights_min": 0.3697379231452942, + "theoretical_loss": 3.661293912604734, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006891349703097416, + "loss": 0.0727, + "theoretical_loss": 3.661293912604734, + "tokens_seen": 1048576000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006890547263681592, + "loss": 0.0737, + "theoretical_loss": 3.661208251181124, + "tokens_seen": 1048838144 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006889744824265767, + "loss": 0.0719, + "theoretical_loss": 3.6611226171579356, + "tokens_seen": 1049100288 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006888942384849945, + "loss": 0.0739, + "theoretical_loss": 3.6610370105195607, + "tokens_seen": 1049362432 + }, + { + "epoch": 0.32, + "learning_rate": 0.000688813994543412, + "loss": 0.0753, + "theoretical_loss": 3.660951431250405, + "tokens_seen": 1049624576 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006887337506018296, + "loss": 0.0712, + "theoretical_loss": 3.6608658793348847, + "tokens_seen": 1049886720 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006886535066602472, + "loss": 0.074, + "theoretical_loss": 3.6607803547574314, + "tokens_seen": 1050148864 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006885732627186647, + "loss": 0.0755, + "theoretical_loss": 3.660694857502487, + "tokens_seen": 1050411008 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006884930187770824, + "loss": 0.073, + "theoretical_loss": 3.660609387554509, + "tokens_seen": 1050673152 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006884127748354999, + "loss": 0.0763, + "theoretical_loss": 3.6605239448979647, + "tokens_seen": 1050935296 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006883325308939175, + "loss": 0.0751, + "theoretical_loss": 3.660438529517336, + "tokens_seen": 1051197440 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006882522869523351, + "loss": 0.0762, + "theoretical_loss": 3.660353141397116, + "tokens_seen": 1051459584 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006881720430107528, + "loss": 0.0766, + "theoretical_loss": 3.6602677805218127, + "tokens_seen": 1051721728 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.00259937415830791, + "objective/train/docs_used": 385698, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4460582733154297, + "objective/train/original_loss": 1.4460582733154297, + "objective/train/theoretical_loss": 3.660225110296166, + "objective/train/tokens_used": 1072312800, + "objective/train/value_avg": -0.0083160400390625, + "objective/train/value_loss": 0.0003496213466860354, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.708984375, + "objective/train/value_reward_corr": 0.6992441106554197, + "objective/train/value_std": 0.0217742919921875, + "objective/train/weight_avg": 1.0027629137039185, + "objective/train/weighted_lm_loss": 1.450356125831604, + "objective/train/weights_max": 1.4854079484939575, + "objective/train/weights_min": 0.3811357617378235, + "theoretical_loss": 3.660225110296166, + "tokens_seen": 1051852800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006880917990691703, + "loss": 0.0753, + "theoretical_loss": 3.660182446875944, + "tokens_seen": 1051983872 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006880115551275879, + "loss": 0.0699, + "theoretical_loss": 3.6600971404440434, + "tokens_seen": 1052246016 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006879313111860055, + "loss": 0.0714, + "theoretical_loss": 3.660011861210654, + "tokens_seen": 1052508160 + }, + { + "epoch": 0.32, + "learning_rate": 0.000687851067244423, + "loss": 0.0713, + "theoretical_loss": 3.659926609160334, + "tokens_seen": 1052770304 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006877708233028407, + "loss": 0.0699, + "theoretical_loss": 3.6598413842776534, + "tokens_seen": 1053032448 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006876905793612582, + "loss": 0.073, + "theoretical_loss": 3.6597561865471935, + "tokens_seen": 1053294592 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006876103354196759, + "loss": 0.0773, + "theoretical_loss": 3.6596710159535504, + "tokens_seen": 1053556736 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006875300914780934, + "loss": 0.0752, + "theoretical_loss": 3.659585872481331, + "tokens_seen": 1053818880 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006874498475365109, + "loss": 0.075, + "theoretical_loss": 3.659500756115156, + "tokens_seen": 1054081024 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006873696035949287, + "loss": 0.0736, + "theoretical_loss": 3.659415666839658, + "tokens_seen": 1054343168 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006872893596533462, + "loss": 0.0723, + "theoretical_loss": 3.6593306046394813, + "tokens_seen": 1054605312 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006872091157117638, + "loss": 0.0748, + "theoretical_loss": 3.6592455694992854, + "tokens_seen": 1054867456 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0018493332900106907, + "objective/train/docs_used": 386814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4244455099105835, + "objective/train/original_loss": 1.424445390701294, + "objective/train/theoretical_loss": 3.659160561403739, + "objective/train/tokens_used": 1075589600, + "objective/train/value_avg": -0.00870513916015625, + "objective/train/value_loss": 0.00016267823230009526, + "objective/train/value_max": -0.00012636184692382812, + "objective/train/value_min": -0.32421875, + "objective/train/value_reward_corr": 0.6824163394532532, + "objective/train/value_std": 0.01215362548828125, + "objective/train/weight_avg": 1.0019264221191406, + "objective/train/weighted_lm_loss": 1.426944375038147, + "objective/train/weights_max": 1.2696858644485474, + "objective/train/weights_min": 0.3791867196559906, + "theoretical_loss": 3.659160561403739, + "tokens_seen": 1055129600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006871288717701814, + "loss": 0.0698, + "theoretical_loss": 3.659160561403739, + "tokens_seen": 1055129600 + }, + { + "epoch": 0.32, + "learning_rate": 0.000687048627828599, + "loss": 0.0717, + "theoretical_loss": 3.6590755803375252, + "tokens_seen": 1055391744 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006869683838870165, + "loss": 0.0734, + "theoretical_loss": 3.65899062628534, + "tokens_seen": 1055653888 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006868881399454341, + "loss": 0.0726, + "theoretical_loss": 3.65890569923189, + "tokens_seen": 1055916032 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006868078960038517, + "loss": 0.0742, + "theoretical_loss": 3.658820799161896, + "tokens_seen": 1056178176 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006867276520622692, + "loss": 0.0748, + "theoretical_loss": 3.65873592606009, + "tokens_seen": 1056440320 + }, + { + "epoch": 0.32, + "learning_rate": 0.000686647408120687, + "loss": 0.0707, + "theoretical_loss": 3.658651079911218, + "tokens_seen": 1056702464 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006865671641791045, + "loss": 0.0736, + "theoretical_loss": 3.658566260700036, + "tokens_seen": 1056964608 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006864869202375221, + "loss": 0.0734, + "theoretical_loss": 3.658481468411315, + "tokens_seen": 1057226752 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006864066762959397, + "loss": 0.0714, + "theoretical_loss": 3.6583967030298368, + "tokens_seen": 1057488896 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006863264323543572, + "loss": 0.0745, + "theoretical_loss": 3.6583119645403954, + "tokens_seen": 1057751040 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006862461884127749, + "loss": 0.071, + "theoretical_loss": 3.658227252927799, + "tokens_seen": 1058013184 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006861659444711924, + "loss": 0.0704, + "theoretical_loss": 3.6581425681768653, + "tokens_seen": 1058275328 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0014182066079229116, + "objective/train/docs_used": 388023, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3999180793762207, + "objective/train/original_loss": 1.3999180793762207, + "objective/train/theoretical_loss": 3.6581002358697816, + "objective/train/tokens_used": 1078866400, + "objective/train/value_avg": -0.0078582763671875, + "objective/train/value_loss": 0.00017249694792553782, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.32958984375, + "objective/train/value_reward_corr": 0.6715197510150517, + "objective/train/value_std": 0.01299285888671875, + "objective/train/weight_avg": 1.001496434211731, + "objective/train/weighted_lm_loss": 1.4018974304199219, + "objective/train/weights_max": 1.1797270774841309, + "objective/train/weights_min": 0.3719121813774109, + "theoretical_loss": 3.6581002358697816, + "tokens_seen": 1058406400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00068608570052961, + "loss": 0.0719, + "theoretical_loss": 3.6580579102724267, + "tokens_seen": 1058537472 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006860054565880276, + "loss": 0.0734, + "theoretical_loss": 3.657973279199327, + "tokens_seen": 1058799616 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006859252126464453, + "loss": 0.0735, + "theoretical_loss": 3.6578886749424226, + "tokens_seen": 1059061760 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006858449687048628, + "loss": 0.077, + "theoretical_loss": 3.657804097486581, + "tokens_seen": 1059323904 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006857647247632804, + "loss": 0.0704, + "theoretical_loss": 3.657719546816685, + "tokens_seen": 1059586048 + }, + { + "epoch": 0.32, + "learning_rate": 0.000685684480821698, + "loss": 0.071, + "theoretical_loss": 3.657635022917626, + "tokens_seen": 1059848192 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006856042368801155, + "loss": 0.0714, + "theoretical_loss": 3.65755052577431, + "tokens_seen": 1060110336 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006855239929385332, + "loss": 0.071, + "theoretical_loss": 3.657466055371654, + "tokens_seen": 1060372480 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006854437489969507, + "loss": 0.0722, + "theoretical_loss": 3.657381611694588, + "tokens_seen": 1060634624 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006853635050553683, + "loss": 0.073, + "theoretical_loss": 3.6572971947280544, + "tokens_seen": 1060896768 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006852832611137859, + "loss": 0.0763, + "theoretical_loss": 3.6572128044570067, + "tokens_seen": 1061158912 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006852030171722034, + "loss": 0.0725, + "theoretical_loss": 3.657128440866412, + "tokens_seen": 1061421056 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0011737276799976826, + "objective/train/docs_used": 389244, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5271493196487427, + "objective/train/original_loss": 1.5271493196487427, + "objective/train/theoretical_loss": 3.6570441039412485, + "objective/train/tokens_used": 1082143200, + "objective/train/value_avg": -0.00862884521484375, + "objective/train/value_loss": 0.0003618821792770177, + "objective/train/value_max": -0.00017261505126953125, + "objective/train/value_min": -0.403076171875, + "objective/train/value_reward_corr": 0.6260921546392633, + "objective/train/value_std": 0.01381683349609375, + "objective/train/weight_avg": 1.0013278722763062, + "objective/train/weighted_lm_loss": 1.529421329498291, + "objective/train/weights_max": 1.1920899152755737, + "objective/train/weights_min": 0.37519246339797974, + "theoretical_loss": 3.6570441039412485, + "tokens_seen": 1061683200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006851227732306211, + "loss": 0.0731, + "theoretical_loss": 3.6570441039412485, + "tokens_seen": 1061683200 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006850425292890387, + "loss": 0.0734, + "theoretical_loss": 3.6569597936665064, + "tokens_seen": 1061945344 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006849622853474563, + "loss": 0.0754, + "theoretical_loss": 3.6568755100271897, + "tokens_seen": 1062207488 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006848820414058739, + "loss": 0.0727, + "theoretical_loss": 3.656791253008313, + "tokens_seen": 1062469632 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006848017974642915, + "loss": 0.0737, + "theoretical_loss": 3.6567070225949028, + "tokens_seen": 1062731776 + }, + { + "epoch": 0.32, + "learning_rate": 0.000684721553522709, + "loss": 0.0727, + "theoretical_loss": 3.656622818771999, + "tokens_seen": 1062993920 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006846413095811267, + "loss": 0.0711, + "theoretical_loss": 3.6565386415246524, + "tokens_seen": 1063256064 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006845610656395442, + "loss": 0.0719, + "theoretical_loss": 3.6564544908379273, + "tokens_seen": 1063518208 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006844808216979617, + "loss": 0.0724, + "theoretical_loss": 3.6563703666968985, + "tokens_seen": 1063780352 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006844005777563795, + "loss": 0.0709, + "theoretical_loss": 3.656286269086653, + "tokens_seen": 1064042496 + }, + { + "epoch": 0.32, + "learning_rate": 0.000684320333814797, + "loss": 0.0733, + "theoretical_loss": 3.6562021979922923, + "tokens_seen": 1064304640 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006842400898732146, + "loss": 0.0755, + "theoretical_loss": 3.6561181533989267, + "tokens_seen": 1064566784 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006841598459316322, + "loss": 0.0731, + "theoretical_loss": 3.6560341352916796, + "tokens_seen": 1064828928 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.001128622330725193, + "objective/train/docs_used": 390462, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3764036893844604, + "objective/train/original_loss": 1.376403570175171, + "objective/train/theoretical_loss": 3.6559921361657057, + "objective/train/tokens_used": 1085420000, + "objective/train/value_avg": -0.01018524169921875, + "objective/train/value_loss": 0.00021231910795904696, + "objective/train/value_max": -6.35385513305664e-05, + "objective/train/value_min": -0.358642578125, + "objective/train/value_reward_corr": 0.7759159261494163, + "objective/train/value_std": 0.0171661376953125, + "objective/train/weight_avg": 1.0012286901474, + "objective/train/weighted_lm_loss": 1.3785223960876465, + "objective/train/weights_max": 1.2096216678619385, + "objective/train/weights_min": 0.37203988432884216, + "theoretical_loss": 3.6559921361657057, + "tokens_seen": 1064960000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006840796019900498, + "loss": 0.073, + "theoretical_loss": 3.655950143655688, + "tokens_seen": 1065091072 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006839993580484673, + "loss": 0.0734, + "theoretical_loss": 3.655866178476098, + "tokens_seen": 1065353216 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006839191141068849, + "loss": 0.0728, + "theoretical_loss": 3.65578223973807, + "tokens_seen": 1065615360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006838388701653025, + "loss": 0.077, + "theoretical_loss": 3.6556983274267765, + "tokens_seen": 1065877504 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006837586262237201, + "loss": 0.0731, + "theoretical_loss": 3.6556144415273994, + "tokens_seen": 1066139648 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006836783822821378, + "loss": 0.0734, + "theoretical_loss": 3.655530582025136, + "tokens_seen": 1066401792 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006835981383405553, + "loss": 0.0718, + "theoretical_loss": 3.6554467489051925, + "tokens_seen": 1066663936 + }, + { + "epoch": 0.32, + "learning_rate": 0.000683517894398973, + "loss": 0.0745, + "theoretical_loss": 3.6553629421527885, + "tokens_seen": 1066926080 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006834376504573905, + "loss": 0.0743, + "theoretical_loss": 3.655279161753156, + "tokens_seen": 1067188224 + }, + { + "epoch": 0.32, + "learning_rate": 0.000683357406515808, + "loss": 0.0712, + "theoretical_loss": 3.6551954076915374, + "tokens_seen": 1067450368 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006832771625742257, + "loss": 0.0727, + "theoretical_loss": 3.655111679953188, + "tokens_seen": 1067712512 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006831969186326432, + "loss": 0.0737, + "theoretical_loss": 3.6550279785233757, + "tokens_seen": 1067974656 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0003532489645294845, + "objective/train/docs_used": 391774, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5375131368637085, + "objective/train/original_loss": 1.5375131368637085, + "objective/train/theoretical_loss": 3.654944303387378, + "objective/train/tokens_used": 1088696800, + "objective/train/value_avg": -0.00994873046875, + "objective/train/value_loss": 0.00020713380945380777, + "objective/train/value_max": -7.724761962890625e-05, + "objective/train/value_min": -0.334228515625, + "objective/train/value_reward_corr": 0.7750881540426269, + "objective/train/value_std": 0.0160369873046875, + "objective/train/weight_avg": 1.000451683998108, + "objective/train/weighted_lm_loss": 1.5384892225265503, + "objective/train/weights_max": 1.286349892616272, + "objective/train/weights_min": 0.3698126971721649, + "theoretical_loss": 3.654944303387378, + "tokens_seen": 1068236800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006831166746910608, + "loss": 0.0735, + "theoretical_loss": 3.654944303387378, + "tokens_seen": 1068236800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006830364307494784, + "loss": 0.073, + "theoretical_loss": 3.654860654530486, + "tokens_seen": 1068498944 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006829561868078961, + "loss": 0.0742, + "theoretical_loss": 3.6547770319380026, + "tokens_seen": 1068761088 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006828759428663136, + "loss": 0.0712, + "theoretical_loss": 3.6546934355952425, + "tokens_seen": 1069023232 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006827956989247312, + "loss": 0.0743, + "theoretical_loss": 3.6546098654875303, + "tokens_seen": 1069285376 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006827154549831488, + "loss": 0.0743, + "theoretical_loss": 3.654526321600205, + "tokens_seen": 1069547520 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006826352110415663, + "loss": 0.0728, + "theoretical_loss": 3.6544428039186165, + "tokens_seen": 1069809664 + }, + { + "epoch": 0.32, + "learning_rate": 0.000682554967099984, + "loss": 0.0716, + "theoretical_loss": 3.6543593124281264, + "tokens_seen": 1070071808 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006824747231584015, + "loss": 0.074, + "theoretical_loss": 3.654275847114107, + "tokens_seen": 1070333952 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006823944792168192, + "loss": 0.0762, + "theoretical_loss": 3.6541924079619443, + "tokens_seen": 1070596096 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006823142352752367, + "loss": 0.0741, + "theoretical_loss": 3.654108994957034, + "tokens_seen": 1070858240 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006822339913336542, + "loss": 0.0751, + "theoretical_loss": 3.654025608084786, + "tokens_seen": 1071120384 + }, + { + "epoch": 0.32, + "learning_rate": 0.000682153747392072, + "loss": 0.0744, + "theoretical_loss": 3.653942247330619, + "tokens_seen": 1071382528 + }, + { + "epoch": 0.32, + "objective/train/advantage_avg": 0.0008207072969526052, + "objective/train/docs_used": 393037, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5273302793502808, + "objective/train/original_loss": 1.5273303985595703, + "objective/train/theoretical_loss": 3.6539005767432635, + "objective/train/tokens_used": 1091973600, + "objective/train/value_avg": -0.0079193115234375, + "objective/train/value_loss": 0.00018726506095845252, + "objective/train/value_max": -4.792213439941406e-05, + "objective/train/value_min": -0.328125, + "objective/train/value_reward_corr": 0.6683278593111599, + "objective/train/value_std": 0.0133209228515625, + "objective/train/weight_avg": 1.0009064674377441, + "objective/train/weighted_lm_loss": 1.5285539627075195, + "objective/train/weights_max": 1.301619052886963, + "objective/train/weights_min": 0.3697463870048523, + "theoretical_loss": 3.6539005767432635, + "tokens_seen": 1071513600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006820735034504895, + "loss": 0.0727, + "theoretical_loss": 3.653858912679966, + "tokens_seen": 1071644672 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006819932595089071, + "loss": 0.0737, + "theoretical_loss": 3.6537756041182696, + "tokens_seen": 1071906816 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006819130155673247, + "loss": 0.0739, + "theoretical_loss": 3.6536923216309862, + "tokens_seen": 1072168960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0006818327716257423, + "loss": 0.0744, + "theoretical_loss": 3.653609065203582, + "tokens_seen": 1072431104 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006817525276841598, + "loss": 0.0724, + "theoretical_loss": 3.6535258348215356, + "tokens_seen": 1072693248 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006816722837425775, + "loss": 0.0748, + "theoretical_loss": 3.653442630470337, + "tokens_seen": 1072955392 + }, + { + "epoch": 0.33, + "learning_rate": 0.000681592039800995, + "loss": 0.074, + "theoretical_loss": 3.653359452135488, + "tokens_seen": 1073217536 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006815117958594125, + "loss": 0.0737, + "theoretical_loss": 3.653276299802503, + "tokens_seen": 1073479680 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006814315519178303, + "loss": 0.0735, + "theoretical_loss": 3.6531931734569056, + "tokens_seen": 1073741824 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006813513079762478, + "loss": 0.0724, + "theoretical_loss": 3.6531100730842336, + "tokens_seen": 1074003968 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006812710640346655, + "loss": 0.0747, + "theoretical_loss": 3.653026998670035, + "tokens_seen": 1074266112 + }, + { + "epoch": 0.33, + "learning_rate": 0.000681190820093083, + "loss": 0.0734, + "theoretical_loss": 3.652943950199869, + "tokens_seen": 1074528256 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0013173749903216958, + "objective/train/docs_used": 394219, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3774545192718506, + "objective/train/original_loss": 1.3774546384811401, + "objective/train/theoretical_loss": 3.652860927659307, + "objective/train/tokens_used": 1095250400, + "objective/train/value_avg": -0.009002685546875, + "objective/train/value_loss": 0.000231547121074982, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.359130859375, + "objective/train/value_reward_corr": 0.6883898461921806, + "objective/train/value_std": 0.01419830322265625, + "objective/train/weight_avg": 1.0014238357543945, + "objective/train/weighted_lm_loss": 1.3791875839233398, + "objective/train/weights_max": 1.1214268207550049, + "objective/train/weights_min": 0.3686865568161011, + "theoretical_loss": 3.652860927659307, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006811105761515006, + "loss": 0.0735, + "theoretical_loss": 3.652860927659307, + "tokens_seen": 1074790400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006810303322099182, + "loss": 0.0728, + "theoretical_loss": 3.6527779310339326, + "tokens_seen": 1075052544 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006809500882683357, + "loss": 0.0741, + "theoretical_loss": 3.652694960309339, + "tokens_seen": 1075314688 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006808698443267533, + "loss": 0.075, + "theoretical_loss": 3.6526120154711332, + "tokens_seen": 1075576832 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006807896003851709, + "loss": 0.0721, + "theoretical_loss": 3.6525290965049324, + "tokens_seen": 1075838976 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006807093564435886, + "loss": 0.0738, + "theoretical_loss": 3.652446203396365, + "tokens_seen": 1076101120 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006806291125020061, + "loss": 0.0731, + "theoretical_loss": 3.6523633361310717, + "tokens_seen": 1076363264 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006805488685604238, + "loss": 0.0737, + "theoretical_loss": 3.6522804946947045, + "tokens_seen": 1076625408 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006804686246188413, + "loss": 0.0729, + "theoretical_loss": 3.6521976790729265, + "tokens_seen": 1076887552 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006803883806772588, + "loss": 0.0727, + "theoretical_loss": 3.652114889251412, + "tokens_seen": 1077149696 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006803081367356765, + "loss": 0.0764, + "theoretical_loss": 3.6520321252158485, + "tokens_seen": 1077411840 + }, + { + "epoch": 0.33, + "learning_rate": 0.000680227892794094, + "loss": 0.0723, + "theoretical_loss": 3.651949386951933, + "tokens_seen": 1077673984 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006801476488525116, + "loss": 0.073, + "theoretical_loss": 3.6518666744453734, + "tokens_seen": 1077936128 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0019057122990489006, + "objective/train/docs_used": 395298, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3716213703155518, + "objective/train/original_loss": 1.3716213703155518, + "objective/train/theoretical_loss": 3.6518253278466397, + "objective/train/tokens_used": 1098527200, + "objective/train/value_avg": -0.007762908935546875, + "objective/train/value_loss": 0.00023904572299215943, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.199462890625, + "objective/train/value_reward_corr": 0.600056819574208, + "objective/train/value_std": 0.0146636962890625, + "objective/train/weight_avg": 1.0020171403884888, + "objective/train/weighted_lm_loss": 1.3752607107162476, + "objective/train/weights_max": 1.1991831064224243, + "objective/train/weights_min": 0.3681375980377197, + "theoretical_loss": 3.6518253278466397, + "tokens_seen": 1078067200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006800674049109292, + "loss": 0.0731, + "theoretical_loss": 3.651783987681892, + "tokens_seen": 1078198272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006799871609693469, + "loss": 0.0735, + "theoretical_loss": 3.6517013266472187, + "tokens_seen": 1078460416 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006799069170277645, + "loss": 0.0735, + "theoretical_loss": 3.651618691327098, + "tokens_seen": 1078722560 + }, + { + "epoch": 0.33, + "learning_rate": 0.000679826673086182, + "loss": 0.075, + "theoretical_loss": 3.651536081707284, + "tokens_seen": 1078984704 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006797464291445996, + "loss": 0.0759, + "theoretical_loss": 3.651453497773543, + "tokens_seen": 1079246848 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006796661852030172, + "loss": 0.0745, + "theoretical_loss": 3.6513709395116516, + "tokens_seen": 1079508992 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006795859412614348, + "loss": 0.0716, + "theoretical_loss": 3.651288406907399, + "tokens_seen": 1079771136 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006795056973198523, + "loss": 0.0738, + "theoretical_loss": 3.6512058999465844, + "tokens_seen": 1080033280 + }, + { + "epoch": 0.33, + "learning_rate": 0.00067942545337827, + "loss": 0.0732, + "theoretical_loss": 3.6511234186150197, + "tokens_seen": 1080295424 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006793452094366875, + "loss": 0.0744, + "theoretical_loss": 3.6510409628985263, + "tokens_seen": 1080557568 + }, + { + "epoch": 0.33, + "learning_rate": 0.000679264965495105, + "loss": 0.0734, + "theoretical_loss": 3.6509585327829392, + "tokens_seen": 1080819712 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006791847215535228, + "loss": 0.0729, + "theoretical_loss": 3.6508761282541027, + "tokens_seen": 1081081856 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0018546562641859055, + "objective/train/docs_used": 396489, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.54283607006073, + "objective/train/original_loss": 1.5428359508514404, + "objective/train/theoretical_loss": 3.6507937492978733, + "objective/train/tokens_used": 1101804000, + "objective/train/value_avg": -0.007076263427734375, + "objective/train/value_loss": 0.0001660433190409094, + "objective/train/value_max": -0.00012242794036865234, + "objective/train/value_min": -0.2880859375, + "objective/train/value_reward_corr": 0.6533642274011282, + "objective/train/value_std": 0.01088714599609375, + "objective/train/weight_avg": 1.0019302368164062, + "objective/train/weighted_lm_loss": 1.5454530715942383, + "objective/train/weights_max": 1.150571584701538, + "objective/train/weights_min": 0.37533560395240784, + "theoretical_loss": 3.6507937492978733, + "tokens_seen": 1081344000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006791044776119403, + "loss": 0.0739, + "theoretical_loss": 3.6507937492978733, + "tokens_seen": 1081344000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006790242336703579, + "loss": 0.0713, + "theoretical_loss": 3.6507113959001183, + "tokens_seen": 1081606144 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006789439897287755, + "loss": 0.0771, + "theoretical_loss": 3.6506290680467166, + "tokens_seen": 1081868288 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006788637457871931, + "loss": 0.0739, + "theoretical_loss": 3.650546765723558, + "tokens_seen": 1082130432 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006787835018456106, + "loss": 0.0729, + "theoretical_loss": 3.650464488916544, + "tokens_seen": 1082392576 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006787032579040283, + "loss": 0.0746, + "theoretical_loss": 3.650382237611587, + "tokens_seen": 1082654720 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006786230139624458, + "loss": 0.0764, + "theoretical_loss": 3.65030001179461, + "tokens_seen": 1082916864 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006785427700208634, + "loss": 0.0735, + "theoretical_loss": 3.650217811451548, + "tokens_seen": 1083179008 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006784625260792811, + "loss": 0.0728, + "theoretical_loss": 3.650135636568347, + "tokens_seen": 1083441152 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006783822821376986, + "loss": 0.0734, + "theoretical_loss": 3.6500534871309642, + "tokens_seen": 1083703296 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006783020381961163, + "loss": 0.0737, + "theoretical_loss": 3.649971363125368, + "tokens_seen": 1083965440 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006782217942545338, + "loss": 0.074, + "theoretical_loss": 3.6498892645375367, + "tokens_seen": 1084227584 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006781415503129514, + "loss": 0.0727, + "theoretical_loss": 3.649807191353462, + "tokens_seen": 1084489728 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.000662712671328336, + "objective/train/docs_used": 397685, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3404814004898071, + "objective/train/original_loss": 1.3404817581176758, + "objective/train/theoretical_loss": 3.649766164283458, + "objective/train/tokens_used": 1105080800, + "objective/train/value_avg": -0.0102081298828125, + "objective/train/value_loss": 0.00020532849885057658, + "objective/train/value_max": -0.00010472536087036133, + "objective/train/value_min": -0.33837890625, + "objective/train/value_reward_corr": 0.7515684867138178, + "objective/train/value_std": 0.01517486572265625, + "objective/train/weight_avg": 1.0007604360580444, + "objective/train/weighted_lm_loss": 1.3414093255996704, + "objective/train/weights_max": 1.1722419261932373, + "objective/train/weights_min": 0.4255511462688446, + "theoretical_loss": 3.649766164283458, + "tokens_seen": 1084620800 + }, + { + "epoch": 0.33, + "learning_rate": 0.000678061306371369, + "loss": 0.0743, + "theoretical_loss": 3.6497251435591442, + "tokens_seen": 1084751872 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006779810624297865, + "loss": 0.0775, + "theoretical_loss": 3.6496431211405973, + "tokens_seen": 1085014016 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006779008184882041, + "loss": 0.0714, + "theoretical_loss": 3.649561124083844, + "tokens_seen": 1085276160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006778205745466217, + "loss": 0.0752, + "theoretical_loss": 3.6494791523749193, + "tokens_seen": 1085538304 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006777403306050394, + "loss": 0.0739, + "theoretical_loss": 3.6493972059998696, + "tokens_seen": 1085800448 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006776600866634569, + "loss": 0.0735, + "theoretical_loss": 3.649315284944751, + "tokens_seen": 1086062592 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006775798427218746, + "loss": 0.072, + "theoretical_loss": 3.649233389195632, + "tokens_seen": 1086324736 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006774995987802921, + "loss": 0.0736, + "theoretical_loss": 3.6491515187385914, + "tokens_seen": 1086586880 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006774193548387097, + "loss": 0.071, + "theoretical_loss": 3.649069673559719, + "tokens_seen": 1086849024 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006773391108971273, + "loss": 0.0729, + "theoretical_loss": 3.648987853645116, + "tokens_seen": 1087111168 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006772588669555448, + "loss": 0.0715, + "theoretical_loss": 3.648906058980894, + "tokens_seen": 1087373312 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006771786230139625, + "loss": 0.0733, + "theoretical_loss": 3.6488242895531764, + "tokens_seen": 1087635456 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0011196710402145982, + "objective/train/docs_used": 398862, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.379757046699524, + "objective/train/original_loss": 1.3797568082809448, + "objective/train/theoretical_loss": 3.6487425453480973, + "objective/train/tokens_used": 1108357600, + "objective/train/value_avg": -0.0118865966796875, + "objective/train/value_loss": 0.0003890149819198996, + "objective/train/value_max": -0.00010073184967041016, + "objective/train/value_min": -0.65087890625, + "objective/train/value_reward_corr": 0.7589761731426594, + "objective/train/value_std": 0.0198974609375, + "objective/train/weight_avg": 0.9990611672401428, + "objective/train/weighted_lm_loss": 1.3779809474945068, + "objective/train/weights_max": 1.7288053035736084, + "objective/train/weights_min": 0.3692087233066559, + "theoretical_loss": 3.6487425453480973, + "tokens_seen": 1087897600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00067709837907238, + "loss": 0.0722, + "theoretical_loss": 3.6487425453480973, + "tokens_seen": 1087897600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006770181351307977, + "loss": 0.0712, + "theoretical_loss": 3.648660826351801, + "tokens_seen": 1088159744 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006769378911892153, + "loss": 0.0735, + "theoretical_loss": 3.6485791325504437, + "tokens_seen": 1088421888 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006768576472476328, + "loss": 0.073, + "theoretical_loss": 3.648497463930192, + "tokens_seen": 1088684032 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006767774033060504, + "loss": 0.0729, + "theoretical_loss": 3.6484158204772235, + "tokens_seen": 1088946176 + }, + { + "epoch": 0.33, + "learning_rate": 0.000676697159364468, + "loss": 0.0749, + "theoretical_loss": 3.648334202177727, + "tokens_seen": 1089208320 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006766169154228856, + "loss": 0.0764, + "theoretical_loss": 3.648252609017902, + "tokens_seen": 1089470464 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006765366714813031, + "loss": 0.0741, + "theoretical_loss": 3.648171040983959, + "tokens_seen": 1089732608 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006764564275397208, + "loss": 0.0763, + "theoretical_loss": 3.648089498062119, + "tokens_seen": 1089994752 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006763761835981383, + "loss": 0.0753, + "theoretical_loss": 3.648007980238614, + "tokens_seen": 1090256896 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006762959396565558, + "loss": 0.0723, + "theoretical_loss": 3.6479264874996877, + "tokens_seen": 1090519040 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006762156957149736, + "loss": 0.0727, + "theoretical_loss": 3.6478450198315926, + "tokens_seen": 1090781184 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006761354517733911, + "loss": 0.0754, + "theoretical_loss": 3.6477635772205947, + "tokens_seen": 1091043328 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0015444932505488396, + "objective/train/docs_used": 400125, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.572818636894226, + "objective/train/original_loss": 1.572818636894226, + "objective/train/theoretical_loss": 3.647722865307218, + "objective/train/tokens_used": 1111634400, + "objective/train/value_avg": -0.007625579833984375, + "objective/train/value_loss": 0.00013872672570869327, + "objective/train/value_max": -0.00011146068572998047, + "objective/train/value_min": -0.3896484375, + "objective/train/value_reward_corr": 0.6624823334319214, + "objective/train/value_std": 0.0112152099609375, + "objective/train/weight_avg": 1.0016119480133057, + "objective/train/weighted_lm_loss": 1.574569821357727, + "objective/train/weights_max": 1.1813123226165771, + "objective/train/weights_min": 0.6096436381340027, + "theoretical_loss": 3.647722865307218, + "tokens_seen": 1091174400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006760552078318088, + "loss": 0.075, + "theoretical_loss": 3.647682159652969, + "tokens_seen": 1091305472 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006759749638902263, + "loss": 0.074, + "theoretical_loss": 3.647600767115002, + "tokens_seen": 1091567616 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006758947199486439, + "loss": 0.0756, + "theoretical_loss": 3.6475193995929907, + "tokens_seen": 1091829760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006758144760070615, + "loss": 0.0735, + "theoretical_loss": 3.6474380570732423, + "tokens_seen": 1092091904 + }, + { + "epoch": 0.33, + "learning_rate": 0.000675734232065479, + "loss": 0.075, + "theoretical_loss": 3.6473567395420767, + "tokens_seen": 1092354048 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006756539881238966, + "loss": 0.0729, + "theoretical_loss": 3.647275446985822, + "tokens_seen": 1092616192 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006755737441823142, + "loss": 0.074, + "theoretical_loss": 3.64719417939082, + "tokens_seen": 1092878336 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006754935002407319, + "loss": 0.0757, + "theoretical_loss": 3.6471129367434205, + "tokens_seen": 1093140480 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006754132562991494, + "loss": 0.0715, + "theoretical_loss": 3.647031719029985, + "tokens_seen": 1093402624 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006753330123575671, + "loss": 0.0731, + "theoretical_loss": 3.646950526236887, + "tokens_seen": 1093664768 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006752527684159846, + "loss": 0.0741, + "theoretical_loss": 3.6468693583505085, + "tokens_seen": 1093926912 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006751725244744022, + "loss": 0.0759, + "theoretical_loss": 3.646788215357244, + "tokens_seen": 1094189056 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0005579335847869515, + "objective/train/docs_used": 401268, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.393877625465393, + "objective/train/original_loss": 1.3938775062561035, + "objective/train/theoretical_loss": 3.646707097243498, + "objective/train/tokens_used": 1114911200, + "objective/train/value_avg": -0.007781982421875, + "objective/train/value_loss": 0.0001916129986057058, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.6875, + "objective/train/value_reward_corr": 0.7125330836958561, + "objective/train/value_std": 0.01245880126953125, + "objective/train/weight_avg": 1.000646948814392, + "objective/train/weighted_lm_loss": 1.3944873809814453, + "objective/train/weights_max": 1.2134755849838257, + "objective/train/weights_min": 0.3718355596065521, + "theoretical_loss": 3.646707097243498, + "tokens_seen": 1094451200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006750922805328198, + "loss": 0.0733, + "theoretical_loss": 3.646707097243498, + "tokens_seen": 1094451200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006750120365912373, + "loss": 0.0727, + "theoretical_loss": 3.646626003995685, + "tokens_seen": 1094713344 + }, + { + "epoch": 0.33, + "learning_rate": 0.000674931792649655, + "loss": 0.0757, + "theoretical_loss": 3.6465449356002315, + "tokens_seen": 1094975488 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006748515487080725, + "loss": 0.0713, + "theoretical_loss": 3.646463892043574, + "tokens_seen": 1095237632 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006747713047664902, + "loss": 0.0736, + "theoretical_loss": 3.6463828733121586, + "tokens_seen": 1095499776 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006746910608249078, + "loss": 0.0747, + "theoretical_loss": 3.6463018793924453, + "tokens_seen": 1095761920 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006746108168833254, + "loss": 0.0713, + "theoretical_loss": 3.6462209102709, + "tokens_seen": 1096024064 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006745305729417429, + "loss": 0.0722, + "theoretical_loss": 3.6461399659340037, + "tokens_seen": 1096286208 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006744503290001605, + "loss": 0.0738, + "theoretical_loss": 3.6460590463682454, + "tokens_seen": 1096548352 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006743700850585781, + "loss": 0.0727, + "theoretical_loss": 3.6459781515601244, + "tokens_seen": 1096810496 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006742898411169956, + "loss": 0.0748, + "theoretical_loss": 3.6458972814961528, + "tokens_seen": 1097072640 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006742095971754133, + "loss": 0.0723, + "theoretical_loss": 3.6458164361628516, + "tokens_seen": 1097334784 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006741293532338308, + "loss": 0.0723, + "theoretical_loss": 3.645735615546752, + "tokens_seen": 1097596928 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": 0.0027343458496034145, + "objective/train/docs_used": 402429, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3593459129333496, + "objective/train/original_loss": 1.35934579372406, + "objective/train/theoretical_loss": 3.645695214503448, + "objective/train/tokens_used": 1118188000, + "objective/train/value_avg": -0.00693511962890625, + "objective/train/value_loss": 0.00017142666911240667, + "objective/train/value_max": -9.316205978393555e-05, + "objective/train/value_min": -0.6669921875, + "objective/train/value_reward_corr": 0.6122719969080082, + "objective/train/value_std": 0.012725830078125, + "objective/train/weight_avg": 1.0028126239776611, + "objective/train/weighted_lm_loss": 1.363199234008789, + "objective/train/weights_max": 1.3812626600265503, + "objective/train/weights_min": 0.38981711864471436, + "theoretical_loss": 3.645695214503448, + "tokens_seen": 1097728000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006740491092922485, + "loss": 0.0753, + "theoretical_loss": 3.645654819634397, + "tokens_seen": 1097859072 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006739688653506661, + "loss": 0.0735, + "theoretical_loss": 3.6455740484123407, + "tokens_seen": 1098121216 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006738886214090836, + "loss": 0.0713, + "theoretical_loss": 3.645493301867145, + "tokens_seen": 1098383360 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006738083774675012, + "loss": 0.0749, + "theoretical_loss": 3.6454125799853854, + "tokens_seen": 1098645504 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006737281335259188, + "loss": 0.0735, + "theoretical_loss": 3.645331882753645, + "tokens_seen": 1098907648 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006736478895843364, + "loss": 0.0733, + "theoretical_loss": 3.6452512101585195, + "tokens_seen": 1099169792 + }, + { + "epoch": 0.33, + "learning_rate": 0.000673567645642754, + "loss": 0.0701, + "theoretical_loss": 3.645170562186615, + "tokens_seen": 1099431936 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006734874017011716, + "loss": 0.0742, + "theoretical_loss": 3.6450899388245466, + "tokens_seen": 1099694080 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006734071577595891, + "loss": 0.0747, + "theoretical_loss": 3.645009340058941, + "tokens_seen": 1099956224 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006733269138180067, + "loss": 0.0734, + "theoretical_loss": 3.644928765876436, + "tokens_seen": 1100218368 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006732466698764244, + "loss": 0.0722, + "theoretical_loss": 3.644848216263678, + "tokens_seen": 1100480512 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006731664259348419, + "loss": 0.0749, + "theoretical_loss": 3.6447676912073255, + "tokens_seen": 1100742656 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0021009258925914764, + "objective/train/docs_used": 403530, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.643606185913086, + "objective/train/original_loss": 1.6436063051223755, + "objective/train/theoretical_loss": 3.6446871906940466, + "objective/train/tokens_used": 1121464800, + "objective/train/value_avg": -0.01171875, + "objective/train/value_loss": 0.00041994385537691414, + "objective/train/value_max": -0.00010311603546142578, + "objective/train/value_min": -0.2127685546875, + "objective/train/value_reward_corr": 0.8979253185065612, + "objective/train/value_std": 0.0230712890625, + "objective/train/weight_avg": 0.998100221157074, + "objective/train/weighted_lm_loss": 1.6399933099746704, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.3681386411190033, + "theoretical_loss": 3.6446871906940466, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006730861819932596, + "loss": 0.0756, + "theoretical_loss": 3.6446871906940466, + "tokens_seen": 1101004800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006730059380516771, + "loss": 0.072, + "theoretical_loss": 3.6446067147105197, + "tokens_seen": 1101266944 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006729256941100947, + "loss": 0.0712, + "theoretical_loss": 3.644526263243433, + "tokens_seen": 1101529088 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006728454501685123, + "loss": 0.0749, + "theoretical_loss": 3.644445836279488, + "tokens_seen": 1101791232 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006727652062269298, + "loss": 0.0697, + "theoretical_loss": 3.644365433805393, + "tokens_seen": 1102053376 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006726849622853474, + "loss": 0.0719, + "theoretical_loss": 3.6442850558078685, + "tokens_seen": 1102315520 + }, + { + "epoch": 0.33, + "learning_rate": 0.000672604718343765, + "loss": 0.0766, + "theoretical_loss": 3.6442047022736452, + "tokens_seen": 1102577664 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006725244744021827, + "loss": 0.0758, + "theoretical_loss": 3.644124373189464, + "tokens_seen": 1102839808 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006724442304606002, + "loss": 0.0745, + "theoretical_loss": 3.644044068542076, + "tokens_seen": 1103101952 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006723639865190179, + "loss": 0.0728, + "theoretical_loss": 3.643963788318242, + "tokens_seen": 1103364096 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006722837425774354, + "loss": 0.0724, + "theoretical_loss": 3.6438835325047356, + "tokens_seen": 1103626240 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006722034986358531, + "loss": 0.073, + "theoretical_loss": 3.6438033010883375, + "tokens_seen": 1103888384 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006721232546942706, + "loss": 0.0745, + "theoretical_loss": 3.643723094055841, + "tokens_seen": 1104150528 + }, + { + "epoch": 0.33, + "objective/train/advantage_avg": -0.0009628982516005635, + "objective/train/docs_used": 404699, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5864622592926025, + "objective/train/original_loss": 1.5864622592926025, + "objective/train/theoretical_loss": 3.643682999679431, + "objective/train/tokens_used": 1124741600, + "objective/train/value_avg": -0.00875091552734375, + "objective/train/value_loss": 0.0005373280146159232, + "objective/train/value_max": -6.502866744995117e-05, + "objective/train/value_min": -0.462158203125, + "objective/train/value_reward_corr": 0.6116719535059587, + "objective/train/value_std": 0.014617919921875, + "objective/train/weight_avg": 0.9992680549621582, + "objective/train/weighted_lm_loss": 1.5853101015090942, + "objective/train/weights_max": 1.250488042831421, + "objective/train/weights_min": 0.3689074218273163, + "theoretical_loss": 3.643682999679431, + "tokens_seen": 1104281600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006720430107526881, + "loss": 0.0769, + "theoretical_loss": 3.643642911394048, + "tokens_seen": 1104412672 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006719627668111058, + "loss": 0.0735, + "theoretical_loss": 3.643562753089772, + "tokens_seen": 1104674816 + }, + { + "epoch": 0.33, + "learning_rate": 0.0006718825228695233, + "loss": 0.0733, + "theoretical_loss": 3.6434826191298364, + "tokens_seen": 1104936960 + }, + { + "epoch": 0.33, + "learning_rate": 0.000671802278927941, + "loss": 0.0728, + "theoretical_loss": 3.6434025095010747, + "tokens_seen": 1105199104 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006717220349863586, + "loss": 0.0764, + "theoretical_loss": 3.6433224241903304, + "tokens_seen": 1105461248 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006716417910447762, + "loss": 0.0749, + "theoretical_loss": 3.643242363184458, + "tokens_seen": 1105723392 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006715615471031937, + "loss": 0.074, + "theoretical_loss": 3.6431623264703212, + "tokens_seen": 1105985536 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006714813031616113, + "loss": 0.071, + "theoretical_loss": 3.6430823140347943, + "tokens_seen": 1106247680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006714010592200289, + "loss": 0.0727, + "theoretical_loss": 3.643002325864763, + "tokens_seen": 1106509824 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006713208152784464, + "loss": 0.0727, + "theoretical_loss": 3.6429223619471207, + "tokens_seen": 1106771968 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006712405713368641, + "loss": 0.0749, + "theoretical_loss": 3.6428424222687736, + "tokens_seen": 1107034112 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006711603273952816, + "loss": 0.0739, + "theoretical_loss": 3.642762506816636, + "tokens_seen": 1107296256 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0005870533641427755, + "objective/train/docs_used": 405946, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4835734367370605, + "objective/train/original_loss": 1.4835731983184814, + "objective/train/theoretical_loss": 3.642682615577634, + "objective/train/tokens_used": 1128018400, + "objective/train/value_avg": -0.007633209228515625, + "objective/train/value_loss": 0.00033086538314819336, + "objective/train/value_max": -3.916025161743164e-05, + "objective/train/value_min": -0.341796875, + "objective/train/value_reward_corr": 0.5667257952888192, + "objective/train/value_std": 0.01132965087890625, + "objective/train/weight_avg": 0.999566912651062, + "objective/train/weighted_lm_loss": 1.4834892749786377, + "objective/train/weights_max": 1.2339105606079102, + "objective/train/weights_min": 0.6143267154693604, + "theoretical_loss": 3.642682615577634, + "tokens_seen": 1107558400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006710800834536994, + "loss": 0.0717, + "theoretical_loss": 3.642682615577634, + "tokens_seen": 1107558400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006709998395121169, + "loss": 0.0757, + "theoretical_loss": 3.6426027485387023, + "tokens_seen": 1107820544 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006709195955705344, + "loss": 0.0737, + "theoretical_loss": 3.6425229056867865, + "tokens_seen": 1108082688 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006708393516289521, + "loss": 0.0723, + "theoretical_loss": 3.642443087008844, + "tokens_seen": 1108344832 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006707591076873696, + "loss": 0.0718, + "theoretical_loss": 3.6423632924918383, + "tokens_seen": 1108606976 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006706788637457872, + "loss": 0.0728, + "theoretical_loss": 3.6422835221227468, + "tokens_seen": 1108869120 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006705986198042048, + "loss": 0.0729, + "theoretical_loss": 3.6422037758885555, + "tokens_seen": 1109131264 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006705183758626224, + "loss": 0.0746, + "theoretical_loss": 3.6421240537762607, + "tokens_seen": 1109393408 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006704381319210399, + "loss": 0.0774, + "theoretical_loss": 3.6420443557728674, + "tokens_seen": 1109655552 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006703578879794575, + "loss": 0.0742, + "theoretical_loss": 3.6419646818653932, + "tokens_seen": 1109917696 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006702776440378752, + "loss": 0.076, + "theoretical_loss": 3.641885032040864, + "tokens_seen": 1110179840 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006701974000962927, + "loss": 0.0751, + "theoretical_loss": 3.6418054062863163, + "tokens_seen": 1110441984 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006701171561547104, + "loss": 0.0731, + "theoretical_loss": 3.6417258045887966, + "tokens_seen": 1110704128 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0009894539834931493, + "objective/train/docs_used": 406993, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.475312352180481, + "objective/train/original_loss": 1.4753124713897705, + "objective/train/theoretical_loss": 3.6416860127573765, + "objective/train/tokens_used": 1131295200, + "objective/train/value_avg": -0.00717926025390625, + "objective/train/value_loss": 0.00043393525993451476, + "objective/train/value_max": -3.069639205932617e-05, + "objective/train/value_min": -0.8212890625, + "objective/train/value_reward_corr": 0.6151101633973011, + "objective/train/value_std": 0.01456451416015625, + "objective/train/weight_avg": 0.999183714389801, + "objective/train/weighted_lm_loss": 1.4731035232543945, + "objective/train/weights_max": 1.11464262008667, + "objective/train/weights_min": 0.22452190518379211, + "theoretical_loss": 3.6416860127573765, + "tokens_seen": 1110835200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006700369122131279, + "loss": 0.0746, + "theoretical_loss": 3.641646226935361, + "tokens_seen": 1110966272 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006699566682715455, + "loss": 0.076, + "theoretical_loss": 3.641566673313076, + "tokens_seen": 1111228416 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006698764243299631, + "loss": 0.0776, + "theoretical_loss": 3.6414871437090186, + "tokens_seen": 1111490560 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006697961803883806, + "loss": 0.0752, + "theoretical_loss": 3.641407638110275, + "tokens_seen": 1111752704 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006697159364467983, + "loss": 0.0755, + "theoretical_loss": 3.641328156503942, + "tokens_seen": 1112014848 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006696356925052158, + "loss": 0.0757, + "theoretical_loss": 3.6412486988771255, + "tokens_seen": 1112276992 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006695554485636335, + "loss": 0.0727, + "theoretical_loss": 3.6411692652169423, + "tokens_seen": 1112539136 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006694752046220511, + "loss": 0.0749, + "theoretical_loss": 3.641089855510518, + "tokens_seen": 1112801280 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006693949606804687, + "loss": 0.0726, + "theoretical_loss": 3.64101046974499, + "tokens_seen": 1113063424 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006693147167388862, + "loss": 0.074, + "theoretical_loss": 3.640931107907504, + "tokens_seen": 1113325568 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006692344727973039, + "loss": 0.0768, + "theoretical_loss": 3.6408517699852165, + "tokens_seen": 1113587712 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006691542288557214, + "loss": 0.078, + "theoretical_loss": 3.640772455965293, + "tokens_seen": 1113849856 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0012157823657616973, + "objective/train/docs_used": 408181, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.420784592628479, + "objective/train/original_loss": 1.4207844734191895, + "objective/train/theoretical_loss": 3.64069316583491, + "objective/train/tokens_used": 1134572000, + "objective/train/value_avg": -0.0094451904296875, + "objective/train/value_loss": 0.00029014694155193865, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.5224609375, + "objective/train/value_reward_corr": 0.6501879473840528, + "objective/train/value_std": 0.016510009765625, + "objective/train/weight_avg": 1.0013483762741089, + "objective/train/weighted_lm_loss": 1.4223439693450928, + "objective/train/weights_max": 1.2760461568832397, + "objective/train/weights_min": 0.36923545598983765, + "theoretical_loss": 3.64069316583491, + "tokens_seen": 1114112000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006690739849141389, + "loss": 0.0726, + "theoretical_loss": 3.64069316583491, + "tokens_seen": 1114112000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006689937409725566, + "loss": 0.0752, + "theoretical_loss": 3.640613899581253, + "tokens_seen": 1114374144 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006689134970309741, + "loss": 0.0777, + "theoretical_loss": 3.6405346571915187, + "tokens_seen": 1114636288 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006688332530893918, + "loss": 0.0756, + "theoretical_loss": 3.6404554386529115, + "tokens_seen": 1114898432 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006687530091478094, + "loss": 0.072, + "theoretical_loss": 3.640376243952648, + "tokens_seen": 1115160576 + }, + { + "epoch": 0.34, + "learning_rate": 0.000668672765206227, + "loss": 0.072, + "theoretical_loss": 3.640297073077953, + "tokens_seen": 1115422720 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006685925212646445, + "loss": 0.0779, + "theoretical_loss": 3.640217926016061, + "tokens_seen": 1115684864 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006685122773230621, + "loss": 0.0741, + "theoretical_loss": 3.6401388027542185, + "tokens_seen": 1115947008 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006684320333814797, + "loss": 0.0708, + "theoretical_loss": 3.6400597032796798, + "tokens_seen": 1116209152 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006683517894398973, + "loss": 0.0753, + "theoretical_loss": 3.6399806275797095, + "tokens_seen": 1116471296 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006682715454983149, + "loss": 0.0761, + "theoretical_loss": 3.639901575641582, + "tokens_seen": 1116733440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006681913015567324, + "loss": 0.0741, + "theoretical_loss": 3.6398225474525816, + "tokens_seen": 1116995584 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006681110576151502, + "loss": 0.0726, + "theoretical_loss": 3.639743543000003, + "tokens_seen": 1117257728 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.000741979107260704, + "objective/train/docs_used": 409409, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5466253757476807, + "objective/train/original_loss": 1.5466253757476807, + "objective/train/theoretical_loss": 3.639704049670904, + "objective/train/tokens_used": 1137848800, + "objective/train/value_avg": -0.00763702392578125, + "objective/train/value_loss": 0.0003765631699934602, + "objective/train/value_max": -5.8770179748535156e-05, + "objective/train/value_min": -0.3564453125, + "objective/train/value_reward_corr": 0.5197531699733359, + "objective/train/value_std": 0.01153564453125, + "objective/train/weight_avg": 1.0008995532989502, + "objective/train/weighted_lm_loss": 1.5469353199005127, + "objective/train/weights_max": 1.3829063177108765, + "objective/train/weights_min": 0.22673387825489044, + "theoretical_loss": 3.639704049670904, + "tokens_seen": 1117388800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006680308136735677, + "loss": 0.073, + "theoretical_loss": 3.6396645622711494, + "tokens_seen": 1117519872 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006679505697319852, + "loss": 0.0764, + "theoretical_loss": 3.639585605253335, + "tokens_seen": 1117782016 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006678703257904029, + "loss": 0.0738, + "theoretical_loss": 3.639506671933882, + "tokens_seen": 1118044160 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006677900818488204, + "loss": 0.0739, + "theoretical_loss": 3.639427762300125, + "tokens_seen": 1118306304 + }, + { + "epoch": 0.34, + "learning_rate": 0.000667709837907238, + "loss": 0.0748, + "theoretical_loss": 3.6393488763394064, + "tokens_seen": 1118568448 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006676295939656556, + "loss": 0.0738, + "theoretical_loss": 3.639270014039078, + "tokens_seen": 1118830592 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006675493500240732, + "loss": 0.074, + "theoretical_loss": 3.6391911753865034, + "tokens_seen": 1119092736 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006674691060824907, + "loss": 0.0754, + "theoretical_loss": 3.639112360369054, + "tokens_seen": 1119354880 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006673888621409083, + "loss": 0.0763, + "theoretical_loss": 3.6390335689741113, + "tokens_seen": 1119617024 + }, + { + "epoch": 0.34, + "learning_rate": 0.000667308618199326, + "loss": 0.075, + "theoretical_loss": 3.638954801189067, + "tokens_seen": 1119879168 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006672283742577436, + "loss": 0.076, + "theoretical_loss": 3.6388760570013226, + "tokens_seen": 1120141312 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006671481303161612, + "loss": 0.0732, + "theoretical_loss": 3.6387973363982877, + "tokens_seen": 1120403456 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0013283737935125828, + "objective/train/docs_used": 410712, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4400638341903687, + "objective/train/original_loss": 1.440063714981079, + "objective/train/theoretical_loss": 3.638718639367384, + "objective/train/tokens_used": 1141125600, + "objective/train/value_avg": -0.007556915283203125, + "objective/train/value_loss": 0.0001608040911378339, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.611328125, + "objective/train/value_reward_corr": 0.7982278408915517, + "objective/train/value_std": 0.01517486572265625, + "objective/train/weight_avg": 1.0014057159423828, + "objective/train/weighted_lm_loss": 1.4431976079940796, + "objective/train/weights_max": 1.3930864334106445, + "objective/train/weights_min": 0.5440833568572998, + "theoretical_loss": 3.638718639367384, + "tokens_seen": 1120665600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006670678863745787, + "loss": 0.0728, + "theoretical_loss": 3.638718639367384, + "tokens_seen": 1120665600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006669876424329964, + "loss": 0.0737, + "theoretical_loss": 3.638639965896041, + "tokens_seen": 1120927744 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006669073984914139, + "loss": 0.0766, + "theoretical_loss": 3.638561315971698, + "tokens_seen": 1121189888 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006668271545498314, + "loss": 0.0733, + "theoretical_loss": 3.638482689581805, + "tokens_seen": 1121452032 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006667469106082491, + "loss": 0.0724, + "theoretical_loss": 3.6384040867138214, + "tokens_seen": 1121714176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006666666666666666, + "loss": 0.0765, + "theoretical_loss": 3.6383255073552148, + "tokens_seen": 1121976320 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006665864227250843, + "loss": 0.0754, + "theoretical_loss": 3.638246951493463, + "tokens_seen": 1122238464 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006665061787835019, + "loss": 0.0741, + "theoretical_loss": 3.6381684191160555, + "tokens_seen": 1122500608 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006664259348419195, + "loss": 0.0726, + "theoretical_loss": 3.638089910210488, + "tokens_seen": 1122762752 + }, + { + "epoch": 0.34, + "learning_rate": 0.000666345690900337, + "loss": 0.0762, + "theoretical_loss": 3.638011424764269, + "tokens_seen": 1123024896 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006662654469587546, + "loss": 0.0711, + "theoretical_loss": 3.6379329627649137, + "tokens_seen": 1123287040 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006661852030171722, + "loss": 0.0732, + "theoretical_loss": 3.6378545241999487, + "tokens_seen": 1123549184 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006661049590755897, + "loss": 0.0745, + "theoretical_loss": 3.637776109056909, + "tokens_seen": 1123811328 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.0026691153179854155, + "objective/train/docs_used": 411904, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2471657991409302, + "objective/train/original_loss": 1.2471659183502197, + "objective/train/theoretical_loss": 3.637736910264719, + "objective/train/tokens_used": 1144402400, + "objective/train/value_avg": -0.00782012939453125, + "objective/train/value_loss": 0.0007978877983987331, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.71435546875, + "objective/train/value_reward_corr": 0.6231384912903344, + "objective/train/value_std": 0.0173797607421875, + "objective/train/weight_avg": 0.9976327419281006, + "objective/train/weighted_lm_loss": 1.243854284286499, + "objective/train/weights_max": 2.0428695678710938, + "objective/train/weights_min": 0.05494469031691551, + "theoretical_loss": 3.637736910264719, + "tokens_seen": 1123942400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006660247151340074, + "loss": 0.0738, + "theoretical_loss": 3.6376977173233405, + "tokens_seen": 1124073472 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006659444711924249, + "loss": 0.073, + "theoretical_loss": 3.6376193489867976, + "tokens_seen": 1124335616 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006658642272508427, + "loss": 0.0757, + "theoretical_loss": 3.6375410040348446, + "tokens_seen": 1124597760 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006657839833092602, + "loss": 0.0712, + "theoretical_loss": 3.637462682455055, + "tokens_seen": 1124859904 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006657037393676778, + "loss": 0.0735, + "theoretical_loss": 3.6373843842350118, + "tokens_seen": 1125122048 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006656234954260954, + "loss": 0.0752, + "theoretical_loss": 3.637306109362308, + "tokens_seen": 1125384192 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006655432514845129, + "loss": 0.0738, + "theoretical_loss": 3.6372278578245454, + "tokens_seen": 1125646336 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006654630075429305, + "loss": 0.0734, + "theoretical_loss": 3.6371496296093357, + "tokens_seen": 1125908480 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006653827636013481, + "loss": 0.0748, + "theoretical_loss": 3.6370714247043003, + "tokens_seen": 1126170624 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006653025196597657, + "loss": 0.0738, + "theoretical_loss": 3.6369932430970695, + "tokens_seen": 1126432768 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006652222757181832, + "loss": 0.0772, + "theoretical_loss": 3.6369150847752834, + "tokens_seen": 1126694912 + }, + { + "epoch": 0.34, + "learning_rate": 0.000665142031776601, + "loss": 0.0748, + "theoretical_loss": 3.6368369497265913, + "tokens_seen": 1126957056 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": -0.004586232826113701, + "objective/train/docs_used": 413086, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5389773845672607, + "objective/train/original_loss": 1.5389773845672607, + "objective/train/theoretical_loss": 3.6367588379386513, + "objective/train/tokens_used": 1147679200, + "objective/train/value_avg": -0.01276397705078125, + "objective/train/value_loss": 0.001206457382068038, + "objective/train/value_max": -8.285045623779297e-05, + "objective/train/value_min": -0.6474609375, + "objective/train/value_reward_corr": 0.7707702012782308, + "objective/train/value_std": 0.0240020751953125, + "objective/train/weight_avg": 0.9959734678268433, + "objective/train/weighted_lm_loss": 1.5339165925979614, + "objective/train/weights_max": 1.6746848821640015, + "objective/train/weights_min": 0.36978310346603394, + "theoretical_loss": 3.6367588379386513, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006650617878350185, + "loss": 0.0734, + "theoretical_loss": 3.6367588379386513, + "tokens_seen": 1127219200 + }, + { + "epoch": 0.34, + "learning_rate": 0.000664981543893436, + "loss": 0.0756, + "theoretical_loss": 3.636680749399133, + "tokens_seen": 1127481344 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006649012999518537, + "loss": 0.0745, + "theoretical_loss": 3.6366026840957133, + "tokens_seen": 1127743488 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006648210560102712, + "loss": 0.076, + "theoretical_loss": 3.636524642016079, + "tokens_seen": 1128005632 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006647408120686889, + "loss": 0.0745, + "theoretical_loss": 3.636446623147927, + "tokens_seen": 1128267776 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006646605681271064, + "loss": 0.0763, + "theoretical_loss": 3.6363686274789626, + "tokens_seen": 1128529920 + }, + { + "epoch": 0.34, + "learning_rate": 0.000664580324185524, + "loss": 0.0731, + "theoretical_loss": 3.6362906549969014, + "tokens_seen": 1128792064 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006645000802439416, + "loss": 0.0743, + "theoretical_loss": 3.6362127056894673, + "tokens_seen": 1129054208 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006644198363023591, + "loss": 0.0753, + "theoretical_loss": 3.6361347795443955, + "tokens_seen": 1129316352 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006643395923607768, + "loss": 0.0739, + "theoretical_loss": 3.636056876549427, + "tokens_seen": 1129578496 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006642593484191944, + "loss": 0.074, + "theoretical_loss": 3.6359789966923164, + "tokens_seen": 1129840640 + }, + { + "epoch": 0.34, + "learning_rate": 0.000664179104477612, + "loss": 0.0773, + "theoretical_loss": 3.6359011399608243, + "tokens_seen": 1130102784 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006640988605360295, + "loss": 0.0749, + "theoretical_loss": 3.6358233063427225, + "tokens_seen": 1130364928 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0018270047148689628, + "objective/train/docs_used": 414307, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.627175211906433, + "objective/train/original_loss": 1.6271753311157227, + "objective/train/theoretical_loss": 3.635784398197374, + "objective/train/tokens_used": 1150956000, + "objective/train/value_avg": -0.00772857666015625, + "objective/train/value_loss": 0.0002652402617968619, + "objective/train/value_max": -0.00010472536087036133, + "objective/train/value_min": -0.96875, + "objective/train/value_reward_corr": 0.6389814979319877, + "objective/train/value_std": 0.01456451416015625, + "objective/train/weight_avg": 1.0019452571868896, + "objective/train/weighted_lm_loss": 1.630281686782837, + "objective/train/weights_max": 1.5185024738311768, + "objective/train/weights_min": 0.3883684575557709, + "theoretical_loss": 3.635784398197374, + "tokens_seen": 1130496000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006640186165944472, + "loss": 0.0716, + "theoretical_loss": 3.635745495825791, + "tokens_seen": 1130627072 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006639383726528647, + "loss": 0.0721, + "theoretical_loss": 3.63566770839782, + "tokens_seen": 1130889216 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006638581287112822, + "loss": 0.0743, + "theoretical_loss": 3.6355899440466075, + "tokens_seen": 1131151360 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006637778847696999, + "loss": 0.0731, + "theoretical_loss": 3.635512202759964, + "tokens_seen": 1131413504 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006636976408281174, + "loss": 0.0738, + "theoretical_loss": 3.635434484525704, + "tokens_seen": 1131675648 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006636173968865351, + "loss": 0.076, + "theoretical_loss": 3.6353567893316567, + "tokens_seen": 1131937792 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006635371529449527, + "loss": 0.0738, + "theoretical_loss": 3.6352791171656573, + "tokens_seen": 1132199936 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006634569090033703, + "loss": 0.0743, + "theoretical_loss": 3.635201468015551, + "tokens_seen": 1132462080 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006633766650617879, + "loss": 0.074, + "theoretical_loss": 3.635123841869193, + "tokens_seen": 1132724224 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006632964211202054, + "loss": 0.0752, + "theoretical_loss": 3.6350462387144464, + "tokens_seen": 1132986368 + }, + { + "epoch": 0.34, + "learning_rate": 0.000663216177178623, + "loss": 0.0748, + "theoretical_loss": 3.634968658539184, + "tokens_seen": 1133248512 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006631359332370406, + "loss": 0.0768, + "theoretical_loss": 3.6348911013312883, + "tokens_seen": 1133510656 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0008693314739502966, + "objective/train/docs_used": 415442, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.477584958076477, + "objective/train/original_loss": 1.477584958076477, + "objective/train/theoretical_loss": 3.6348135670786506, + "objective/train/tokens_used": 1154232800, + "objective/train/value_avg": -0.007232666015625, + "objective/train/value_loss": 0.00015491771046072245, + "objective/train/value_max": -9.685754776000977e-05, + "objective/train/value_min": -0.66357421875, + "objective/train/value_reward_corr": 0.5982827764776568, + "objective/train/value_std": 0.01332855224609375, + "objective/train/weight_avg": 1.0009486675262451, + "objective/train/weighted_lm_loss": 1.47885262966156, + "objective/train/weights_max": 1.8655112981796265, + "objective/train/weights_min": 0.6109683513641357, + "theoretical_loss": 3.6348135670786506, + "tokens_seen": 1133772800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006630556892954582, + "loss": 0.0736, + "theoretical_loss": 3.6348135670786506, + "tokens_seen": 1133772800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006629754453538757, + "loss": 0.0746, + "theoretical_loss": 3.6347360557691712, + "tokens_seen": 1134034944 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006628952014122935, + "loss": 0.0739, + "theoretical_loss": 3.63465856739076, + "tokens_seen": 1134297088 + }, + { + "epoch": 0.34, + "learning_rate": 0.000662814957470711, + "loss": 0.0747, + "theoretical_loss": 3.634581101931336, + "tokens_seen": 1134559232 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006627347135291286, + "loss": 0.0753, + "theoretical_loss": 3.6345036593788276, + "tokens_seen": 1134821376 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006626544695875462, + "loss": 0.0758, + "theoretical_loss": 3.6344262397211704, + "tokens_seen": 1135083520 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006625742256459637, + "loss": 0.0754, + "theoretical_loss": 3.6343488429463124, + "tokens_seen": 1135345664 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006624939817043813, + "loss": 0.076, + "theoretical_loss": 3.634271469042208, + "tokens_seen": 1135607808 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006624137377627989, + "loss": 0.0728, + "theoretical_loss": 3.634194117996822, + "tokens_seen": 1135869952 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006623334938212165, + "loss": 0.0736, + "theoretical_loss": 3.634116789798129, + "tokens_seen": 1136132096 + }, + { + "epoch": 0.34, + "learning_rate": 0.000662253249879634, + "loss": 0.077, + "theoretical_loss": 3.6340394844341097, + "tokens_seen": 1136394240 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006621730059380518, + "loss": 0.0771, + "theoretical_loss": 3.6339622018927575, + "tokens_seen": 1136656384 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006620927619964693, + "loss": 0.0741, + "theoretical_loss": 3.633884942162073, + "tokens_seen": 1136918528 + }, + { + "epoch": 0.34, + "objective/train/advantage_avg": 0.0013180155074223876, + "objective/train/docs_used": 416597, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.555419683456421, + "objective/train/original_loss": 1.555419683456421, + "objective/train/theoretical_loss": 3.633846320846984, + "objective/train/tokens_used": 1157509600, + "objective/train/value_avg": -0.006999969482421875, + "objective/train/value_loss": 0.00019526074174791574, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.3037109375, + "objective/train/value_reward_corr": 0.5462273072896175, + "objective/train/value_std": 0.009796142578125, + "objective/train/weight_avg": 1.0014064311981201, + "objective/train/weighted_lm_loss": 1.5573372840881348, + "objective/train/weights_max": 1.306922197341919, + "objective/train/weights_min": 0.36874493956565857, + "theoretical_loss": 3.633846320846984, + "tokens_seen": 1137049600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006620125180548869, + "loss": 0.0735, + "theoretical_loss": 3.6338077052300664, + "tokens_seen": 1137180672 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006619322741133045, + "loss": 0.0725, + "theoretical_loss": 3.633730491084756, + "tokens_seen": 1137442816 + }, + { + "epoch": 0.34, + "learning_rate": 0.000661852030171722, + "loss": 0.0747, + "theoretical_loss": 3.6336532997141706, + "tokens_seen": 1137704960 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006617717862301397, + "loss": 0.0781, + "theoretical_loss": 3.6335761311063473, + "tokens_seen": 1137967104 + }, + { + "epoch": 0.34, + "learning_rate": 0.0006616915422885572, + "loss": 0.0764, + "theoretical_loss": 3.633498985249332, + "tokens_seen": 1138229248 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006616112983469748, + "loss": 0.074, + "theoretical_loss": 3.63342186213118, + "tokens_seen": 1138491392 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006615310544053924, + "loss": 0.0737, + "theoretical_loss": 3.6333447617399557, + "tokens_seen": 1138753536 + }, + { + "epoch": 0.35, + "learning_rate": 0.00066145081046381, + "loss": 0.0752, + "theoretical_loss": 3.6332676840637324, + "tokens_seen": 1139015680 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006613705665222276, + "loss": 0.076, + "theoretical_loss": 3.633190629090592, + "tokens_seen": 1139277824 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006612903225806452, + "loss": 0.0751, + "theoretical_loss": 3.6331135968086263, + "tokens_seen": 1139539968 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006612100786390628, + "loss": 0.0739, + "theoretical_loss": 3.633036587205935, + "tokens_seen": 1139802112 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006611298346974803, + "loss": 0.0747, + "theoretical_loss": 3.6329596002706275, + "tokens_seen": 1140064256 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0006165788508951664, + "objective/train/docs_used": 417664, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4508864879608154, + "objective/train/original_loss": 1.4508862495422363, + "objective/train/theoretical_loss": 3.632882635990822, + "objective/train/tokens_used": 1160786400, + "objective/train/value_avg": -0.006664276123046875, + "objective/train/value_loss": 0.00015237460320349783, + "objective/train/value_max": -8.285045623779297e-05, + "objective/train/value_min": -0.4951171875, + "objective/train/value_reward_corr": 0.7813708563816653, + "objective/train/value_std": 0.0169219970703125, + "objective/train/weight_avg": 1.000691533088684, + "objective/train/weighted_lm_loss": 1.4525622129440308, + "objective/train/weights_max": 1.1556388139724731, + "objective/train/weights_min": 0.6773775815963745, + "theoretical_loss": 3.632882635990822, + "tokens_seen": 1140326400 + }, + { + "epoch": 0.35, + "learning_rate": 0.000661049590755898, + "loss": 0.0745, + "theoretical_loss": 3.632882635990822, + "tokens_seen": 1140326400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006609693468143155, + "loss": 0.0764, + "theoretical_loss": 3.632805694354646, + "tokens_seen": 1140588544 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006608891028727331, + "loss": 0.0752, + "theoretical_loss": 3.6327287753502358, + "tokens_seen": 1140850688 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006608088589311507, + "loss": 0.0753, + "theoretical_loss": 3.632651878965735, + "tokens_seen": 1141112832 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006607286149895682, + "loss": 0.0734, + "theoretical_loss": 3.632575005189299, + "tokens_seen": 1141374976 + }, + { + "epoch": 0.35, + "learning_rate": 0.000660648371047986, + "loss": 0.0765, + "theoretical_loss": 3.6324981540090895, + "tokens_seen": 1141637120 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006605681271064035, + "loss": 0.074, + "theoretical_loss": 3.6324213254132793, + "tokens_seen": 1141899264 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006604878831648211, + "loss": 0.0758, + "theoretical_loss": 3.632344519390049, + "tokens_seen": 1142161408 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006604076392232387, + "loss": 0.0726, + "theoretical_loss": 3.632267735927588, + "tokens_seen": 1142423552 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006603273952816562, + "loss": 0.0747, + "theoretical_loss": 3.632190975014094, + "tokens_seen": 1142685696 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006602471513400738, + "loss": 0.0723, + "theoretical_loss": 3.6321142366377757, + "tokens_seen": 1142947840 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006601669073984914, + "loss": 0.072, + "theoretical_loss": 3.6320375207868483, + "tokens_seen": 1143209984 + }, + { + "epoch": 0.35, + "learning_rate": 0.000660086663456909, + "loss": 0.0732, + "theoretical_loss": 3.6319608274495376, + "tokens_seen": 1143472128 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.002307515824213624, + "objective/train/docs_used": 418760, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4072736501693726, + "objective/train/original_loss": 1.407273769378662, + "objective/train/theoretical_loss": 3.6319224892198108, + "objective/train/tokens_used": 1164063200, + "objective/train/value_avg": -0.00730133056640625, + "objective/train/value_loss": 0.00011779867054428905, + "objective/train/value_max": -0.00014889240264892578, + "objective/train/value_min": -0.4365234375, + "objective/train/value_reward_corr": 0.7994153780614749, + "objective/train/value_std": 0.01413726806640625, + "objective/train/weight_avg": 1.0023618936538696, + "objective/train/weighted_lm_loss": 1.4107744693756104, + "objective/train/weights_max": 1.1094887256622314, + "objective/train/weights_min": 0.37357577681541443, + "theoretical_loss": 3.6319224892198108, + "tokens_seen": 1143603200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006600064195153265, + "loss": 0.0745, + "theoretical_loss": 3.6318841566140767, + "tokens_seen": 1143734272 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006599261755737443, + "loss": 0.0724, + "theoretical_loss": 3.63180750826871, + "tokens_seen": 1143996416 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006598459316321618, + "loss": 0.0701, + "theoretical_loss": 3.6317308824016874, + "tokens_seen": 1144258560 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006597656876905794, + "loss": 0.0707, + "theoretical_loss": 3.6316542790012702, + "tokens_seen": 1144520704 + }, + { + "epoch": 0.35, + "learning_rate": 0.000659685443748997, + "loss": 0.0718, + "theoretical_loss": 3.631577698055727, + "tokens_seen": 1144782848 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006596051998074145, + "loss": 0.0708, + "theoretical_loss": 3.631501139553337, + "tokens_seen": 1145044992 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006595249558658322, + "loss": 0.0739, + "theoretical_loss": 3.6314246034823867, + "tokens_seen": 1145307136 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006594447119242497, + "loss": 0.0751, + "theoretical_loss": 3.631348089831171, + "tokens_seen": 1145569280 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006593644679826673, + "loss": 0.0737, + "theoretical_loss": 3.631271598587995, + "tokens_seen": 1145831424 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006592842240410849, + "loss": 0.0741, + "theoretical_loss": 3.631195129741172, + "tokens_seen": 1146093568 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006592039800995026, + "loss": 0.0749, + "theoretical_loss": 3.631118683279024, + "tokens_seen": 1146355712 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006591237361579201, + "loss": 0.0735, + "theoretical_loss": 3.6310422591898814, + "tokens_seen": 1146617856 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.001571335014887154, + "objective/train/docs_used": 419954, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4320921897888184, + "objective/train/original_loss": 1.4320919513702393, + "objective/train/theoretical_loss": 3.630965857462084, + "objective/train/tokens_used": 1167340000, + "objective/train/value_avg": -0.00972747802734375, + "objective/train/value_loss": 0.00031021423637866974, + "objective/train/value_max": -0.00012934207916259766, + "objective/train/value_min": -0.625, + "objective/train/value_reward_corr": 0.6921582137917927, + "objective/train/value_std": 0.0173187255859375, + "objective/train/weight_avg": 1.0017169713974, + "objective/train/weighted_lm_loss": 1.4336016178131104, + "objective/train/weights_max": 1.7058966159820557, + "objective/train/weights_min": 0.5872719287872314, + "theoretical_loss": 3.630965857462084, + "tokens_seen": 1146880000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006590434922163377, + "loss": 0.0716, + "theoretical_loss": 3.630965857462084, + "tokens_seen": 1146880000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006589632482747553, + "loss": 0.0744, + "theoretical_loss": 3.6308894780839798, + "tokens_seen": 1147142144 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006588830043331728, + "loss": 0.0716, + "theoretical_loss": 3.630813121043926, + "tokens_seen": 1147404288 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006588027603915905, + "loss": 0.0737, + "theoretical_loss": 3.630736786330288, + "tokens_seen": 1147666432 + }, + { + "epoch": 0.35, + "learning_rate": 0.000658722516450008, + "loss": 0.0746, + "theoretical_loss": 3.630660473931441, + "tokens_seen": 1147928576 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006586422725084256, + "loss": 0.0756, + "theoretical_loss": 3.6305841838357673, + "tokens_seen": 1148190720 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006585620285668432, + "loss": 0.0706, + "theoretical_loss": 3.630507916031659, + "tokens_seen": 1148452864 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006584817846252607, + "loss": 0.0745, + "theoretical_loss": 3.630431670507517, + "tokens_seen": 1148715008 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006584015406836785, + "loss": 0.0741, + "theoretical_loss": 3.6303554472517496, + "tokens_seen": 1148977152 + }, + { + "epoch": 0.35, + "learning_rate": 0.000658321296742096, + "loss": 0.0716, + "theoretical_loss": 3.6302792462527758, + "tokens_seen": 1149239296 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006582410528005136, + "loss": 0.071, + "theoretical_loss": 3.6302030674990213, + "tokens_seen": 1149501440 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006581608088589312, + "loss": 0.075, + "theoretical_loss": 3.6301269109789214, + "tokens_seen": 1149763584 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006580805649173488, + "loss": 0.0719, + "theoretical_loss": 3.63005077668092, + "tokens_seen": 1150025728 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0009393309592269361, + "objective/train/docs_used": 421268, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4447449445724487, + "objective/train/original_loss": 1.4447450637817383, + "objective/train/theoretical_loss": 3.630012717861597, + "objective/train/tokens_used": 1170616800, + "objective/train/value_avg": -0.00754547119140625, + "objective/train/value_loss": 0.0002241473994217813, + "objective/train/value_max": -0.0001767873764038086, + "objective/train/value_min": -0.6748046875, + "objective/train/value_reward_corr": 0.743822312179771, + "objective/train/value_std": 0.0171661376953125, + "objective/train/weight_avg": 1.0010449886322021, + "objective/train/weighted_lm_loss": 1.4458165168762207, + "objective/train/weights_max": 1.4855008125305176, + "objective/train/weights_min": 0.5288990139961243, + "theoretical_loss": 3.630012717861597, + "tokens_seen": 1150156800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006580003209757663, + "loss": 0.0723, + "theoretical_loss": 3.62997466459347, + "tokens_seen": 1150287872 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006579200770341839, + "loss": 0.0705, + "theoretical_loss": 3.629898574705031, + "tokens_seen": 1150550016 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006578398330926015, + "loss": 0.0735, + "theoretical_loss": 3.629822507004075, + "tokens_seen": 1150812160 + }, + { + "epoch": 0.35, + "learning_rate": 0.000657759589151019, + "loss": 0.0723, + "theoretical_loss": 3.629746461479079, + "tokens_seen": 1151074304 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006576793452094368, + "loss": 0.0714, + "theoretical_loss": 3.62967043811853, + "tokens_seen": 1151336448 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006575991012678543, + "loss": 0.0736, + "theoretical_loss": 3.629594436910924, + "tokens_seen": 1151598592 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006575188573262719, + "loss": 0.0728, + "theoretical_loss": 3.6295184578447643, + "tokens_seen": 1151860736 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006574386133846895, + "loss": 0.0699, + "theoretical_loss": 3.6294425009085645, + "tokens_seen": 1152122880 + }, + { + "epoch": 0.35, + "learning_rate": 0.000657358369443107, + "loss": 0.073, + "theoretical_loss": 3.6293665660908454, + "tokens_seen": 1152385024 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006572781255015246, + "loss": 0.0718, + "theoretical_loss": 3.6292906533801372, + "tokens_seen": 1152647168 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006571978815599422, + "loss": 0.0742, + "theoretical_loss": 3.6292147627649776, + "tokens_seen": 1152909312 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006571176376183598, + "loss": 0.0752, + "theoretical_loss": 3.6291388942339147, + "tokens_seen": 1153171456 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.00047070140135474503, + "objective/train/docs_used": 422447, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6008491516113281, + "objective/train/original_loss": 1.6008491516113281, + "objective/train/theoretical_loss": 3.6290630477755026, + "objective/train/tokens_used": 1173893600, + "objective/train/value_avg": -0.00966644287109375, + "objective/train/value_loss": 0.0003931581450160593, + "objective/train/value_max": -9.608268737792969e-05, + "objective/train/value_min": -0.392333984375, + "objective/train/value_reward_corr": 0.647847413214932, + "objective/train/value_std": 0.015899658203125, + "objective/train/weight_avg": 1.0006462335586548, + "objective/train/weighted_lm_loss": 1.601322054862976, + "objective/train/weights_max": 1.4302529096603394, + "objective/train/weights_min": 0.3789495527744293, + "theoretical_loss": 3.6290630477755026, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006570373936767774, + "loss": 0.0714, + "theoretical_loss": 3.6290630477755026, + "tokens_seen": 1153433600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006569571497351951, + "loss": 0.0717, + "theoretical_loss": 3.6289872233783065, + "tokens_seen": 1153695744 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006568769057936126, + "loss": 0.0705, + "theoretical_loss": 3.6289114210308977, + "tokens_seen": 1153957888 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006567966618520302, + "loss": 0.0728, + "theoretical_loss": 3.628835640721859, + "tokens_seen": 1154220032 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006567164179104478, + "loss": 0.0725, + "theoretical_loss": 3.6287598824397787, + "tokens_seen": 1154482176 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006566361739688653, + "loss": 0.0747, + "theoretical_loss": 3.6286841461732546, + "tokens_seen": 1154744320 + }, + { + "epoch": 0.35, + "learning_rate": 0.000656555930027283, + "loss": 0.0713, + "theoretical_loss": 3.6286084319108944, + "tokens_seen": 1155006464 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006564756860857005, + "loss": 0.0701, + "theoretical_loss": 3.628532739641312, + "tokens_seen": 1155268608 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006563954421441181, + "loss": 0.0699, + "theoretical_loss": 3.6284570693531317, + "tokens_seen": 1155530752 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006563151982025357, + "loss": 0.0717, + "theoretical_loss": 3.6283814210349847, + "tokens_seen": 1155792896 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006562349542609534, + "loss": 0.0709, + "theoretical_loss": 3.628305794675512, + "tokens_seen": 1156055040 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006561547103193709, + "loss": 0.0711, + "theoretical_loss": 3.6282301902633627, + "tokens_seen": 1156317184 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006560744663777885, + "loss": 0.0692, + "theoretical_loss": 3.628154607787194, + "tokens_seen": 1156579328 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.000545949034858495, + "objective/train/docs_used": 423626, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2599838972091675, + "objective/train/original_loss": 1.2599838972091675, + "objective/train/theoretical_loss": 3.62811682477156, + "objective/train/tokens_used": 1177170400, + "objective/train/value_avg": -0.0085296630859375, + "objective/train/value_loss": 0.00013630140165332705, + "objective/train/value_max": -8.094310760498047e-05, + "objective/train/value_min": -0.234619140625, + "objective/train/value_reward_corr": 0.6883133819120735, + "objective/train/value_std": 0.01348876953125, + "objective/train/weight_avg": 1.000613808631897, + "objective/train/weighted_lm_loss": 1.2607276439666748, + "objective/train/weights_max": 1.1398074626922607, + "objective/train/weights_min": 0.8252780437469482, + "theoretical_loss": 3.62811682477156, + "tokens_seen": 1156710400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006559942224362061, + "loss": 0.0703, + "theoretical_loss": 3.6280790472356705, + "tokens_seen": 1156841472 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006559139784946236, + "loss": 0.07, + "theoretical_loss": 3.628003508597468, + "tokens_seen": 1157103616 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006558337345530413, + "loss": 0.0734, + "theoretical_loss": 3.6279279918612675, + "tokens_seen": 1157365760 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006557534906114588, + "loss": 0.0705, + "theoretical_loss": 3.6278524970157613, + "tokens_seen": 1157627904 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006556732466698765, + "loss": 0.0729, + "theoretical_loss": 3.6277770240496476, + "tokens_seen": 1157890048 + }, + { + "epoch": 0.35, + "learning_rate": 0.000655593002728294, + "loss": 0.0695, + "theoretical_loss": 3.6277015729516355, + "tokens_seen": 1158152192 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006555127587867115, + "loss": 0.0709, + "theoretical_loss": 3.6276261437104402, + "tokens_seen": 1158414336 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006554325148451293, + "loss": 0.0741, + "theoretical_loss": 3.6275507363147868, + "tokens_seen": 1158676480 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006553522709035468, + "loss": 0.0715, + "theoretical_loss": 3.6274753507534077, + "tokens_seen": 1158938624 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006552720269619644, + "loss": 0.0747, + "theoretical_loss": 3.6273999870150444, + "tokens_seen": 1159200768 + }, + { + "epoch": 0.35, + "learning_rate": 0.000655191783020382, + "loss": 0.0684, + "theoretical_loss": 3.627324645088446, + "tokens_seen": 1159462912 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006551115390787996, + "loss": 0.0722, + "theoretical_loss": 3.627249324962371, + "tokens_seen": 1159725056 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0010166645515710115, + "objective/train/docs_used": 424758, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4893251657485962, + "objective/train/original_loss": 1.4893251657485962, + "objective/train/theoretical_loss": 3.6271740266255854, + "objective/train/tokens_used": 1180447200, + "objective/train/value_avg": -0.00568389892578125, + "objective/train/value_loss": 0.00012459652498364449, + "objective/train/value_max": -9.101629257202148e-05, + "objective/train/value_min": -0.2247314453125, + "objective/train/value_reward_corr": 0.6477667845280287, + "objective/train/value_std": 0.009185791015625, + "objective/train/weight_avg": 1.0010737180709839, + "objective/train/weighted_lm_loss": 1.4913969039916992, + "objective/train/weights_max": 1.1035935878753662, + "objective/train/weights_min": 0.36922982335090637, + "theoretical_loss": 3.6271740266255854, + "tokens_seen": 1159987200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006550312951372171, + "loss": 0.0719, + "theoretical_loss": 3.6271740266255854, + "tokens_seen": 1159987200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006549510511956347, + "loss": 0.0727, + "theoretical_loss": 3.6270987500668648, + "tokens_seen": 1160249344 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006548708072540523, + "loss": 0.0727, + "theoretical_loss": 3.6270234952749902, + "tokens_seen": 1160511488 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006547905633124698, + "loss": 0.0712, + "theoretical_loss": 3.6269482622387548, + "tokens_seen": 1160773632 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006547103193708876, + "loss": 0.0725, + "theoretical_loss": 3.6268730509469567, + "tokens_seen": 1161035776 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006546300754293051, + "loss": 0.0699, + "theoretical_loss": 3.626797861388404, + "tokens_seen": 1161297920 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006545498314877228, + "loss": 0.073, + "theoretical_loss": 3.6267226935519132, + "tokens_seen": 1161560064 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006544695875461403, + "loss": 0.0723, + "theoretical_loss": 3.626647547426309, + "tokens_seen": 1161822208 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006543893436045578, + "loss": 0.0728, + "theoretical_loss": 3.6265724230004226, + "tokens_seen": 1162084352 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006543090996629755, + "loss": 0.0713, + "theoretical_loss": 3.6264973202630966, + "tokens_seen": 1162346496 + }, + { + "epoch": 0.35, + "learning_rate": 0.000654228855721393, + "loss": 0.0697, + "theoretical_loss": 3.6264222392031797, + "tokens_seen": 1162608640 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006541486117798106, + "loss": 0.0696, + "theoretical_loss": 3.6263471798095286, + "tokens_seen": 1162870784 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006540683678382282, + "loss": 0.0749, + "theoretical_loss": 3.6262721420710093, + "tokens_seen": 1163132928 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0016251426422968507, + "objective/train/docs_used": 425912, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4275366067886353, + "objective/train/original_loss": 1.4275367259979248, + "objective/train/theoretical_loss": 3.6262346313189475, + "objective/train/tokens_used": 1183724000, + "objective/train/value_avg": -0.008636474609375, + "objective/train/value_loss": 0.00021610024850815535, + "objective/train/value_max": -0.0001366138458251953, + "objective/train/value_min": -0.300537109375, + "objective/train/value_reward_corr": 0.6914367282185478, + "objective/train/value_std": 0.01494598388671875, + "objective/train/weight_avg": 1.0017211437225342, + "objective/train/weighted_lm_loss": 1.4297378063201904, + "objective/train/weights_max": 1.1434952020645142, + "objective/train/weights_min": 0.37196043133735657, + "theoretical_loss": 3.6262346313189475, + "tokens_seen": 1163264000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006539881238966459, + "loss": 0.0709, + "theoretical_loss": 3.6261971259764962, + "tokens_seen": 1163395072 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006539078799550634, + "loss": 0.0696, + "theoretical_loss": 3.626122131514871, + "tokens_seen": 1163657216 + }, + { + "epoch": 0.35, + "learning_rate": 0.000653827636013481, + "loss": 0.0724, + "theoretical_loss": 3.626047158675024, + "tokens_seen": 1163919360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006537473920718986, + "loss": 0.0702, + "theoretical_loss": 3.625972207445854, + "tokens_seen": 1164181504 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006536671481303161, + "loss": 0.0686, + "theoretical_loss": 3.625897277816267, + "tokens_seen": 1164443648 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006535869041887338, + "loss": 0.0709, + "theoretical_loss": 3.625822369775179, + "tokens_seen": 1164705792 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006535066602471513, + "loss": 0.0731, + "theoretical_loss": 3.6257474833115113, + "tokens_seen": 1164967936 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006534264163055689, + "loss": 0.0726, + "theoretical_loss": 3.625672618414198, + "tokens_seen": 1165230080 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006533461723639865, + "loss": 0.0715, + "theoretical_loss": 3.6255977750721753, + "tokens_seen": 1165492224 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006532659284224042, + "loss": 0.0746, + "theoretical_loss": 3.6255229532743933, + "tokens_seen": 1165754368 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006531856844808218, + "loss": 0.0713, + "theoretical_loss": 3.625448153009807, + "tokens_seen": 1166016512 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006531054405392393, + "loss": 0.0692, + "theoretical_loss": 3.6253733742673795, + "tokens_seen": 1166278656 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.0018858092371374369, + "objective/train/docs_used": 427016, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.495458960533142, + "objective/train/original_loss": 1.4954588413238525, + "objective/train/theoretical_loss": 3.625298617036084, + "objective/train/tokens_used": 1187000800, + "objective/train/value_avg": -0.008880615234375, + "objective/train/value_loss": 0.00015289847215171903, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.302001953125, + "objective/train/value_reward_corr": 0.7871390335182029, + "objective/train/value_std": 0.01666259765625, + "objective/train/weight_avg": 1.001957893371582, + "objective/train/weighted_lm_loss": 1.4991862773895264, + "objective/train/weights_max": 1.1765505075454712, + "objective/train/weights_min": 0.373849481344223, + "theoretical_loss": 3.625298617036084, + "tokens_seen": 1166540800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006530251965976569, + "loss": 0.0746, + "theoretical_loss": 3.625298617036084, + "tokens_seen": 1166540800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006529449526560745, + "loss": 0.0728, + "theoretical_loss": 3.6252238813049, + "tokens_seen": 1166802944 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006528647087144921, + "loss": 0.0732, + "theoretical_loss": 3.6251491670628155, + "tokens_seen": 1167065088 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006527844647729096, + "loss": 0.071, + "theoretical_loss": 3.6250744742988275, + "tokens_seen": 1167327232 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006527042208313273, + "loss": 0.0729, + "theoretical_loss": 3.6249998030019404, + "tokens_seen": 1167589376 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006526239768897448, + "loss": 0.0708, + "theoretical_loss": 3.6249251531611666, + "tokens_seen": 1167851520 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006525437329481623, + "loss": 0.0716, + "theoretical_loss": 3.6248505247655265, + "tokens_seen": 1168113664 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006524634890065801, + "loss": 0.073, + "theoretical_loss": 3.6247759178040493, + "tokens_seen": 1168375808 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006523832450649976, + "loss": 0.0705, + "theoretical_loss": 3.624701332265772, + "tokens_seen": 1168637952 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006523030011234152, + "loss": 0.0703, + "theoretical_loss": 3.6246267681397386, + "tokens_seen": 1168900096 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006522227571818328, + "loss": 0.0713, + "theoretical_loss": 3.624552225415003, + "tokens_seen": 1169162240 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006521425132402504, + "loss": 0.0692, + "theoretical_loss": 3.6244777040806255, + "tokens_seen": 1169424384 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006520622692986679, + "loss": 0.0723, + "theoretical_loss": 3.624403204125676, + "tokens_seen": 1169686528 + }, + { + "epoch": 0.35, + "objective/train/advantage_avg": 0.00018043404270429164, + "objective/train/docs_used": 428177, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4640066623687744, + "objective/train/original_loss": 1.4640066623687744, + "objective/train/theoretical_loss": 3.6243659621620727, + "objective/train/tokens_used": 1190277600, + "objective/train/value_avg": -0.010498046875, + "objective/train/value_loss": 0.00026088516460731626, + "objective/train/value_max": -5.4776668548583984e-05, + "objective/train/value_min": -0.31982421875, + "objective/train/value_reward_corr": 0.7486174859686674, + "objective/train/value_std": 0.017181396484375, + "objective/train/weight_avg": 1.000304102897644, + "objective/train/weighted_lm_loss": 1.4634703397750854, + "objective/train/weights_max": 1.2624222040176392, + "objective/train/weights_min": 0.3704579770565033, + "theoretical_loss": 3.6243659621620727, + "tokens_seen": 1169817600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006519820253570855, + "loss": 0.0749, + "theoretical_loss": 3.6243287255392307, + "tokens_seen": 1169948672 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006519017814155031, + "loss": 0.0723, + "theoretical_loss": 3.624254268310375, + "tokens_seen": 1170210816 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006518215374739207, + "loss": 0.0697, + "theoretical_loss": 3.6241798324282017, + "tokens_seen": 1170472960 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006517412935323384, + "loss": 0.0731, + "theoretical_loss": 3.624105417881813, + "tokens_seen": 1170735104 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006516610495907559, + "loss": 0.0731, + "theoretical_loss": 3.624031024660317, + "tokens_seen": 1170997248 + }, + { + "epoch": 0.35, + "learning_rate": 0.0006515808056491736, + "loss": 0.0734, + "theoretical_loss": 3.6239566527528306, + "tokens_seen": 1171259392 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006515005617075911, + "loss": 0.0715, + "theoretical_loss": 3.6238823021484796, + "tokens_seen": 1171521536 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006514203177660086, + "loss": 0.0729, + "theoretical_loss": 3.6238079728363974, + "tokens_seen": 1171783680 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006513400738244263, + "loss": 0.0711, + "theoretical_loss": 3.6237336648057243, + "tokens_seen": 1172045824 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006512598298828438, + "loss": 0.0692, + "theoretical_loss": 3.62365937804561, + "tokens_seen": 1172307968 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006511795859412614, + "loss": 0.0686, + "theoretical_loss": 3.6235851125452108, + "tokens_seen": 1172570112 + }, + { + "epoch": 0.36, + "learning_rate": 0.000651099341999679, + "loss": 0.0722, + "theoretical_loss": 3.6235108682936916, + "tokens_seen": 1172832256 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0007615335052832961, + "objective/train/docs_used": 429267, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.439132809638977, + "objective/train/original_loss": 1.4391329288482666, + "objective/train/theoretical_loss": 3.623436645280226, + "objective/train/tokens_used": 1193554400, + "objective/train/value_avg": -0.005706787109375, + "objective/train/value_loss": 8.419386722380295e-05, + "objective/train/value_max": -5.692243576049805e-05, + "objective/train/value_min": -0.278564453125, + "objective/train/value_reward_corr": 0.6225248522847999, + "objective/train/value_std": 0.007747650146484375, + "objective/train/weight_avg": 1.0008031129837036, + "objective/train/weighted_lm_loss": 1.4398229122161865, + "objective/train/weights_max": 1.1341171264648438, + "objective/train/weights_min": 0.7353724837303162, + "theoretical_loss": 3.623436645280226, + "tokens_seen": 1173094400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006510190980580967, + "loss": 0.0723, + "theoretical_loss": 3.623436645280226, + "tokens_seen": 1173094400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006509388541165142, + "loss": 0.0725, + "theoretical_loss": 3.6233624434939946, + "tokens_seen": 1173356544 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006508586101749318, + "loss": 0.07, + "theoretical_loss": 3.623288262924186, + "tokens_seen": 1173618688 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006507783662333494, + "loss": 0.0739, + "theoretical_loss": 3.623214103559997, + "tokens_seen": 1173880832 + }, + { + "epoch": 0.36, + "learning_rate": 0.000650698122291767, + "loss": 0.072, + "theoretical_loss": 3.6231399653906315, + "tokens_seen": 1174142976 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006506178783501846, + "loss": 0.0686, + "theoretical_loss": 3.623065848405303, + "tokens_seen": 1174405120 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006505376344086021, + "loss": 0.0717, + "theoretical_loss": 3.622991752593231, + "tokens_seen": 1174667264 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006504573904670198, + "loss": 0.0724, + "theoretical_loss": 3.6229176779436445, + "tokens_seen": 1174929408 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006503771465254373, + "loss": 0.0706, + "theoretical_loss": 3.6228436244457796, + "tokens_seen": 1175191552 + }, + { + "epoch": 0.36, + "learning_rate": 0.000650296902583855, + "loss": 0.072, + "theoretical_loss": 3.6227695920888796, + "tokens_seen": 1175453696 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006502166586422726, + "loss": 0.0705, + "theoretical_loss": 3.622695580862197, + "tokens_seen": 1175715840 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006501364147006901, + "loss": 0.0702, + "theoretical_loss": 3.6226215907549912, + "tokens_seen": 1175977984 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006500561707591077, + "loss": 0.0719, + "theoretical_loss": 3.6225476217565307, + "tokens_seen": 1176240128 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 2.649899761308916e-05, + "objective/train/docs_used": 430645, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.540775179862976, + "objective/train/original_loss": 1.5407750606536865, + "objective/train/theoretical_loss": 3.6225106451697275, + "objective/train/tokens_used": 1196831200, + "objective/train/value_avg": -0.00846099853515625, + "objective/train/value_loss": 0.00018133767298422754, + "objective/train/value_max": -3.0219554901123047e-05, + "objective/train/value_min": -0.51318359375, + "objective/train/value_reward_corr": 0.7236088657344646, + "objective/train/value_std": 0.01409912109375, + "objective/train/weight_avg": 1.0001122951507568, + "objective/train/weighted_lm_loss": 1.5408101081848145, + "objective/train/weights_max": 1.1531023979187012, + "objective/train/weights_min": 0.3940271735191345, + "theoretical_loss": 3.6225106451697275, + "tokens_seen": 1176371200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006499759268175253, + "loss": 0.0713, + "theoretical_loss": 3.6224736738560894, + "tokens_seen": 1176502272 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006498956828759429, + "loss": 0.0755, + "theoretical_loss": 3.6223997470429516, + "tokens_seen": 1176764416 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006498154389343604, + "loss": 0.0713, + "theoretical_loss": 3.6223258413064086, + "tokens_seen": 1177026560 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006497351949927781, + "loss": 0.0734, + "theoretical_loss": 3.6222519566357585, + "tokens_seen": 1177288704 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006496549510511956, + "loss": 0.0711, + "theoretical_loss": 3.6221780930203096, + "tokens_seen": 1177550848 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006495747071096131, + "loss": 0.0729, + "theoretical_loss": 3.6221042504493743, + "tokens_seen": 1177812992 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006494944631680309, + "loss": 0.0738, + "theoretical_loss": 3.622030428912276, + "tokens_seen": 1178075136 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006494142192264484, + "loss": 0.0699, + "theoretical_loss": 3.6219566283983458, + "tokens_seen": 1178337280 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006493339752848661, + "loss": 0.0747, + "theoretical_loss": 3.6218828488969197, + "tokens_seen": 1178599424 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006492537313432836, + "loss": 0.0706, + "theoretical_loss": 3.6218090903973446, + "tokens_seen": 1178861568 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006491734874017012, + "loss": 0.0764, + "theoretical_loss": 3.621735352888974, + "tokens_seen": 1179123712 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006490932434601188, + "loss": 0.0787, + "theoretical_loss": 3.621661636361169, + "tokens_seen": 1179385856 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": -0.00019712685025297105, + "objective/train/docs_used": 431781, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4777541160583496, + "objective/train/original_loss": 1.4777541160583496, + "objective/train/theoretical_loss": 3.6215879408032987, + "objective/train/tokens_used": 1200108000, + "objective/train/value_avg": -0.006439208984375, + "objective/train/value_loss": 0.000245880801230669, + "objective/train/value_max": -6.920099258422852e-05, + "objective/train/value_min": -0.25927734375, + "objective/train/value_reward_corr": 0.6481294948793725, + "objective/train/value_std": 0.011993408203125, + "objective/train/weight_avg": 0.999910295009613, + "objective/train/weighted_lm_loss": 1.476799488067627, + "objective/train/weights_max": 1.2023646831512451, + "objective/train/weights_min": 0.37629902362823486, + "theoretical_loss": 3.6215879408032987, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006490129995185363, + "loss": 0.0742, + "theoretical_loss": 3.6215879408032987, + "tokens_seen": 1179648000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006489327555769539, + "loss": 0.074, + "theoretical_loss": 3.6215142662047395, + "tokens_seen": 1179910144 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006488525116353715, + "loss": 0.0742, + "theoretical_loss": 3.621440612554876, + "tokens_seen": 1180172288 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006487722676937892, + "loss": 0.0741, + "theoretical_loss": 3.6213669798431005, + "tokens_seen": 1180434432 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006486920237522067, + "loss": 0.0724, + "theoretical_loss": 3.621293368058813, + "tokens_seen": 1180696576 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006486117798106244, + "loss": 0.0739, + "theoretical_loss": 3.621219777191421, + "tokens_seen": 1180958720 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006485315358690419, + "loss": 0.0746, + "theoretical_loss": 3.62114620723034, + "tokens_seen": 1181220864 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006484512919274594, + "loss": 0.0738, + "theoretical_loss": 3.621072658164993, + "tokens_seen": 1181483008 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006483710479858771, + "loss": 0.0735, + "theoretical_loss": 3.6209991299848108, + "tokens_seen": 1181745152 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006482908040442946, + "loss": 0.0751, + "theoretical_loss": 3.620925622679232, + "tokens_seen": 1182007296 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006482105601027123, + "loss": 0.0738, + "theoretical_loss": 3.620852136237702, + "tokens_seen": 1182269440 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006481303161611298, + "loss": 0.0741, + "theoretical_loss": 3.620778670649676, + "tokens_seen": 1182531584 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006480500722195475, + "loss": 0.0709, + "theoretical_loss": 3.620705225904614, + "tokens_seen": 1182793728 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0012156600132584572, + "objective/train/docs_used": 433029, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4418714046478271, + "objective/train/original_loss": 1.4418714046478271, + "objective/train/theoretical_loss": 3.6206685113449044, + "objective/train/tokens_used": 1203384800, + "objective/train/value_avg": -0.007598876953125, + "objective/train/value_loss": 0.0002602523018140346, + "objective/train/value_max": -0.00014889240264892578, + "objective/train/value_min": -0.91015625, + "objective/train/value_reward_corr": 0.7045290549833653, + "objective/train/value_std": 0.014434814453125, + "objective/train/weight_avg": 1.0013296604156494, + "objective/train/weighted_lm_loss": 1.4433629512786865, + "objective/train/weights_max": 1.1642762422561646, + "objective/train/weights_min": 0.37042829394340515, + "theoretical_loss": 3.6206685113449044, + "tokens_seen": 1182924800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006479698282779651, + "loss": 0.0734, + "theoretical_loss": 3.620631801991987, + "tokens_seen": 1183055872 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006478895843363826, + "loss": 0.0752, + "theoretical_loss": 3.6205583989012697, + "tokens_seen": 1183318016 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006478093403948002, + "loss": 0.075, + "theoretical_loss": 3.6204850166219478, + "tokens_seen": 1183580160 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006477290964532178, + "loss": 0.07, + "theoretical_loss": 3.6204116551435126, + "tokens_seen": 1183842304 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006476488525116354, + "loss": 0.0746, + "theoretical_loss": 3.620338314455465, + "tokens_seen": 1184104448 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006475686085700529, + "loss": 0.0741, + "theoretical_loss": 3.6202649945473113, + "tokens_seen": 1184366592 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006474883646284706, + "loss": 0.0724, + "theoretical_loss": 3.6201916954085664, + "tokens_seen": 1184628736 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006474081206868881, + "loss": 0.0724, + "theoretical_loss": 3.620118417028754, + "tokens_seen": 1184890880 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006473278767453056, + "loss": 0.0735, + "theoretical_loss": 3.620045159397403, + "tokens_seen": 1185153024 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006472476328037234, + "loss": 0.0741, + "theoretical_loss": 3.619971922504052, + "tokens_seen": 1185415168 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006471673888621409, + "loss": 0.0712, + "theoretical_loss": 3.6198987063382457, + "tokens_seen": 1185677312 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006470871449205585, + "loss": 0.0757, + "theoretical_loss": 3.6198255108895374, + "tokens_seen": 1185939456 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.001317921094596386, + "objective/train/docs_used": 434243, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5565327405929565, + "objective/train/original_loss": 1.556532859802246, + "objective/train/theoretical_loss": 3.6197523361474877, + "objective/train/tokens_used": 1206661600, + "objective/train/value_avg": -0.0074920654296875, + "objective/train/value_loss": 0.00021194624423515052, + "objective/train/value_max": -0.00010389089584350586, + "objective/train/value_min": -0.22802734375, + "objective/train/value_reward_corr": 0.585920330003381, + "objective/train/value_std": 0.01111602783203125, + "objective/train/weight_avg": 1.0014089345932007, + "objective/train/weighted_lm_loss": 1.5591508150100708, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.3731912076473236, + "theoretical_loss": 3.6197523361474877, + "tokens_seen": 1186201600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006470069009789761, + "loss": 0.0754, + "theoretical_loss": 3.6197523361474877, + "tokens_seen": 1186201600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006469266570373937, + "loss": 0.0723, + "theoretical_loss": 3.619679182101664, + "tokens_seen": 1186463744 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006468464130958113, + "loss": 0.0766, + "theoretical_loss": 3.619606048741643, + "tokens_seen": 1186725888 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006467661691542289, + "loss": 0.0751, + "theoretical_loss": 3.6195329360570065, + "tokens_seen": 1186988032 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006466859252126464, + "loss": 0.0743, + "theoretical_loss": 3.6194598440373467, + "tokens_seen": 1187250176 + }, + { + "epoch": 0.36, + "learning_rate": 0.000646605681271064, + "loss": 0.0721, + "theoretical_loss": 3.619386772672261, + "tokens_seen": 1187512320 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006465254373294817, + "loss": 0.0767, + "theoretical_loss": 3.6193137219513556, + "tokens_seen": 1187774464 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006464451933878992, + "loss": 0.0759, + "theoretical_loss": 3.619240691864243, + "tokens_seen": 1188036608 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006463649494463169, + "loss": 0.071, + "theoretical_loss": 3.619167682400545, + "tokens_seen": 1188298752 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006462847055047344, + "loss": 0.0769, + "theoretical_loss": 3.619094693549889, + "tokens_seen": 1188560896 + }, + { + "epoch": 0.36, + "learning_rate": 0.000646204461563152, + "loss": 0.0729, + "theoretical_loss": 3.6190217253019124, + "tokens_seen": 1188823040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006461242176215696, + "loss": 0.0745, + "theoretical_loss": 3.6189487776462568, + "tokens_seen": 1189085184 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006460439736799871, + "loss": 0.0707, + "theoretical_loss": 3.6188758505725738, + "tokens_seen": 1189347328 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0009901868179440498, + "objective/train/docs_used": 435462, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4739112854003906, + "objective/train/original_loss": 1.4739114046096802, + "objective/train/theoretical_loss": 3.61883939475074, + "objective/train/tokens_used": 1209938400, + "objective/train/value_avg": -0.00931549072265625, + "objective/train/value_loss": 0.0008738775504752994, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.97705078125, + "objective/train/value_reward_corr": 0.5927413814353182, + "objective/train/value_std": 0.02313232421875, + "objective/train/weight_avg": 1.0013524293899536, + "objective/train/weighted_lm_loss": 1.4759513139724731, + "objective/train/weights_max": 2.4594109058380127, + "objective/train/weights_min": 0.22472499310970306, + "theoretical_loss": 3.61883939475074, + "tokens_seen": 1189478400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006459637297384047, + "loss": 0.0769, + "theoretical_loss": 3.618802944070522, + "tokens_seen": 1189609472 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006458834857968223, + "loss": 0.0741, + "theoretical_loss": 3.618730058129766, + "tokens_seen": 1189871616 + }, + { + "epoch": 0.36, + "learning_rate": 0.00064580324185524, + "loss": 0.0746, + "theoretical_loss": 3.61865719273998, + "tokens_seen": 1190133760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006457229979136575, + "loss": 0.0738, + "theoretical_loss": 3.6185843478908453, + "tokens_seen": 1190395904 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006456427539720752, + "loss": 0.0736, + "theoretical_loss": 3.618511523572049, + "tokens_seen": 1190658048 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006455625100304927, + "loss": 0.0728, + "theoretical_loss": 3.6184387197732875, + "tokens_seen": 1190920192 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006454822660889103, + "loss": 0.0773, + "theoretical_loss": 3.6183659364842624, + "tokens_seen": 1191182336 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006454020221473279, + "loss": 0.0743, + "theoretical_loss": 3.6182931736946857, + "tokens_seen": 1191444480 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006453217782057454, + "loss": 0.0722, + "theoretical_loss": 3.618220431394274, + "tokens_seen": 1191706624 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006452415342641631, + "loss": 0.0704, + "theoretical_loss": 3.618147709572754, + "tokens_seen": 1191968768 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006451612903225806, + "loss": 0.0739, + "theoretical_loss": 3.618075008219858, + "tokens_seen": 1192230912 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006450810463809983, + "loss": 0.0716, + "theoretical_loss": 3.6180023273253252, + "tokens_seen": 1192493056 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0011386601254343987, + "objective/train/docs_used": 436657, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5473337173461914, + "objective/train/original_loss": 1.5473337173461914, + "objective/train/theoretical_loss": 3.6179296668789043, + "objective/train/tokens_used": 1213215200, + "objective/train/value_avg": -0.01073455810546875, + "objective/train/value_loss": 0.00023775658337399364, + "objective/train/value_max": -0.00012934207916259766, + "objective/train/value_min": -0.73193359375, + "objective/train/value_reward_corr": 0.7040417032080557, + "objective/train/value_std": 0.0167999267578125, + "objective/train/weight_avg": 1.0012502670288086, + "objective/train/weighted_lm_loss": 1.5486918687820435, + "objective/train/weights_max": 1.341107964515686, + "objective/train/weights_min": 0.3696603775024414, + "theoretical_loss": 3.6179296668789043, + "tokens_seen": 1192755200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006450008024394159, + "loss": 0.0754, + "theoretical_loss": 3.6179296668789043, + "tokens_seen": 1192755200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006449205584978334, + "loss": 0.0737, + "theoretical_loss": 3.6178570268703494, + "tokens_seen": 1193017344 + }, + { + "epoch": 0.36, + "learning_rate": 0.000644840314556251, + "loss": 0.0734, + "theoretical_loss": 3.617784407289424, + "tokens_seen": 1193279488 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006447600706146686, + "loss": 0.0747, + "theoretical_loss": 3.617711808125896, + "tokens_seen": 1193541632 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006446798266730862, + "loss": 0.0753, + "theoretical_loss": 3.6176392293695434, + "tokens_seen": 1193803776 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006445995827315037, + "loss": 0.0752, + "theoretical_loss": 3.6175666710101506, + "tokens_seen": 1194065920 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006445193387899214, + "loss": 0.0732, + "theoretical_loss": 3.6174941330375097, + "tokens_seen": 1194328064 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006444390948483389, + "loss": 0.0752, + "theoretical_loss": 3.617421615441419, + "tokens_seen": 1194590208 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006443588509067564, + "loss": 0.0778, + "theoretical_loss": 3.617349118211685, + "tokens_seen": 1194852352 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006442786069651742, + "loss": 0.0732, + "theoretical_loss": 3.6172766413381225, + "tokens_seen": 1195114496 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006441983630235917, + "loss": 0.0702, + "theoretical_loss": 3.617204184810552, + "tokens_seen": 1195376640 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006441181190820094, + "loss": 0.074, + "theoretical_loss": 3.6171317486188013, + "tokens_seen": 1195638784 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006440378751404269, + "loss": 0.0744, + "theoretical_loss": 3.6170593327527074, + "tokens_seen": 1195900928 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0009150534751825035, + "objective/train/docs_used": 437781, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5300925970077515, + "objective/train/original_loss": 1.530092477798462, + "objective/train/theoretical_loss": 3.617023132438607, + "objective/train/tokens_used": 1216492000, + "objective/train/value_avg": -0.006450653076171875, + "objective/train/value_loss": 0.00013045086234342307, + "objective/train/value_max": -0.00013446807861328125, + "objective/train/value_min": -0.371826171875, + "objective/train/value_reward_corr": 0.6219127786332249, + "objective/train/value_std": 0.009521484375, + "objective/train/weight_avg": 1.000975251197815, + "objective/train/weighted_lm_loss": 1.5323002338409424, + "objective/train/weights_max": 1.1521174907684326, + "objective/train/weights_min": 0.3824407160282135, + "theoretical_loss": 3.617023132438607, + "tokens_seen": 1196032000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006439576311988445, + "loss": 0.0727, + "theoretical_loss": 3.616986937202112, + "tokens_seen": 1196163072 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006438773872572621, + "loss": 0.0745, + "theoretical_loss": 3.616914561956867, + "tokens_seen": 1196425216 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006437971433156797, + "loss": 0.0743, + "theoretical_loss": 3.6168422070068287, + "tokens_seen": 1196687360 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006437168993740972, + "loss": 0.0731, + "theoretical_loss": 3.6167698723418624, + "tokens_seen": 1196949504 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006436366554325149, + "loss": 0.0761, + "theoretical_loss": 3.6166975579518406, + "tokens_seen": 1197211648 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006435564114909325, + "loss": 0.0729, + "theoretical_loss": 3.6166252638266423, + "tokens_seen": 1197473792 + }, + { + "epoch": 0.36, + "learning_rate": 0.00064347616754935, + "loss": 0.0746, + "theoretical_loss": 3.616552989956155, + "tokens_seen": 1197735936 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006433959236077677, + "loss": 0.0745, + "theoretical_loss": 3.616480736330272, + "tokens_seen": 1197998080 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006433156796661852, + "loss": 0.0711, + "theoretical_loss": 3.616408502938895, + "tokens_seen": 1198260224 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006432354357246028, + "loss": 0.0701, + "theoretical_loss": 3.616336289771932, + "tokens_seen": 1198522368 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006431551917830204, + "loss": 0.0711, + "theoretical_loss": 3.6162640968192994, + "tokens_seen": 1198784512 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006430749478414379, + "loss": 0.0728, + "theoretical_loss": 3.61619192407092, + "tokens_seen": 1199046656 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.0014068173477426171, + "objective/train/docs_used": 438488, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5579599142074585, + "objective/train/original_loss": 1.5579596757888794, + "objective/train/theoretical_loss": 3.6161197715167237, + "objective/train/tokens_used": 1219768800, + "objective/train/value_avg": -0.00788116455078125, + "objective/train/value_loss": 0.0003655260952655226, + "objective/train/value_max": -9.840726852416992e-05, + "objective/train/value_min": -0.95556640625, + "objective/train/value_reward_corr": 0.7506662225236421, + "objective/train/value_std": 0.0210113525390625, + "objective/train/weight_avg": 1.0015637874603271, + "objective/train/weighted_lm_loss": 1.5613462924957275, + "objective/train/weights_max": 1.6007517576217651, + "objective/train/weights_min": 0.22900784015655518, + "theoretical_loss": 3.6161197715167237, + "tokens_seen": 1199308800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006429947038998556, + "loss": 0.0722, + "theoretical_loss": 3.6161197715167237, + "tokens_seen": 1199308800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006429144599582731, + "loss": 0.0719, + "theoretical_loss": 3.616047639146648, + "tokens_seen": 1199570944 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006428342160166908, + "loss": 0.0717, + "theoretical_loss": 3.6159755269506375, + "tokens_seen": 1199833088 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006427539720751084, + "loss": 0.0737, + "theoretical_loss": 3.6159034349186445, + "tokens_seen": 1200095232 + }, + { + "epoch": 0.36, + "learning_rate": 0.000642673728133526, + "loss": 0.0759, + "theoretical_loss": 3.615831363040628, + "tokens_seen": 1200357376 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006425934841919435, + "loss": 0.071, + "theoretical_loss": 3.615759311306553, + "tokens_seen": 1200619520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006425132402503611, + "loss": 0.0715, + "theoretical_loss": 3.6156872797063944, + "tokens_seen": 1200881664 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006424329963087787, + "loss": 0.0745, + "theoretical_loss": 3.6156152682301324, + "tokens_seen": 1201143808 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006423527523671962, + "loss": 0.0753, + "theoretical_loss": 3.6155432768677542, + "tokens_seen": 1201405952 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006422725084256139, + "loss": 0.0726, + "theoretical_loss": 3.615471305609255, + "tokens_seen": 1201668096 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006421922644840314, + "loss": 0.0735, + "theoretical_loss": 3.6153993544446372, + "tokens_seen": 1201930240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006421120205424491, + "loss": 0.0729, + "theoretical_loss": 3.61532742336391, + "tokens_seen": 1202192384 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006420317766008667, + "loss": 0.071, + "theoretical_loss": 3.6152555123570895, + "tokens_seen": 1202454528 + }, + { + "epoch": 0.36, + "objective/train/advantage_avg": 0.00142679491546005, + "objective/train/docs_used": 439824, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5132286548614502, + "objective/train/original_loss": 1.5132288932800293, + "objective/train/theoretical_loss": 3.6152195643782763, + "objective/train/tokens_used": 1223045600, + "objective/train/value_avg": -0.00974273681640625, + "objective/train/value_loss": 0.00020919894450344145, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.3330078125, + "objective/train/value_reward_corr": 0.7210474728015025, + "objective/train/value_std": 0.01556396484375, + "objective/train/weight_avg": 1.0015263557434082, + "objective/train/weighted_lm_loss": 1.5158843994140625, + "objective/train/weights_max": 1.3555892705917358, + "objective/train/weights_min": 0.3693855106830597, + "theoretical_loss": 3.6152195643782763, + "tokens_seen": 1202585600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006419515326592842, + "loss": 0.0687, + "theoretical_loss": 3.6151836214141992, + "tokens_seen": 1202716672 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006418712887177018, + "loss": 0.0738, + "theoretical_loss": 3.61511175052527, + "tokens_seen": 1202978816 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006417910447761194, + "loss": 0.0712, + "theoretical_loss": 3.6150398996803395, + "tokens_seen": 1203240960 + }, + { + "epoch": 0.36, + "learning_rate": 0.000641710800834537, + "loss": 0.0735, + "theoretical_loss": 3.6149680688694525, + "tokens_seen": 1203503104 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006416305568929546, + "loss": 0.0736, + "theoretical_loss": 3.614896258082661, + "tokens_seen": 1203765248 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006415503129513722, + "loss": 0.071, + "theoretical_loss": 3.614824467310025, + "tokens_seen": 1204027392 + }, + { + "epoch": 0.36, + "learning_rate": 0.0006414700690097897, + "loss": 0.0744, + "theoretical_loss": 3.614752696541609, + "tokens_seen": 1204289536 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006413898250682074, + "loss": 0.0733, + "theoretical_loss": 3.614680945767488, + "tokens_seen": 1204551680 + }, + { + "epoch": 0.37, + "learning_rate": 0.000641309581126625, + "loss": 0.0727, + "theoretical_loss": 3.614609214977741, + "tokens_seen": 1204813824 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006412293371850425, + "loss": 0.075, + "theoretical_loss": 3.614537504162457, + "tokens_seen": 1205075968 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006411490932434602, + "loss": 0.0737, + "theoretical_loss": 3.614465813311729, + "tokens_seen": 1205338112 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006410688493018777, + "loss": 0.0735, + "theoretical_loss": 3.6143941424156596, + "tokens_seen": 1205600256 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.00039125708281062543, + "objective/train/docs_used": 441116, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4485985040664673, + "objective/train/original_loss": 1.4485986232757568, + "objective/train/theoretical_loss": 3.614322491464357, + "objective/train/tokens_used": 1226322400, + "objective/train/value_avg": -0.01201629638671875, + "objective/train/value_loss": 0.0005621611489914358, + "objective/train/value_max": -0.00015115737915039062, + "objective/train/value_min": -0.95751953125, + "objective/train/value_reward_corr": 0.7828427996520164, + "objective/train/value_std": 0.02960205078125, + "objective/train/weight_avg": 1.0006436109542847, + "objective/train/weighted_lm_loss": 1.449430227279663, + "objective/train/weights_max": 1.638288974761963, + "objective/train/weights_min": 0.3702460527420044, + "theoretical_loss": 3.614322491464357, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006409886053602953, + "loss": 0.0726, + "theoretical_loss": 3.614322491464357, + "tokens_seen": 1205862400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006409083614187129, + "loss": 0.0761, + "theoretical_loss": 3.614250860447936, + "tokens_seen": 1206124544 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006408281174771304, + "loss": 0.0747, + "theoretical_loss": 3.6141792493565212, + "tokens_seen": 1206386688 + }, + { + "epoch": 0.37, + "learning_rate": 0.000640747873535548, + "loss": 0.0711, + "theoretical_loss": 3.6141076581802416, + "tokens_seen": 1206648832 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006406676295939656, + "loss": 0.0718, + "theoretical_loss": 3.614036086909234, + "tokens_seen": 1206910976 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006405873856523833, + "loss": 0.075, + "theoretical_loss": 3.6139645355336425, + "tokens_seen": 1207173120 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006405071417108009, + "loss": 0.0761, + "theoretical_loss": 3.613893004043617, + "tokens_seen": 1207435264 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006404268977692185, + "loss": 0.0724, + "theoretical_loss": 3.6138214924293166, + "tokens_seen": 1207697408 + }, + { + "epoch": 0.37, + "learning_rate": 0.000640346653827636, + "loss": 0.077, + "theoretical_loss": 3.6137500006809056, + "tokens_seen": 1207959552 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006402664098860537, + "loss": 0.0739, + "theoretical_loss": 3.6136785287885553, + "tokens_seen": 1208221696 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006401861659444712, + "loss": 0.0738, + "theoretical_loss": 3.6136070767424457, + "tokens_seen": 1208483840 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006401059220028887, + "loss": 0.0745, + "theoretical_loss": 3.6135356445327624, + "tokens_seen": 1208745984 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006400256780613064, + "loss": 0.0691, + "theoretical_loss": 3.6134642321496977, + "tokens_seen": 1209008128 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0001867167593445629, + "objective/train/docs_used": 442331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2952028512954712, + "objective/train/original_loss": 1.2952029705047607, + "objective/train/theoretical_loss": 3.613428533390085, + "objective/train/tokens_used": 1229599200, + "objective/train/value_avg": -0.00867462158203125, + "objective/train/value_loss": 0.00022595007612835616, + "objective/train/value_max": -4.756450653076172e-05, + "objective/train/value_min": -0.47412109375, + "objective/train/value_reward_corr": 0.820079703282189, + "objective/train/value_std": 0.019287109375, + "objective/train/weight_avg": 1.0002939701080322, + "objective/train/weighted_lm_loss": 1.2954621315002441, + "objective/train/weights_max": 1.459554672241211, + "objective/train/weights_min": 0.3717447817325592, + "theoretical_loss": 3.613428533390085, + "tokens_seen": 1209139200 + }, + { + "epoch": 0.37, + "learning_rate": 0.000639945434119724, + "loss": 0.0705, + "theoretical_loss": 3.613392839583452, + "tokens_seen": 1209270272 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006398651901781416, + "loss": 0.0742, + "theoretical_loss": 3.6133214668242317, + "tokens_seen": 1209532416 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006397849462365592, + "loss": 0.0707, + "theoretical_loss": 3.613250113862251, + "tokens_seen": 1209794560 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006397047022949768, + "loss": 0.0724, + "theoretical_loss": 3.61317878068773, + "tokens_seen": 1210056704 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006396244583533943, + "loss": 0.0716, + "theoretical_loss": 3.6131074672908965, + "tokens_seen": 1210318848 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006395442144118119, + "loss": 0.0714, + "theoretical_loss": 3.6130361736619854, + "tokens_seen": 1210580992 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006394639704702295, + "loss": 0.0739, + "theoretical_loss": 3.6129648997912382, + "tokens_seen": 1210843136 + }, + { + "epoch": 0.37, + "learning_rate": 0.000639383726528647, + "loss": 0.0724, + "theoretical_loss": 3.6128936456689034, + "tokens_seen": 1211105280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006393034825870647, + "loss": 0.073, + "theoretical_loss": 3.612822411285236, + "tokens_seen": 1211367424 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006392232386454822, + "loss": 0.0755, + "theoretical_loss": 3.612751196630499, + "tokens_seen": 1211629568 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006391429947039, + "loss": 0.0725, + "theoretical_loss": 3.61268000169496, + "tokens_seen": 1211891712 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006390627507623175, + "loss": 0.0744, + "theoretical_loss": 3.612608826468897, + "tokens_seen": 1212153856 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.0002951657515950501, + "objective/train/docs_used": 443456, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.476730465888977, + "objective/train/original_loss": 1.476730465888977, + "objective/train/theoretical_loss": 3.6125376709425923, + "objective/train/tokens_used": 1232876000, + "objective/train/value_avg": -0.0062255859375, + "objective/train/value_loss": 0.00031703378772363067, + "objective/train/value_max": -0.00010073184967041016, + "objective/train/value_min": -0.1983642578125, + "objective/train/value_reward_corr": 0.5360043017511746, + "objective/train/value_std": 0.00960540771484375, + "objective/train/weight_avg": 0.999844491481781, + "objective/train/weighted_lm_loss": 1.4760133028030396, + "objective/train/weights_max": 1.2021102905273438, + "objective/train/weights_min": 0.3719916045665741, + "theoretical_loss": 3.6125376709425923, + "tokens_seen": 1212416000 + }, + { + "epoch": 0.37, + "learning_rate": 0.000638982506820735, + "loss": 0.0738, + "theoretical_loss": 3.6125376709425923, + "tokens_seen": 1212416000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006389022628791527, + "loss": 0.0683, + "theoretical_loss": 3.6124665351063356, + "tokens_seen": 1212678144 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006388220189375702, + "loss": 0.0737, + "theoretical_loss": 3.612395418950424, + "tokens_seen": 1212940288 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006387417749959878, + "loss": 0.0743, + "theoretical_loss": 3.6123243224651604, + "tokens_seen": 1213202432 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006386615310544054, + "loss": 0.0719, + "theoretical_loss": 3.612253245640856, + "tokens_seen": 1213464576 + }, + { + "epoch": 0.37, + "learning_rate": 0.000638581287112823, + "loss": 0.0716, + "theoretical_loss": 3.6121821884678287, + "tokens_seen": 1213726720 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006385010431712405, + "loss": 0.0743, + "theoretical_loss": 3.6121111509364017, + "tokens_seen": 1213988864 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006384207992296582, + "loss": 0.075, + "theoretical_loss": 3.6120401330369067, + "tokens_seen": 1214251008 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006383405552880758, + "loss": 0.0722, + "theoretical_loss": 3.6119691347596814, + "tokens_seen": 1214513152 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006382603113464933, + "loss": 0.074, + "theoretical_loss": 3.611898156095071, + "tokens_seen": 1214775296 + }, + { + "epoch": 0.37, + "learning_rate": 0.000638180067404911, + "loss": 0.0736, + "theoretical_loss": 3.6118271970334264, + "tokens_seen": 1215037440 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006380998234633285, + "loss": 0.0722, + "theoretical_loss": 3.6117562575651068, + "tokens_seen": 1215299584 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006380195795217462, + "loss": 0.0728, + "theoretical_loss": 3.611685337680477, + "tokens_seen": 1215561728 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0022401802707463503, + "objective/train/docs_used": 444688, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3715991973876953, + "objective/train/original_loss": 1.3715991973876953, + "objective/train/theoretical_loss": 3.6116498850790366, + "objective/train/tokens_used": 1236152800, + "objective/train/value_avg": -0.005950927734375, + "objective/train/value_loss": 0.00014855283370707184, + "objective/train/value_max": -0.0001233816146850586, + "objective/train/value_min": -0.921875, + "objective/train/value_reward_corr": 0.452322266230042, + "objective/train/value_std": 0.01007080078125, + "objective/train/weight_avg": 1.0023151636123657, + "objective/train/weighted_lm_loss": 1.3756544589996338, + "objective/train/weights_max": 2.5139997005462646, + "objective/train/weights_min": 0.3757654130458832, + "theoretical_loss": 3.6116498850790366, + "tokens_seen": 1215692800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006379393355801637, + "loss": 0.072, + "theoretical_loss": 3.6116144373699086, + "tokens_seen": 1215823872 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006378590916385812, + "loss": 0.0728, + "theoretical_loss": 3.611543556623782, + "tokens_seen": 1216086016 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006377788476969989, + "loss": 0.0728, + "theoretical_loss": 3.6114726954324814, + "tokens_seen": 1216348160 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006376986037554164, + "loss": 0.0724, + "theoretical_loss": 3.6114018537864, + "tokens_seen": 1216610304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006376183598138341, + "loss": 0.0725, + "theoretical_loss": 3.6113310316759373, + "tokens_seen": 1216872448 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006375381158722517, + "loss": 0.0692, + "theoretical_loss": 3.6112602290914984, + "tokens_seen": 1217134592 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006374578719306693, + "loss": 0.0715, + "theoretical_loss": 3.611189446023497, + "tokens_seen": 1217396736 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006373776279890868, + "loss": 0.0725, + "theoretical_loss": 3.6111186824623527, + "tokens_seen": 1217658880 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006372973840475045, + "loss": 0.0736, + "theoretical_loss": 3.6110479383984915, + "tokens_seen": 1217921024 + }, + { + "epoch": 0.37, + "learning_rate": 0.000637217140105922, + "loss": 0.073, + "theoretical_loss": 3.6109772138223466, + "tokens_seen": 1218183168 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006371368961643395, + "loss": 0.0736, + "theoretical_loss": 3.6109065087243577, + "tokens_seen": 1218445312 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006370566522227572, + "loss": 0.0714, + "theoretical_loss": 3.610835823094972, + "tokens_seen": 1218707456 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0002287530805915594, + "objective/train/docs_used": 445814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5659476518630981, + "objective/train/original_loss": 1.5659475326538086, + "objective/train/theoretical_loss": 3.6107651569246424, + "objective/train/tokens_used": 1239429600, + "objective/train/value_avg": -0.01335906982421875, + "objective/train/value_loss": 0.0003773129137698561, + "objective/train/value_max": -0.00010150671005249023, + "objective/train/value_min": -0.476318359375, + "objective/train/value_reward_corr": 0.8288847136796991, + "objective/train/value_std": 0.0251617431640625, + "objective/train/weight_avg": 1.0004096031188965, + "objective/train/weighted_lm_loss": 1.5659234523773193, + "objective/train/weights_max": 1.2777711153030396, + "objective/train/weights_min": 0.5512197613716125, + "theoretical_loss": 3.6107651569246424, + "tokens_seen": 1218969600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006369764082811747, + "loss": 0.073, + "theoretical_loss": 3.6107651569246424, + "tokens_seen": 1218969600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006368961643395924, + "loss": 0.0712, + "theoretical_loss": 3.610694510203829, + "tokens_seen": 1219231744 + }, + { + "epoch": 0.37, + "learning_rate": 0.00063681592039801, + "loss": 0.0717, + "theoretical_loss": 3.610623882922999, + "tokens_seen": 1219493888 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006367356764564276, + "loss": 0.0716, + "theoretical_loss": 3.6105532750726255, + "tokens_seen": 1219756032 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006366554325148452, + "loss": 0.0718, + "theoretical_loss": 3.6104826866431887, + "tokens_seen": 1220018176 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006365751885732627, + "loss": 0.0716, + "theoretical_loss": 3.6104121176251764, + "tokens_seen": 1220280320 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006364949446316803, + "loss": 0.0723, + "theoretical_loss": 3.6103415680090816, + "tokens_seen": 1220542464 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006364147006900979, + "loss": 0.0739, + "theoretical_loss": 3.6102710377854046, + "tokens_seen": 1220804608 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006363344567485155, + "loss": 0.0705, + "theoretical_loss": 3.610200526944652, + "tokens_seen": 1221066752 + }, + { + "epoch": 0.37, + "learning_rate": 0.000636254212806933, + "loss": 0.0723, + "theoretical_loss": 3.610130035477339, + "tokens_seen": 1221328896 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006361739688653508, + "loss": 0.0753, + "theoretical_loss": 3.6100595633739854, + "tokens_seen": 1221591040 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006360937249237683, + "loss": 0.073, + "theoretical_loss": 3.6099891106251176, + "tokens_seen": 1221853184 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006360134809821858, + "loss": 0.0748, + "theoretical_loss": 3.6099186772212697, + "tokens_seen": 1222115328 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0006381941493600607, + "objective/train/docs_used": 446976, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.419157862663269, + "objective/train/original_loss": 1.419157862663269, + "objective/train/theoretical_loss": 3.6098834677707723, + "objective/train/tokens_used": 1242706400, + "objective/train/value_avg": -0.0130615234375, + "objective/train/value_loss": 0.0005473028286360204, + "objective/train/value_max": -0.00012242794036865234, + "objective/train/value_min": -0.96533203125, + "objective/train/value_reward_corr": 0.8783592914327736, + "objective/train/value_std": 0.036163330078125, + "objective/train/weight_avg": 1.0008801221847534, + "objective/train/weighted_lm_loss": 1.4213128089904785, + "objective/train/weights_max": 1.5615370273590088, + "objective/train/weights_min": 0.368775874376297, + "theoretical_loss": 3.6098834677707723, + "tokens_seen": 1222246400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006359332370406035, + "loss": 0.0709, + "theoretical_loss": 3.6098482631529825, + "tokens_seen": 1222377472 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635852993099021, + "loss": 0.0727, + "theoretical_loss": 3.6097778684108026, + "tokens_seen": 1222639616 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006357727491574386, + "loss": 0.0718, + "theoretical_loss": 3.609707492985284, + "tokens_seen": 1222901760 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006356925052158562, + "loss": 0.0713, + "theoretical_loss": 3.609637136866987, + "tokens_seen": 1223163904 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006356122612742738, + "loss": 0.0696, + "theoretical_loss": 3.609566800046478, + "tokens_seen": 1223426048 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006355320173326913, + "loss": 0.0711, + "theoretical_loss": 3.609496482514332, + "tokens_seen": 1223688192 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635451773391109, + "loss": 0.0733, + "theoretical_loss": 3.609426184261128, + "tokens_seen": 1223950336 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006353715294495266, + "loss": 0.0735, + "theoretical_loss": 3.6093559052774538, + "tokens_seen": 1224212480 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006352912855079442, + "loss": 0.0716, + "theoretical_loss": 3.6092856455539017, + "tokens_seen": 1224474624 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006352110415663618, + "loss": 0.0746, + "theoretical_loss": 3.6092154050810725, + "tokens_seen": 1224736768 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006351307976247793, + "loss": 0.073, + "theoretical_loss": 3.6091451838495727, + "tokens_seen": 1224998912 + }, + { + "epoch": 0.37, + "learning_rate": 0.000635050553683197, + "loss": 0.0714, + "theoretical_loss": 3.609074981850016, + "tokens_seen": 1225261056 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.0011464846320450306, + "objective/train/docs_used": 448161, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4441373348236084, + "objective/train/original_loss": 1.4441375732421875, + "objective/train/theoretical_loss": 3.6090047990730216, + "objective/train/tokens_used": 1245983200, + "objective/train/value_avg": -0.0064849853515625, + "objective/train/value_loss": 0.00032763619674369693, + "objective/train/value_max": -0.00011235475540161133, + "objective/train/value_min": -0.94091796875, + "objective/train/value_reward_corr": 0.7932949888128629, + "objective/train/value_std": 0.0202789306640625, + "objective/train/weight_avg": 1.0012918710708618, + "objective/train/weighted_lm_loss": 1.4470592737197876, + "objective/train/weights_max": 1.4689112901687622, + "objective/train/weights_min": 0.4429895877838135, + "theoretical_loss": 3.6090047990730216, + "tokens_seen": 1225523200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006349703097416145, + "loss": 0.0747, + "theoretical_loss": 3.6090047990730216, + "tokens_seen": 1225523200 + }, + { + "epoch": 0.37, + "learning_rate": 0.000634890065800032, + "loss": 0.0712, + "theoretical_loss": 3.6089346355092164, + "tokens_seen": 1225785344 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006348098218584497, + "loss": 0.0737, + "theoretical_loss": 3.6088644911492334, + "tokens_seen": 1226047488 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006347295779168672, + "loss": 0.0731, + "theoretical_loss": 3.6087943659837114, + "tokens_seen": 1226309632 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006346493339752849, + "loss": 0.0708, + "theoretical_loss": 3.6087242600032976, + "tokens_seen": 1226571776 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006345690900337025, + "loss": 0.0721, + "theoretical_loss": 3.6086541731986443, + "tokens_seen": 1226833920 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006344888460921201, + "loss": 0.0688, + "theoretical_loss": 3.6085841055604106, + "tokens_seen": 1227096064 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006344086021505376, + "loss": 0.0729, + "theoretical_loss": 3.608514057079262, + "tokens_seen": 1227358208 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006343283582089553, + "loss": 0.0711, + "theoretical_loss": 3.6084440277458714, + "tokens_seen": 1227620352 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006342481142673728, + "loss": 0.0703, + "theoretical_loss": 3.6083740175509176, + "tokens_seen": 1227882496 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006341678703257904, + "loss": 0.0749, + "theoretical_loss": 3.6083040264850856, + "tokens_seen": 1228144640 + }, + { + "epoch": 0.37, + "learning_rate": 0.000634087626384208, + "loss": 0.0713, + "theoretical_loss": 3.6082340545390674, + "tokens_seen": 1228406784 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006340073824426255, + "loss": 0.0733, + "theoretical_loss": 3.6081641017035615, + "tokens_seen": 1228668928 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -6.965985812712461e-05, + "objective/train/docs_used": 449484, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5495264530181885, + "objective/train/original_loss": 1.5495264530181885, + "objective/train/theoretical_loss": 3.6081291324493456, + "objective/train/tokens_used": 1249260000, + "objective/train/value_avg": -0.00957489013671875, + "objective/train/value_loss": 0.0002603928733151406, + "objective/train/value_max": -9.03010368347168e-05, + "objective/train/value_min": -0.61083984375, + "objective/train/value_reward_corr": 0.7232506307301012, + "objective/train/value_std": 0.01611328125, + "objective/train/weight_avg": 1.000057578086853, + "objective/train/weighted_lm_loss": 1.5498452186584473, + "objective/train/weights_max": 1.781590461730957, + "objective/train/weights_min": 0.5239829421043396, + "theoretical_loss": 3.6081291324493456, + "tokens_seen": 1228800000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006339271385010433, + "loss": 0.0749, + "theoretical_loss": 3.608094167969273, + "tokens_seen": 1228931072 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006338468945594608, + "loss": 0.0717, + "theoretical_loss": 3.6080242533269136, + "tokens_seen": 1229193216 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006337666506178784, + "loss": 0.0702, + "theoretical_loss": 3.6079543577672, + "tokens_seen": 1229455360 + }, + { + "epoch": 0.37, + "learning_rate": 0.000633686406676296, + "loss": 0.074, + "theoretical_loss": 3.6078844812808577, + "tokens_seen": 1229717504 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006336061627347135, + "loss": 0.0757, + "theoretical_loss": 3.6078146238586175, + "tokens_seen": 1229979648 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006335259187931311, + "loss": 0.0737, + "theoretical_loss": 3.6077447854912164, + "tokens_seen": 1230241792 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006334456748515487, + "loss": 0.0721, + "theoretical_loss": 3.6076749661693985, + "tokens_seen": 1230503936 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006333654309099663, + "loss": 0.0743, + "theoretical_loss": 3.607605165883914, + "tokens_seen": 1230766080 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006332851869683838, + "loss": 0.0729, + "theoretical_loss": 3.60753538462552, + "tokens_seen": 1231028224 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006332049430268016, + "loss": 0.0719, + "theoretical_loss": 3.607465622384979, + "tokens_seen": 1231290368 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006331246990852191, + "loss": 0.0732, + "theoretical_loss": 3.6073958791530614, + "tokens_seen": 1231552512 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006330444551436366, + "loss": 0.0727, + "theoretical_loss": 3.607326154920543, + "tokens_seen": 1231814656 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": 0.001238655881024897, + "objective/train/docs_used": 450737, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.530470371246338, + "objective/train/original_loss": 1.530470371246338, + "objective/train/theoretical_loss": 3.6072564496782062, + "objective/train/tokens_used": 1252536800, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.00031318055698648095, + "objective/train/value_max": -0.00013136863708496094, + "objective/train/value_min": -0.65771484375, + "objective/train/value_reward_corr": 0.6789382313438648, + "objective/train/value_std": 0.01499176025390625, + "objective/train/weight_avg": 1.0013794898986816, + "objective/train/weighted_lm_loss": 1.5326346158981323, + "objective/train/weights_max": 1.1837249994277954, + "objective/train/weights_min": 0.37208810448646545, + "theoretical_loss": 3.6072564496782062, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006329642112020543, + "loss": 0.0717, + "theoretical_loss": 3.6072564496782062, + "tokens_seen": 1232076800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006328839672604718, + "loss": 0.0703, + "theoretical_loss": 3.607186763416841, + "tokens_seen": 1232338944 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006328037233188895, + "loss": 0.073, + "theoretical_loss": 3.607117096127242, + "tokens_seen": 1232601088 + }, + { + "epoch": 0.37, + "learning_rate": 0.000632723479377307, + "loss": 0.0743, + "theoretical_loss": 3.60704744780021, + "tokens_seen": 1232863232 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006326432354357246, + "loss": 0.072, + "theoretical_loss": 3.606977818426555, + "tokens_seen": 1233125376 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006325629914941422, + "loss": 0.0745, + "theoretical_loss": 3.6069082079970913, + "tokens_seen": 1233387520 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006324827475525598, + "loss": 0.0741, + "theoretical_loss": 3.60683861650264, + "tokens_seen": 1233649664 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006324025036109774, + "loss": 0.0708, + "theoretical_loss": 3.606769043934027, + "tokens_seen": 1233911808 + }, + { + "epoch": 0.37, + "learning_rate": 0.000632322259669395, + "loss": 0.0736, + "theoretical_loss": 3.606699490282088, + "tokens_seen": 1234173952 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006322420157278126, + "loss": 0.07, + "theoretical_loss": 3.606629955537663, + "tokens_seen": 1234436096 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006321617717862301, + "loss": 0.0712, + "theoretical_loss": 3.606560439691598, + "tokens_seen": 1234698240 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006320815278446478, + "loss": 0.0723, + "theoretical_loss": 3.6064909427347462, + "tokens_seen": 1234960384 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006320012839030653, + "loss": 0.0733, + "theoretical_loss": 3.6064214646579673, + "tokens_seen": 1235222528 + }, + { + "epoch": 0.37, + "objective/train/advantage_avg": -0.0005033229826949537, + "objective/train/docs_used": 451987, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4716780185699463, + "objective/train/original_loss": 1.4716781377792358, + "objective/train/theoretical_loss": 3.60638673269675, + "objective/train/tokens_used": 1255813600, + "objective/train/value_avg": -0.0082550048828125, + "objective/train/value_loss": 0.00027764757396653295, + "objective/train/value_max": -5.9664249420166016e-05, + "objective/train/value_min": -0.393798828125, + "objective/train/value_reward_corr": 0.6338732515401595, + "objective/train/value_std": 0.01284027099609375, + "objective/train/weight_avg": 0.9996262192726135, + "objective/train/weighted_lm_loss": 1.4702317714691162, + "objective/train/weights_max": 1.3029727935791016, + "objective/train/weights_min": 0.39436399936676025, + "theoretical_loss": 3.60638673269675, + "tokens_seen": 1235353600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006319210399614828, + "loss": 0.0742, + "theoretical_loss": 3.606352005452126, + "tokens_seen": 1235484672 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006318407960199005, + "loss": 0.0729, + "theoretical_loss": 3.6062825651080956, + "tokens_seen": 1235746816 + }, + { + "epoch": 0.37, + "learning_rate": 0.000631760552078318, + "loss": 0.0737, + "theoretical_loss": 3.6062131436167544, + "tokens_seen": 1236008960 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006316803081367358, + "loss": 0.0716, + "theoretical_loss": 3.606143740968986, + "tokens_seen": 1236271104 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006316000641951533, + "loss": 0.0749, + "theoretical_loss": 3.6060743571556833, + "tokens_seen": 1236533248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006315198202535709, + "loss": 0.0741, + "theoretical_loss": 3.606004992167742, + "tokens_seen": 1236795392 + }, + { + "epoch": 0.37, + "learning_rate": 0.0006314395763119885, + "loss": 0.0716, + "theoretical_loss": 3.605935645996067, + "tokens_seen": 1237057536 + }, + { + "epoch": 0.37, + "learning_rate": 0.000631359332370406, + "loss": 0.0697, + "theoretical_loss": 3.605866318631568, + "tokens_seen": 1237319680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006312790884288236, + "loss": 0.0713, + "theoretical_loss": 3.605797010065161, + "tokens_seen": 1237581824 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006311988444872412, + "loss": 0.0715, + "theoretical_loss": 3.6057277202877698, + "tokens_seen": 1237843968 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006311186005456588, + "loss": 0.072, + "theoretical_loss": 3.6056584492903223, + "tokens_seen": 1238106112 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006310383566040763, + "loss": 0.0715, + "theoretical_loss": 3.6055891970637544, + "tokens_seen": 1238368256 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0013174168998375535, + "objective/train/docs_used": 453261, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5999850034713745, + "objective/train/original_loss": 1.5999850034713745, + "objective/train/theoretical_loss": 3.605519963599008, + "objective/train/tokens_used": 1259090400, + "objective/train/value_avg": -0.0081024169921875, + "objective/train/value_loss": 0.00033738784259185195, + "objective/train/value_max": -8.028745651245117e-05, + "objective/train/value_min": -0.69580078125, + "objective/train/value_reward_corr": 0.677158113803972, + "objective/train/value_std": 0.01568603515625, + "objective/train/weight_avg": 1.0014675855636597, + "objective/train/weighted_lm_loss": 1.6025694608688354, + "objective/train/weights_max": 1.330289602279663, + "objective/train/weights_min": 0.3702276945114136, + "theoretical_loss": 3.605519963599008, + "tokens_seen": 1238630400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006309581126624941, + "loss": 0.0749, + "theoretical_loss": 3.605519963599008, + "tokens_seen": 1238630400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006308778687209116, + "loss": 0.0724, + "theoretical_loss": 3.60545074888703, + "tokens_seen": 1238892544 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006307976247793292, + "loss": 0.0715, + "theoretical_loss": 3.6053815529187756, + "tokens_seen": 1239154688 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006307173808377468, + "loss": 0.0739, + "theoretical_loss": 3.6053123756852052, + "tokens_seen": 1239416832 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006306371368961643, + "loss": 0.0709, + "theoretical_loss": 3.605243217177285, + "tokens_seen": 1239678976 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006305568929545819, + "loss": 0.0711, + "theoretical_loss": 3.6051740773859877, + "tokens_seen": 1239941120 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006304766490129995, + "loss": 0.0729, + "theoretical_loss": 3.605104956302293, + "tokens_seen": 1240203264 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006303964050714171, + "loss": 0.0729, + "theoretical_loss": 3.605035853917187, + "tokens_seen": 1240465408 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006303161611298347, + "loss": 0.0747, + "theoretical_loss": 3.604966770221661, + "tokens_seen": 1240727552 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006302359171882524, + "loss": 0.0697, + "theoretical_loss": 3.604897705206713, + "tokens_seen": 1240989696 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006301556732466699, + "loss": 0.0722, + "theoretical_loss": 3.6048286588633465, + "tokens_seen": 1241251840 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006300754293050875, + "loss": 0.0749, + "theoretical_loss": 3.6047596311825725, + "tokens_seen": 1241513984 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006299951853635051, + "loss": 0.0745, + "theoretical_loss": 3.6046906221554087, + "tokens_seen": 1241776128 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0009522174368612468, + "objective/train/docs_used": 454384, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.473624348640442, + "objective/train/original_loss": 1.4736244678497314, + "objective/train/theoretical_loss": 3.604656124634124, + "objective/train/tokens_used": 1262367200, + "objective/train/value_avg": -0.00734710693359375, + "objective/train/value_loss": 0.00023657429846934974, + "objective/train/value_max": -9.995698928833008e-05, + "objective/train/value_min": -0.265869140625, + "objective/train/value_reward_corr": 0.6545632899431301, + "objective/train/value_std": 0.01197052001953125, + "objective/train/weight_avg": 1.0010582208633423, + "objective/train/weighted_lm_loss": 1.4750497341156006, + "objective/train/weights_max": 1.304564356803894, + "objective/train/weights_min": 0.39513498544692993, + "theoretical_loss": 3.604656124634124, + "tokens_seen": 1241907200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006299149414219226, + "loss": 0.0737, + "theoretical_loss": 3.6046216317728765, + "tokens_seen": 1242038272 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006298346974803403, + "loss": 0.0723, + "theoretical_loss": 3.6045526600260054, + "tokens_seen": 1242300416 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006297544535387578, + "loss": 0.0716, + "theoretical_loss": 3.6044837069058318, + "tokens_seen": 1242562560 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006296742095971754, + "loss": 0.0751, + "theoretical_loss": 3.6044147724033957, + "tokens_seen": 1242824704 + }, + { + "epoch": 0.38, + "learning_rate": 0.000629593965655593, + "loss": 0.0752, + "theoretical_loss": 3.6043458565097453, + "tokens_seen": 1243086848 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006295137217140106, + "loss": 0.0701, + "theoretical_loss": 3.6042769592159356, + "tokens_seen": 1243348992 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006294334777724282, + "loss": 0.0725, + "theoretical_loss": 3.6042080805130254, + "tokens_seen": 1243611136 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006293532338308458, + "loss": 0.0749, + "theoretical_loss": 3.6041392203920815, + "tokens_seen": 1243873280 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006292729898892634, + "loss": 0.0747, + "theoretical_loss": 3.6040703788441757, + "tokens_seen": 1244135424 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006291927459476809, + "loss": 0.0745, + "theoretical_loss": 3.6040015558603877, + "tokens_seen": 1244397568 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006291125020060986, + "loss": 0.0731, + "theoretical_loss": 3.603932751431802, + "tokens_seen": 1244659712 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006290322580645161, + "loss": 0.0719, + "theoretical_loss": 3.6038639655495093, + "tokens_seen": 1244921856 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": -0.0012195850722491741, + "objective/train/docs_used": 455531, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5041210651397705, + "objective/train/original_loss": 1.5041210651397705, + "objective/train/theoretical_loss": 3.6037951982046064, + "objective/train/tokens_used": 1265644000, + "objective/train/value_avg": -0.01177215576171875, + "objective/train/value_loss": 0.0004948603454977274, + "objective/train/value_max": -0.0001366138458251953, + "objective/train/value_min": -0.5390625, + "objective/train/value_reward_corr": 0.73076830896824, + "objective/train/value_std": 0.0195465087890625, + "objective/train/weight_avg": 0.9990051984786987, + "objective/train/weighted_lm_loss": 1.5013059377670288, + "objective/train/weights_max": 1.3366363048553467, + "objective/train/weights_min": 0.3697971701622009, + "theoretical_loss": 3.6037951982046064, + "tokens_seen": 1245184000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006289520141229337, + "loss": 0.0726, + "theoretical_loss": 3.6037951982046064, + "tokens_seen": 1245184000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006288717701813513, + "loss": 0.0729, + "theoretical_loss": 3.6037264493881973, + "tokens_seen": 1245446144 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006287915262397688, + "loss": 0.075, + "theoretical_loss": 3.603657719091391, + "tokens_seen": 1245708288 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006287112822981866, + "loss": 0.0732, + "theoretical_loss": 3.603589007305303, + "tokens_seen": 1245970432 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006286310383566041, + "loss": 0.0749, + "theoretical_loss": 3.603520314021055, + "tokens_seen": 1246232576 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006285507944150217, + "loss": 0.0719, + "theoretical_loss": 3.6034516392297746, + "tokens_seen": 1246494720 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006284705504734393, + "loss": 0.0759, + "theoretical_loss": 3.6033829829225965, + "tokens_seen": 1246756864 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006283903065318568, + "loss": 0.0749, + "theoretical_loss": 3.6033143450906593, + "tokens_seen": 1247019008 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006283100625902744, + "loss": 0.0733, + "theoretical_loss": 3.6032457257251105, + "tokens_seen": 1247281152 + }, + { + "epoch": 0.38, + "learning_rate": 0.000628229818648692, + "loss": 0.0701, + "theoretical_loss": 3.603177124817101, + "tokens_seen": 1247543296 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006281495747071096, + "loss": 0.0725, + "theoretical_loss": 3.603108542357791, + "tokens_seen": 1247805440 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006280693307655271, + "loss": 0.0743, + "theoretical_loss": 3.603039978338343, + "tokens_seen": 1248067584 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006279890868239449, + "loss": 0.0714, + "theoretical_loss": 3.6029714327499285, + "tokens_seen": 1248329728 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.00015995489957276732, + "objective/train/docs_used": 456712, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4048641920089722, + "objective/train/original_loss": 1.4048643112182617, + "objective/train/theoretical_loss": 3.602937166864601, + "objective/train/tokens_used": 1268920800, + "objective/train/value_avg": -0.00669097900390625, + "objective/train/value_loss": 0.00019214232452213764, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.24560546875, + "objective/train/value_reward_corr": 0.6105372464341148, + "objective/train/value_std": 0.00984954833984375, + "objective/train/weight_avg": 1.0002514123916626, + "objective/train/weighted_lm_loss": 1.405979871749878, + "objective/train/weights_max": 1.278395175933838, + "objective/train/weights_min": 0.4344214200973511, + "theoretical_loss": 3.602937166864601, + "tokens_seen": 1248460800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006279088428823624, + "loss": 0.0752, + "theoretical_loss": 3.602902905583724, + "tokens_seen": 1248591872 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006278285989407801, + "loss": 0.0717, + "theoretical_loss": 3.602834396830912, + "tokens_seen": 1248854016 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006277483549991976, + "loss": 0.0726, + "theoretical_loss": 3.6027659064826816, + "tokens_seen": 1249116160 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006276681110576151, + "loss": 0.0744, + "theoretical_loss": 3.602697434530227, + "tokens_seen": 1249378304 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006275878671160328, + "loss": 0.0744, + "theoretical_loss": 3.6026289809647487, + "tokens_seen": 1249640448 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006275076231744503, + "loss": 0.0741, + "theoretical_loss": 3.6025605457774548, + "tokens_seen": 1249902592 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006274273792328679, + "loss": 0.0721, + "theoretical_loss": 3.602492128959558, + "tokens_seen": 1250164736 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006273471352912855, + "loss": 0.0732, + "theoretical_loss": 3.6024237305022764, + "tokens_seen": 1250426880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006272668913497032, + "loss": 0.0741, + "theoretical_loss": 3.602355350396836, + "tokens_seen": 1250689024 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006271866474081207, + "loss": 0.072, + "theoretical_loss": 3.602286988634467, + "tokens_seen": 1250951168 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006271064034665383, + "loss": 0.0719, + "theoretical_loss": 3.6022186452064076, + "tokens_seen": 1251213312 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006270261595249559, + "loss": 0.0716, + "theoretical_loss": 3.6021503201039, + "tokens_seen": 1251475456 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0013131473679095507, + "objective/train/docs_used": 458046, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4418436288833618, + "objective/train/original_loss": 1.4418433904647827, + "objective/train/theoretical_loss": 3.6020820133181934, + "objective/train/tokens_used": 1272197600, + "objective/train/value_avg": -0.0069580078125, + "objective/train/value_loss": 0.00013802826288156211, + "objective/train/value_max": -7.486343383789062e-05, + "objective/train/value_min": -0.235595703125, + "objective/train/value_reward_corr": 0.722366365333374, + "objective/train/value_std": 0.0119781494140625, + "objective/train/weight_avg": 1.0013810396194458, + "objective/train/weighted_lm_loss": 1.4443731307983398, + "objective/train/weights_max": 1.2093919515609741, + "objective/train/weights_min": 0.6394911408424377, + "theoretical_loss": 3.6020820133181934, + "tokens_seen": 1251737600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006269459155833734, + "loss": 0.071, + "theoretical_loss": 3.6020820133181934, + "tokens_seen": 1251737600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006268656716417911, + "loss": 0.0744, + "theoretical_loss": 3.6020137248405435, + "tokens_seen": 1251999744 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006267854277002086, + "loss": 0.0733, + "theoretical_loss": 3.6019454546622107, + "tokens_seen": 1252261888 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006267051837586262, + "loss": 0.0711, + "theoretical_loss": 3.601877202774463, + "tokens_seen": 1252524032 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006266249398170438, + "loss": 0.0729, + "theoretical_loss": 3.601808969168573, + "tokens_seen": 1252786176 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006265446958754613, + "loss": 0.0719, + "theoretical_loss": 3.60174075383582, + "tokens_seen": 1253048320 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006264644519338791, + "loss": 0.0752, + "theoretical_loss": 3.6016725567674883, + "tokens_seen": 1253310464 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006263842079922966, + "loss": 0.0728, + "theoretical_loss": 3.60160437795487, + "tokens_seen": 1253572608 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006263039640507142, + "loss": 0.073, + "theoretical_loss": 3.6015362173892624, + "tokens_seen": 1253834752 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006262237201091318, + "loss": 0.0745, + "theoretical_loss": 3.6014680750619674, + "tokens_seen": 1254096896 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006261434761675494, + "loss": 0.0725, + "theoretical_loss": 3.6013999509642947, + "tokens_seen": 1254359040 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006260632322259669, + "loss": 0.0724, + "theoretical_loss": 3.601331845087559, + "tokens_seen": 1254621184 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006259829882843845, + "loss": 0.073, + "theoretical_loss": 3.601263757423082, + "tokens_seen": 1254883328 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0014171048533171415, + "objective/train/docs_used": 459153, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5080225467681885, + "objective/train/original_loss": 1.508022427558899, + "objective/train/theoretical_loss": 3.601229720417729, + "objective/train/tokens_used": 1275474400, + "objective/train/value_avg": -0.00691986083984375, + "objective/train/value_loss": 0.00018681980145629495, + "objective/train/value_max": -0.00014090538024902344, + "objective/train/value_min": -0.630859375, + "objective/train/value_reward_corr": 0.6682120767318891, + "objective/train/value_std": 0.0105743408203125, + "objective/train/weight_avg": 1.001495361328125, + "objective/train/weighted_lm_loss": 1.5094605684280396, + "objective/train/weights_max": 1.1394519805908203, + "objective/train/weights_min": 0.22579480707645416, + "theoretical_loss": 3.601229720417729, + "tokens_seen": 1255014400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006259027443428021, + "loss": 0.0728, + "theoretical_loss": 3.601195687962189, + "tokens_seen": 1255145472 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006258225004012196, + "loss": 0.0718, + "theoretical_loss": 3.601127636696214, + "tokens_seen": 1255407616 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006257422564596374, + "loss": 0.0727, + "theoretical_loss": 3.6010596036164957, + "tokens_seen": 1255669760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006256620125180549, + "loss": 0.0725, + "theoretical_loss": 3.600991588714378, + "tokens_seen": 1255931904 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006255817685764725, + "loss": 0.0705, + "theoretical_loss": 3.6009235919812124, + "tokens_seen": 1256194048 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006255015246348901, + "loss": 0.0716, + "theoretical_loss": 3.6008556134083545, + "tokens_seen": 1256456192 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006254212806933076, + "loss": 0.0732, + "theoretical_loss": 3.600787652987167, + "tokens_seen": 1256718336 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006253410367517252, + "loss": 0.0716, + "theoretical_loss": 3.6007197107090185, + "tokens_seen": 1256980480 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006252607928101428, + "loss": 0.0735, + "theoretical_loss": 3.600651786565283, + "tokens_seen": 1257242624 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006251805488685604, + "loss": 0.0719, + "theoretical_loss": 3.6005838805473402, + "tokens_seen": 1257504768 + }, + { + "epoch": 0.38, + "learning_rate": 0.000625100304926978, + "loss": 0.0717, + "theoretical_loss": 3.600515992646577, + "tokens_seen": 1257766912 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006250200609853957, + "loss": 0.072, + "theoretical_loss": 3.6004481228543854, + "tokens_seen": 1258029056 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0018183041829615831, + "objective/train/docs_used": 460354, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.566368579864502, + "objective/train/original_loss": 1.5663683414459229, + "objective/train/theoretical_loss": 3.600380271162162, + "objective/train/tokens_used": 1278751200, + "objective/train/value_avg": -0.01047515869140625, + "objective/train/value_loss": 0.0005354974418878555, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.8798828125, + "objective/train/value_reward_corr": 0.6700587084228133, + "objective/train/value_std": 0.019775390625, + "objective/train/weight_avg": 1.0020546913146973, + "objective/train/weighted_lm_loss": 1.5695098638534546, + "objective/train/weights_max": 1.7010573148727417, + "objective/train/weights_min": 0.3695588707923889, + "theoretical_loss": 3.600380271162162, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006249398170438132, + "loss": 0.071, + "theoretical_loss": 3.600380271162162, + "tokens_seen": 1258291200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006248595731022309, + "loss": 0.0693, + "theoretical_loss": 3.6003124375613114, + "tokens_seen": 1258553344 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006247793291606484, + "loss": 0.0747, + "theoretical_loss": 3.6002446220432427, + "tokens_seen": 1258815488 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006246990852190659, + "loss": 0.074, + "theoretical_loss": 3.6001768245993713, + "tokens_seen": 1259077632 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006246188412774836, + "loss": 0.0731, + "theoretical_loss": 3.600109045221119, + "tokens_seen": 1259339776 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006245385973359011, + "loss": 0.0748, + "theoretical_loss": 3.600041283899912, + "tokens_seen": 1259601920 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006244583533943187, + "loss": 0.0714, + "theoretical_loss": 3.599973540627184, + "tokens_seen": 1259864064 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006243781094527363, + "loss": 0.0741, + "theoretical_loss": 3.599905815394374, + "tokens_seen": 1260126208 + }, + { + "epoch": 0.38, + "learning_rate": 0.000624297865511154, + "loss": 0.0736, + "theoretical_loss": 3.599838108192926, + "tokens_seen": 1260388352 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006242176215695715, + "loss": 0.0724, + "theoretical_loss": 3.5997704190142907, + "tokens_seen": 1260650496 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006241373776279891, + "loss": 0.0741, + "theoretical_loss": 3.599702747849925, + "tokens_seen": 1260912640 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006240571336864067, + "loss": 0.073, + "theoretical_loss": 3.5996350946912896, + "tokens_seen": 1261174784 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006239768897448243, + "loss": 0.0715, + "theoretical_loss": 3.5995674595298537, + "tokens_seen": 1261436928 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0015806846786290407, + "objective/train/docs_used": 461530, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3945951461791992, + "objective/train/original_loss": 1.3945950269699097, + "objective/train/theoretical_loss": 3.599533648695421, + "objective/train/tokens_used": 1282028000, + "objective/train/value_avg": -0.006893157958984375, + "objective/train/value_loss": 0.00015965731290634722, + "objective/train/value_max": -7.31348991394043e-05, + "objective/train/value_min": -0.7705078125, + "objective/train/value_reward_corr": 0.6894917108257843, + "objective/train/value_std": 0.0124969482421875, + "objective/train/weight_avg": 1.0016539096832275, + "objective/train/weighted_lm_loss": 1.39777410030365, + "objective/train/weights_max": 1.2735666036605835, + "objective/train/weights_min": 0.36959269642829895, + "theoretical_loss": 3.599533648695421, + "tokens_seen": 1261568000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006238966458032419, + "loss": 0.072, + "theoretical_loss": 3.5994998423570914, + "tokens_seen": 1261699072 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006238164018616594, + "loss": 0.0713, + "theoretical_loss": 3.599432243164481, + "tokens_seen": 1261961216 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006237361579200771, + "loss": 0.0714, + "theoretical_loss": 3.599364661943509, + "tokens_seen": 1262223360 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006236559139784946, + "loss": 0.0726, + "theoretical_loss": 3.599297098685666, + "tokens_seen": 1262485504 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006235756700369121, + "loss": 0.071, + "theoretical_loss": 3.599229553382449, + "tokens_seen": 1262747648 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006234954260953299, + "loss": 0.0725, + "theoretical_loss": 3.599162026025361, + "tokens_seen": 1263009792 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006234151821537474, + "loss": 0.0706, + "theoretical_loss": 3.599094516605911, + "tokens_seen": 1263271936 + }, + { + "epoch": 0.38, + "learning_rate": 0.000623334938212165, + "loss": 0.0737, + "theoretical_loss": 3.5990270251156122, + "tokens_seen": 1263534080 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006232546942705826, + "loss": 0.0748, + "theoretical_loss": 3.5989595515459856, + "tokens_seen": 1263796224 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006231744503290002, + "loss": 0.0743, + "theoretical_loss": 3.598892095888557, + "tokens_seen": 1264058368 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006230942063874177, + "loss": 0.0716, + "theoretical_loss": 3.5988246581348573, + "tokens_seen": 1264320512 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006230139624458353, + "loss": 0.073, + "theoretical_loss": 3.5987572382764252, + "tokens_seen": 1264582656 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 7.375935092568398e-05, + "objective/train/docs_used": 462829, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4854201078414917, + "objective/train/original_loss": 1.4854199886322021, + "objective/train/theoretical_loss": 3.5986898363048025, + "objective/train/tokens_used": 1285304800, + "objective/train/value_avg": -0.0058441162109375, + "objective/train/value_loss": 0.00019366215565241873, + "objective/train/value_max": -0.00012242794036865234, + "objective/train/value_min": -0.383544921875, + "objective/train/value_reward_corr": 0.5646258964894881, + "objective/train/value_std": 0.00856781005859375, + "objective/train/weight_avg": 1.0001623630523682, + "objective/train/weighted_lm_loss": 1.4864187240600586, + "objective/train/weights_max": 1.2285127639770508, + "objective/train/weights_min": 0.39554521441459656, + "theoretical_loss": 3.5986898363048025, + "tokens_seen": 1264844800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006229337185042529, + "loss": 0.0724, + "theoretical_loss": 3.5986898363048025, + "tokens_seen": 1264844800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006228534745626704, + "loss": 0.0713, + "theoretical_loss": 3.5986224522115395, + "tokens_seen": 1265106944 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006227732306210882, + "loss": 0.0704, + "theoretical_loss": 3.59855508598819, + "tokens_seen": 1265369088 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006226929866795057, + "loss": 0.0737, + "theoretical_loss": 3.5984877376263142, + "tokens_seen": 1265631232 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006226127427379234, + "loss": 0.07, + "theoretical_loss": 3.5984204071174783, + "tokens_seen": 1265893376 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006225324987963409, + "loss": 0.0763, + "theoretical_loss": 3.598353094453255, + "tokens_seen": 1266155520 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006224522548547584, + "loss": 0.0724, + "theoretical_loss": 3.598285799625221, + "tokens_seen": 1266417664 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006223720109131761, + "loss": 0.0721, + "theoretical_loss": 3.5982185226249594, + "tokens_seen": 1266679808 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006222917669715936, + "loss": 0.0718, + "theoretical_loss": 3.5981512634440604, + "tokens_seen": 1266941952 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006222115230300112, + "loss": 0.0725, + "theoretical_loss": 3.5980840220741177, + "tokens_seen": 1267204096 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006221312790884289, + "loss": 0.0748, + "theoretical_loss": 3.5980167985067317, + "tokens_seen": 1267466240 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006220510351468465, + "loss": 0.0739, + "theoretical_loss": 3.5979495927335092, + "tokens_seen": 1267728384 + }, + { + "epoch": 0.38, + "learning_rate": 0.000621970791205264, + "loss": 0.0726, + "theoretical_loss": 3.5978824047460614, + "tokens_seen": 1267990528 + }, + { + "epoch": 0.38, + "objective/train/advantage_avg": 0.0012868504272773862, + "objective/train/docs_used": 464032, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5202511548995972, + "objective/train/original_loss": 1.5202511548995972, + "objective/train/theoretical_loss": 3.5978488174193832, + "objective/train/tokens_used": 1288581600, + "objective/train/value_avg": -0.00853729248046875, + "objective/train/value_loss": 0.0002907871676143259, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.3349609375, + "objective/train/value_reward_corr": 0.5771171767033785, + "objective/train/value_std": 0.013885498046875, + "objective/train/weight_avg": 1.0014193058013916, + "objective/train/weighted_lm_loss": 1.5222471952438354, + "objective/train/weights_max": 1.397885799407959, + "objective/train/weights_min": 0.37770265340805054, + "theoretical_loss": 3.5978488174193832, + "tokens_seen": 1268121600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006218905472636816, + "loss": 0.0733, + "theoretical_loss": 3.5978152345360055, + "tokens_seen": 1268252672 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006218103033220992, + "loss": 0.0724, + "theoretical_loss": 3.5977480820949657, + "tokens_seen": 1268514816 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006217300593805167, + "loss": 0.0741, + "theoretical_loss": 3.5976809474145703, + "tokens_seen": 1268776960 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006216498154389344, + "loss": 0.0731, + "theoretical_loss": 3.597613830486454, + "tokens_seen": 1269039104 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006215695714973519, + "loss": 0.0718, + "theoretical_loss": 3.5975467313022564, + "tokens_seen": 1269301248 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006214893275557696, + "loss": 0.0735, + "theoretical_loss": 3.5974796498536237, + "tokens_seen": 1269563392 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006214090836141871, + "loss": 0.0708, + "theoretical_loss": 3.597412586132208, + "tokens_seen": 1269825536 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006213288396726048, + "loss": 0.0728, + "theoretical_loss": 3.5973455401296652, + "tokens_seen": 1270087680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0006212485957310224, + "loss": 0.07, + "theoretical_loss": 3.597278511837659, + "tokens_seen": 1270349824 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006211683517894399, + "loss": 0.0716, + "theoretical_loss": 3.597211501247858, + "tokens_seen": 1270611968 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006210881078478575, + "loss": 0.0703, + "theoretical_loss": 3.5971445083519358, + "tokens_seen": 1270874112 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006210078639062751, + "loss": 0.0733, + "theoretical_loss": 3.5970775331415723, + "tokens_seen": 1271136256 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0005056922673247755, + "objective/train/docs_used": 465264, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4634767770767212, + "objective/train/original_loss": 1.4634767770767212, + "objective/train/theoretical_loss": 3.5970105756084525, + "objective/train/tokens_used": 1291858400, + "objective/train/value_avg": -0.007724761962890625, + "objective/train/value_loss": 0.00021731469314545393, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.34912109375, + "objective/train/value_reward_corr": 0.6817886298040029, + "objective/train/value_std": 0.016876220703125, + "objective/train/weight_avg": 1.0006133317947388, + "objective/train/weighted_lm_loss": 1.4629930257797241, + "objective/train/weights_max": 1.2334792613983154, + "objective/train/weights_min": 0.729193389415741, + "theoretical_loss": 3.5970105756084525, + "tokens_seen": 1271398400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006209276199646927, + "loss": 0.0702, + "theoretical_loss": 3.5970105756084525, + "tokens_seen": 1271398400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006208473760231102, + "loss": 0.0727, + "theoretical_loss": 3.5969436357442683, + "tokens_seen": 1271660544 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006207671320815279, + "loss": 0.0721, + "theoretical_loss": 3.5968767135407154, + "tokens_seen": 1271922688 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006206868881399454, + "loss": 0.0712, + "theoretical_loss": 3.596809808989496, + "tokens_seen": 1272184832 + }, + { + "epoch": 0.39, + "learning_rate": 0.000620606644198363, + "loss": 0.0728, + "theoretical_loss": 3.596742922082319, + "tokens_seen": 1272446976 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006205264002567807, + "loss": 0.0705, + "theoretical_loss": 3.5966760528108965, + "tokens_seen": 1272709120 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006204461563151982, + "loss": 0.0726, + "theoretical_loss": 3.596609201166948, + "tokens_seen": 1272971264 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006203659123736158, + "loss": 0.0726, + "theoretical_loss": 3.596542367142198, + "tokens_seen": 1273233408 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006202856684320334, + "loss": 0.0727, + "theoretical_loss": 3.5964755507283774, + "tokens_seen": 1273495552 + }, + { + "epoch": 0.39, + "learning_rate": 0.000620205424490451, + "loss": 0.0736, + "theoretical_loss": 3.5964087519172203, + "tokens_seen": 1273757696 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006201251805488686, + "loss": 0.0745, + "theoretical_loss": 3.59634197070047, + "tokens_seen": 1274019840 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006200449366072861, + "loss": 0.0751, + "theoretical_loss": 3.5962752070698727, + "tokens_seen": 1274281984 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006199646926657037, + "loss": 0.0694, + "theoretical_loss": 3.59620846101718, + "tokens_seen": 1274544128 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0023682687897235155, + "objective/train/docs_used": 466434, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4091410636901855, + "objective/train/original_loss": 1.4091408252716064, + "objective/train/theoretical_loss": 3.5961750945799724, + "objective/train/tokens_used": 1295135200, + "objective/train/value_avg": -0.012420654296875, + "objective/train/value_loss": 0.00021746121637988836, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.407958984375, + "objective/train/value_reward_corr": 0.8011572186934655, + "objective/train/value_std": 0.0207366943359375, + "objective/train/weight_avg": 1.002469778060913, + "objective/train/weighted_lm_loss": 1.4116214513778687, + "objective/train/weights_max": 1.1912201642990112, + "objective/train/weights_min": 0.3865475058555603, + "theoretical_loss": 3.5961750945799724, + "tokens_seen": 1274675200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006198844487241214, + "loss": 0.0727, + "theoretical_loss": 3.596141732534151, + "tokens_seen": 1274806272 + }, + { + "epoch": 0.39, + "learning_rate": 0.000619804204782539, + "loss": 0.0749, + "theoretical_loss": 3.596075021612549, + "tokens_seen": 1275068416 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006197239608409565, + "loss": 0.0739, + "theoretical_loss": 3.5960083282441433, + "tokens_seen": 1275330560 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006196437168993742, + "loss": 0.0741, + "theoretical_loss": 3.5959416524207084, + "tokens_seen": 1275592704 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006195634729577917, + "loss": 0.0732, + "theoretical_loss": 3.5958749941340242, + "tokens_seen": 1275854848 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006194832290162092, + "loss": 0.0735, + "theoretical_loss": 3.595808353375877, + "tokens_seen": 1276116992 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006194029850746269, + "loss": 0.0724, + "theoretical_loss": 3.5957417301380588, + "tokens_seen": 1276379136 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006193227411330444, + "loss": 0.0735, + "theoretical_loss": 3.595675124412365, + "tokens_seen": 1276641280 + }, + { + "epoch": 0.39, + "learning_rate": 0.000619242497191462, + "loss": 0.0762, + "theoretical_loss": 3.595608536190599, + "tokens_seen": 1276903424 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006191622532498797, + "loss": 0.0723, + "theoretical_loss": 3.595541965464568, + "tokens_seen": 1277165568 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006190820093082973, + "loss": 0.0702, + "theoretical_loss": 3.5954754122260866, + "tokens_seen": 1277427712 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006190017653667148, + "loss": 0.074, + "theoretical_loss": 3.595408876466972, + "tokens_seen": 1277689856 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.0013968882849439979, + "objective/train/docs_used": 467639, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.402967095375061, + "objective/train/original_loss": 1.4029669761657715, + "objective/train/theoretical_loss": 3.59534235817905, + "objective/train/tokens_used": 1298412000, + "objective/train/value_avg": -0.01238250732421875, + "objective/train/value_loss": 0.0012696925550699234, + "objective/train/value_max": -0.00010150671005249023, + "objective/train/value_min": -0.794921875, + "objective/train/value_reward_corr": 0.703285806535258, + "objective/train/value_std": 0.029815673828125, + "objective/train/weight_avg": 0.9991534948348999, + "objective/train/weighted_lm_loss": 1.3983840942382812, + "objective/train/weights_max": 2.1239025592803955, + "objective/train/weights_min": 0.3922275900840759, + "theoretical_loss": 3.59534235817905, + "tokens_seen": 1277952000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006189215214251324, + "loss": 0.0727, + "theoretical_loss": 3.59534235817905, + "tokens_seen": 1277952000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00061884127748355, + "loss": 0.0732, + "theoretical_loss": 3.5952758573541503, + "tokens_seen": 1278214144 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006187610335419676, + "loss": 0.0722, + "theoretical_loss": 3.5952093739841073, + "tokens_seen": 1278476288 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006186807896003852, + "loss": 0.0743, + "theoretical_loss": 3.5951429080607635, + "tokens_seen": 1278738432 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006186005456588027, + "loss": 0.0718, + "theoretical_loss": 3.5950764595759646, + "tokens_seen": 1279000576 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006185203017172204, + "loss": 0.0726, + "theoretical_loss": 3.5950100285215623, + "tokens_seen": 1279262720 + }, + { + "epoch": 0.39, + "learning_rate": 0.000618440057775638, + "loss": 0.0705, + "theoretical_loss": 3.594943614889414, + "tokens_seen": 1279524864 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006183598138340556, + "loss": 0.0746, + "theoretical_loss": 3.5948772186713827, + "tokens_seen": 1279787008 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006182795698924732, + "loss": 0.0726, + "theoretical_loss": 3.5948108398593357, + "tokens_seen": 1280049152 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006181993259508907, + "loss": 0.0735, + "theoretical_loss": 3.5947444784451488, + "tokens_seen": 1280311296 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006181190820093083, + "loss": 0.0716, + "theoretical_loss": 3.5946781344206995, + "tokens_seen": 1280573440 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006180388380677259, + "loss": 0.0745, + "theoretical_loss": 3.5946118077778735, + "tokens_seen": 1280835584 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006179585941261435, + "loss": 0.0758, + "theoretical_loss": 3.59454549850856, + "tokens_seen": 1281097728 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0009923300240188837, + "objective/train/docs_used": 468763, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6140085458755493, + "objective/train/original_loss": 1.6140084266662598, + "objective/train/theoretical_loss": 3.594512350386438, + "objective/train/tokens_used": 1301688800, + "objective/train/value_avg": -0.007724761962890625, + "objective/train/value_loss": 0.00035045703407377005, + "objective/train/value_max": -8.684396743774414e-05, + "objective/train/value_min": -0.94189453125, + "objective/train/value_reward_corr": 0.6209565433413602, + "objective/train/value_std": 0.01407623291015625, + "objective/train/weight_avg": 1.0011414289474487, + "objective/train/weighted_lm_loss": 1.615778923034668, + "objective/train/weights_max": 1.344037413597107, + "objective/train/weights_min": 0.36872875690460205, + "theoretical_loss": 3.594512350386438, + "tokens_seen": 1281228800 + }, + { + "epoch": 0.39, + "learning_rate": 0.000617878350184561, + "loss": 0.0737, + "theoretical_loss": 3.594479206604655, + "tokens_seen": 1281359872 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006177981062429787, + "loss": 0.0747, + "theoretical_loss": 3.59441293205806, + "tokens_seen": 1281622016 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006177178623013962, + "loss": 0.0724, + "theoretical_loss": 3.59434667486068, + "tokens_seen": 1281884160 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006176376183598139, + "loss": 0.0726, + "theoretical_loss": 3.5942804350044284, + "tokens_seen": 1282146304 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006175573744182315, + "loss": 0.0724, + "theoretical_loss": 3.594214212481222, + "tokens_seen": 1282408448 + }, + { + "epoch": 0.39, + "learning_rate": 0.000617477130476649, + "loss": 0.0702, + "theoretical_loss": 3.594148007282983, + "tokens_seen": 1282670592 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006173968865350667, + "loss": 0.0671, + "theoretical_loss": 3.59408181940164, + "tokens_seen": 1282932736 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006173166425934842, + "loss": 0.0711, + "theoretical_loss": 3.5940156488291266, + "tokens_seen": 1283194880 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006172363986519018, + "loss": 0.0703, + "theoretical_loss": 3.593949495557381, + "tokens_seen": 1283457024 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006171561547103194, + "loss": 0.072, + "theoretical_loss": 3.5938833595783484, + "tokens_seen": 1283719168 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006170759107687369, + "loss": 0.0722, + "theoretical_loss": 3.593817240883978, + "tokens_seen": 1283981312 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006169956668271545, + "loss": 0.072, + "theoretical_loss": 3.5937511394662254, + "tokens_seen": 1284243456 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0013025725493207574, + "objective/train/docs_used": 470107, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4686199426651, + "objective/train/original_loss": 1.4686200618743896, + "objective/train/theoretical_loss": 3.5936850553170503, + "objective/train/tokens_used": 1304965600, + "objective/train/value_avg": -0.005847930908203125, + "objective/train/value_loss": 9.238809434464201e-05, + "objective/train/value_max": -8.958578109741211e-05, + "objective/train/value_min": -0.5205078125, + "objective/train/value_reward_corr": 0.705613709892375, + "objective/train/value_std": 0.01006317138671875, + "objective/train/weight_avg": 1.0013480186462402, + "objective/train/weighted_lm_loss": 1.4713242053985596, + "objective/train/weights_max": 1.1862255334854126, + "objective/train/weights_min": 0.6386135220527649, + "theoretical_loss": 3.5936850553170503, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006169154228855722, + "loss": 0.0716, + "theoretical_loss": 3.5936850553170503, + "tokens_seen": 1284505600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006168351789439898, + "loss": 0.0736, + "theoretical_loss": 3.593618988428419, + "tokens_seen": 1284767744 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006167549350024073, + "loss": 0.0701, + "theoretical_loss": 3.593552938792303, + "tokens_seen": 1285029888 + }, + { + "epoch": 0.39, + "learning_rate": 0.000616674691060825, + "loss": 0.0717, + "theoretical_loss": 3.5934869064006785, + "tokens_seen": 1285292032 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006165944471192425, + "loss": 0.0734, + "theoretical_loss": 3.593420891245527, + "tokens_seen": 1285554176 + }, + { + "epoch": 0.39, + "learning_rate": 0.00061651420317766, + "loss": 0.0748, + "theoretical_loss": 3.5933548933188377, + "tokens_seen": 1285816320 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006164339592360777, + "loss": 0.0712, + "theoretical_loss": 3.5932889126126017, + "tokens_seen": 1286078464 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006163537152944952, + "loss": 0.0739, + "theoretical_loss": 3.5932229491188172, + "tokens_seen": 1286340608 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006162734713529129, + "loss": 0.0709, + "theoretical_loss": 3.5931570028294884, + "tokens_seen": 1286602752 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006161932274113304, + "loss": 0.0721, + "theoretical_loss": 3.5930910737366233, + "tokens_seen": 1286864896 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006161129834697481, + "loss": 0.0729, + "theoretical_loss": 3.5930251618322364, + "tokens_seen": 1287127040 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006160327395281657, + "loss": 0.0722, + "theoretical_loss": 3.5929592671083466, + "tokens_seen": 1287389184 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006159524955865832, + "loss": 0.0717, + "theoretical_loss": 3.5928933895569792, + "tokens_seen": 1287651328 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": -0.0021324241533875465, + "objective/train/docs_used": 471101, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3588120937347412, + "objective/train/original_loss": 1.358811855316162, + "objective/train/theoretical_loss": 3.592860457218501, + "objective/train/tokens_used": 1308242400, + "objective/train/value_avg": -0.01183319091796875, + "objective/train/value_loss": 0.0003357592795509845, + "objective/train/value_max": -0.0001596212387084961, + "objective/train/value_min": -0.321044921875, + "objective/train/value_reward_corr": 0.8440921060828028, + "objective/train/value_std": 0.02423095703125, + "objective/train/weight_avg": 0.9980318546295166, + "objective/train/weighted_lm_loss": 1.3537436723709106, + "objective/train/weights_max": 1.2598665952682495, + "objective/train/weights_min": 0.7315114736557007, + "theoretical_loss": 3.592860457218501, + "tokens_seen": 1287782400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006158722516450008, + "loss": 0.07, + "theoretical_loss": 3.5928275291701643, + "tokens_seen": 1287913472 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006157920077034184, + "loss": 0.0711, + "theoretical_loss": 3.592761685939937, + "tokens_seen": 1288175616 + }, + { + "epoch": 0.39, + "learning_rate": 0.000615711763761836, + "loss": 0.0717, + "theoretical_loss": 3.5926958598583383, + "tokens_seen": 1288437760 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006156315198202535, + "loss": 0.0692, + "theoretical_loss": 3.5926300509174136, + "tokens_seen": 1288699904 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006155512758786712, + "loss": 0.0711, + "theoretical_loss": 3.5925642591092153, + "tokens_seen": 1288962048 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006154710319370887, + "loss": 0.0696, + "theoretical_loss": 3.592498484425799, + "tokens_seen": 1289224192 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006153907879955064, + "loss": 0.0737, + "theoretical_loss": 3.5924327268592267, + "tokens_seen": 1289486336 + }, + { + "epoch": 0.39, + "learning_rate": 0.000615310544053924, + "loss": 0.0698, + "theoretical_loss": 3.5923669864015664, + "tokens_seen": 1289748480 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006152303001123415, + "loss": 0.0699, + "theoretical_loss": 3.59230126304489, + "tokens_seen": 1290010624 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006151500561707592, + "loss": 0.0733, + "theoretical_loss": 3.592235556781276, + "tokens_seen": 1290272768 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006150698122291767, + "loss": 0.0705, + "theoretical_loss": 3.5921698676028058, + "tokens_seen": 1290534912 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006149895682875943, + "loss": 0.0683, + "theoretical_loss": 3.59210419550157, + "tokens_seen": 1290797056 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0010878384346142411, + "objective/train/docs_used": 472355, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5228557586669922, + "objective/train/original_loss": 1.5228557586669922, + "objective/train/theoretical_loss": 3.5920385404696606, + "objective/train/tokens_used": 1311519200, + "objective/train/value_avg": -0.0082855224609375, + "objective/train/value_loss": 0.00018637104949448258, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.97314453125, + "objective/train/value_reward_corr": 0.8474394273861514, + "objective/train/value_std": 0.019683837890625, + "objective/train/weight_avg": 1.0011759996414185, + "objective/train/weighted_lm_loss": 1.5245059728622437, + "objective/train/weights_max": 1.336806058883667, + "objective/train/weights_min": 0.5558143258094788, + "theoretical_loss": 3.5920385404696606, + "tokens_seen": 1291059200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006149093243460119, + "loss": 0.0722, + "theoretical_loss": 3.5920385404696606, + "tokens_seen": 1291059200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006148290804044295, + "loss": 0.0714, + "theoretical_loss": 3.591972902499177, + "tokens_seen": 1291321344 + }, + { + "epoch": 0.39, + "learning_rate": 0.000614748836462847, + "loss": 0.0713, + "theoretical_loss": 3.5919072815822233, + "tokens_seen": 1291583488 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006146685925212647, + "loss": 0.0713, + "theoretical_loss": 3.5918416777109092, + "tokens_seen": 1291845632 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006145883485796823, + "loss": 0.0697, + "theoretical_loss": 3.591776090877349, + "tokens_seen": 1292107776 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006145081046380998, + "loss": 0.0752, + "theoretical_loss": 3.5917105210736624, + "tokens_seen": 1292369920 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006144278606965175, + "loss": 0.072, + "theoretical_loss": 3.591644968291975, + "tokens_seen": 1292632064 + }, + { + "epoch": 0.39, + "learning_rate": 0.000614347616754935, + "loss": 0.0707, + "theoretical_loss": 3.5915794325244175, + "tokens_seen": 1292894208 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006142673728133526, + "loss": 0.0723, + "theoretical_loss": 3.5915139137631242, + "tokens_seen": 1293156352 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006141871288717702, + "loss": 0.0701, + "theoretical_loss": 3.5914484120002372, + "tokens_seen": 1293418496 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006141068849301877, + "loss": 0.0718, + "theoretical_loss": 3.591382927227902, + "tokens_seen": 1293680640 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006140266409886053, + "loss": 0.0746, + "theoretical_loss": 3.5913174594382693, + "tokens_seen": 1293942784 + }, + { + "epoch": 0.39, + "learning_rate": 0.000613946397047023, + "loss": 0.0725, + "theoretical_loss": 3.5912520086234974, + "tokens_seen": 1294204928 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0006294844206422567, + "objective/train/docs_used": 473604, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4397389888763428, + "objective/train/original_loss": 1.4397391080856323, + "objective/train/theoretical_loss": 3.591219289579233, + "objective/train/tokens_used": 1314796000, + "objective/train/value_avg": -0.0067596435546875, + "objective/train/value_loss": 0.00012036753469146788, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.262939453125, + "objective/train/value_reward_corr": 0.6750268958365275, + "objective/train/value_std": 0.00977325439453125, + "objective/train/weight_avg": 1.0006887912750244, + "objective/train/weighted_lm_loss": 1.4400802850723267, + "objective/train/weights_max": 1.1841590404510498, + "objective/train/weights_min": 0.8148074746131897, + "theoretical_loss": 3.591219289579233, + "tokens_seen": 1294336000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006138661531054406, + "loss": 0.0735, + "theoretical_loss": 3.5911865747757457, + "tokens_seen": 1294467072 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006137859091638582, + "loss": 0.0715, + "theoretical_loss": 3.5911211578871827, + "tokens_seen": 1294729216 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006137056652222758, + "loss": 0.0701, + "theoretical_loss": 3.591055757949981, + "tokens_seen": 1294991360 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006136254212806933, + "loss": 0.074, + "theoretical_loss": 3.590990374956316, + "tokens_seen": 1295253504 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006135451773391109, + "loss": 0.074, + "theoretical_loss": 3.5909250088983713, + "tokens_seen": 1295515648 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006134649333975285, + "loss": 0.0741, + "theoretical_loss": 3.5908596597683347, + "tokens_seen": 1295777792 + }, + { + "epoch": 0.39, + "learning_rate": 0.000613384689455946, + "loss": 0.0714, + "theoretical_loss": 3.590794327558399, + "tokens_seen": 1296039936 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006133044455143637, + "loss": 0.0697, + "theoretical_loss": 3.590729012260762, + "tokens_seen": 1296302080 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006132242015727812, + "loss": 0.0739, + "theoretical_loss": 3.5906637138676265, + "tokens_seen": 1296564224 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006131439576311989, + "loss": 0.0708, + "theoretical_loss": 3.590598432371202, + "tokens_seen": 1296826368 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006130637136896165, + "loss": 0.0704, + "theoretical_loss": 3.5905331677637013, + "tokens_seen": 1297088512 + }, + { + "epoch": 0.39, + "learning_rate": 0.000612983469748034, + "loss": 0.0712, + "theoretical_loss": 3.5904679200373435, + "tokens_seen": 1297350656 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.0006142263882793486, + "objective/train/docs_used": 474727, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.378791332244873, + "objective/train/original_loss": 1.378791332244873, + "objective/train/theoretical_loss": 3.5904026891843523, + "objective/train/tokens_used": 1318072800, + "objective/train/value_avg": -0.005767822265625, + "objective/train/value_loss": 0.000269250973360613, + "objective/train/value_max": -6.0617923736572266e-05, + "objective/train/value_min": -0.232666015625, + "objective/train/value_reward_corr": 0.4479749164985532, + "objective/train/value_std": 0.00815582275390625, + "objective/train/weight_avg": 1.0007208585739136, + "objective/train/weighted_lm_loss": 1.379752278327942, + "objective/train/weights_max": 1.1730239391326904, + "objective/train/weights_min": 0.22749631106853485, + "theoretical_loss": 3.5904026891843523, + "tokens_seen": 1297612800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006129032258064516, + "loss": 0.0701, + "theoretical_loss": 3.5904026891843523, + "tokens_seen": 1297612800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006128229818648692, + "loss": 0.0684, + "theoretical_loss": 3.5903374751969563, + "tokens_seen": 1297874944 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006127427379232868, + "loss": 0.0687, + "theoretical_loss": 3.59027227806739, + "tokens_seen": 1298137088 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006126624939817043, + "loss": 0.0709, + "theoretical_loss": 3.5902070977878937, + "tokens_seen": 1298399232 + }, + { + "epoch": 0.39, + "learning_rate": 0.000612582250040122, + "loss": 0.07, + "theoretical_loss": 3.5901419343507106, + "tokens_seen": 1298661376 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006125020060985395, + "loss": 0.0703, + "theoretical_loss": 3.5900767877480906, + "tokens_seen": 1298923520 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006124217621569572, + "loss": 0.0696, + "theoretical_loss": 3.5900116579722883, + "tokens_seen": 1299185664 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006123415182153748, + "loss": 0.0716, + "theoretical_loss": 3.5899465450155637, + "tokens_seen": 1299447808 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006122612742737923, + "loss": 0.0727, + "theoretical_loss": 3.589881448870182, + "tokens_seen": 1299709952 + }, + { + "epoch": 0.39, + "learning_rate": 0.00061218103033221, + "loss": 0.0723, + "theoretical_loss": 3.589816369528413, + "tokens_seen": 1299972096 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006121007863906275, + "loss": 0.0702, + "theoretical_loss": 3.5897513069825324, + "tokens_seen": 1300234240 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006120205424490451, + "loss": 0.0721, + "theoretical_loss": 3.589686261224819, + "tokens_seen": 1300496384 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006119402985074627, + "loss": 0.0718, + "theoretical_loss": 3.5896212322475605, + "tokens_seen": 1300758528 + }, + { + "epoch": 0.39, + "objective/train/advantage_avg": 0.001577448914758861, + "objective/train/docs_used": 475730, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3788347244262695, + "objective/train/original_loss": 1.3788347244262695, + "objective/train/theoretical_loss": 3.5895887240491917, + "objective/train/tokens_used": 1321349600, + "objective/train/value_avg": -0.00556182861328125, + "objective/train/value_loss": 0.0001541156234452501, + "objective/train/value_max": -0.00013768672943115234, + "objective/train/value_min": -0.29736328125, + "objective/train/value_reward_corr": 0.6665854299912869, + "objective/train/value_std": 0.009857177734375, + "objective/train/weight_avg": 1.0016462802886963, + "objective/train/weighted_lm_loss": 1.3818347454071045, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.3779159486293793, + "theoretical_loss": 3.5895887240491917, + "tokens_seen": 1300889600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006118600545658803, + "loss": 0.0712, + "theoretical_loss": 3.589556220043046, + "tokens_seen": 1301020672 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006117798106242978, + "loss": 0.0713, + "theoretical_loss": 3.589491224603571, + "tokens_seen": 1301282816 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006116995666827155, + "loss": 0.0715, + "theoretical_loss": 3.5894262459214366, + "tokens_seen": 1301544960 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006116193227411331, + "loss": 0.0698, + "theoretical_loss": 3.589361283988948, + "tokens_seen": 1301807104 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006115390787995506, + "loss": 0.0711, + "theoretical_loss": 3.589296338798418, + "tokens_seen": 1302069248 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006114588348579683, + "loss": 0.0724, + "theoretical_loss": 3.5892314103421596, + "tokens_seen": 1302331392 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006113785909163858, + "loss": 0.0712, + "theoretical_loss": 3.589166498612496, + "tokens_seen": 1302593536 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006112983469748035, + "loss": 0.0719, + "theoretical_loss": 3.589101603601752, + "tokens_seen": 1302855680 + }, + { + "epoch": 0.39, + "learning_rate": 0.000611218103033221, + "loss": 0.0733, + "theoretical_loss": 3.58903672530226, + "tokens_seen": 1303117824 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006111378590916385, + "loss": 0.0721, + "theoretical_loss": 3.5889718637063552, + "tokens_seen": 1303379968 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006110576151500562, + "loss": 0.0703, + "theoretical_loss": 3.5889070188063794, + "tokens_seen": 1303642112 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006109773712084738, + "loss": 0.0733, + "theoretical_loss": 3.5888421905946783, + "tokens_seen": 1303904256 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0006969193345867097, + "objective/train/docs_used": 476908, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3389830589294434, + "objective/train/original_loss": 1.3389828205108643, + "objective/train/theoretical_loss": 3.588777379063604, + "objective/train/tokens_used": 1324626400, + "objective/train/value_avg": -0.00836181640625, + "objective/train/value_loss": 0.0003036280977539718, + "objective/train/value_max": -3.24249267578125e-05, + "objective/train/value_min": -0.76318359375, + "objective/train/value_reward_corr": 0.5790845446495612, + "objective/train/value_std": 0.01256561279296875, + "objective/train/weight_avg": 1.0008283853530884, + "objective/train/weighted_lm_loss": 1.3403379917144775, + "objective/train/weights_max": 1.2200902700424194, + "objective/train/weights_min": 0.36939677596092224, + "theoretical_loss": 3.588777379063604, + "tokens_seen": 1304166400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006108971272668914, + "loss": 0.0701, + "theoretical_loss": 3.588777379063604, + "tokens_seen": 1304166400 + }, + { + "epoch": 0.4, + "learning_rate": 0.000610816883325309, + "loss": 0.074, + "theoretical_loss": 3.5887125842055116, + "tokens_seen": 1304428544 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006107366393837266, + "loss": 0.07, + "theoretical_loss": 3.588647806012765, + "tokens_seen": 1304690688 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006106563954421441, + "loss": 0.0712, + "theoretical_loss": 3.588583044477728, + "tokens_seen": 1304952832 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006105761515005617, + "loss": 0.0742, + "theoretical_loss": 3.5885182995927734, + "tokens_seen": 1305214976 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006104959075589793, + "loss": 0.0712, + "theoretical_loss": 3.5884535713502776, + "tokens_seen": 1305477120 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006104156636173968, + "loss": 0.0702, + "theoretical_loss": 3.588388859742622, + "tokens_seen": 1305739264 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006103354196758145, + "loss": 0.0678, + "theoretical_loss": 3.5883241647621933, + "tokens_seen": 1306001408 + }, + { + "epoch": 0.4, + "learning_rate": 0.000610255175734232, + "loss": 0.0701, + "theoretical_loss": 3.588259486401383, + "tokens_seen": 1306263552 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006101749317926497, + "loss": 0.0711, + "theoretical_loss": 3.5881948246525877, + "tokens_seen": 1306525696 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006100946878510673, + "loss": 0.071, + "theoretical_loss": 3.588130179508209, + "tokens_seen": 1306787840 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006100144439094848, + "loss": 0.0717, + "theoretical_loss": 3.5880655509606534, + "tokens_seen": 1307049984 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006099341999679025, + "loss": 0.0734, + "theoretical_loss": 3.5880009390023324, + "tokens_seen": 1307312128 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -7.018262112978846e-05, + "objective/train/docs_used": 478148, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4914653301239014, + "objective/train/original_loss": 1.4914653301239014, + "objective/train/theoretical_loss": 3.587968639241765, + "objective/train/tokens_used": 1327903200, + "objective/train/value_avg": -0.006618499755859375, + "objective/train/value_loss": 0.00026124040596187115, + "objective/train/value_max": -6.920099258422852e-05, + "objective/train/value_min": -0.21337890625, + "objective/train/value_reward_corr": 0.6777787710933085, + "objective/train/value_std": 0.01326751708984375, + "objective/train/weight_avg": 1.0000460147857666, + "objective/train/weighted_lm_loss": 1.4916276931762695, + "objective/train/weights_max": 1.1570359468460083, + "objective/train/weights_min": 0.36851781606674194, + "theoretical_loss": 3.587968639241765, + "tokens_seen": 1307443200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00060985395602632, + "loss": 0.0694, + "theoretical_loss": 3.5879363436256626, + "tokens_seen": 1307574272 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006097737120847376, + "loss": 0.0723, + "theoretical_loss": 3.587871764823066, + "tokens_seen": 1307836416 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006096934681431552, + "loss": 0.0729, + "theoretical_loss": 3.5878072025869683, + "tokens_seen": 1308098560 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006096132242015728, + "loss": 0.0698, + "theoretical_loss": 3.5877426569098017, + "tokens_seen": 1308360704 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006095329802599903, + "loss": 0.069, + "theoretical_loss": 3.5876781277840024, + "tokens_seen": 1308622848 + }, + { + "epoch": 0.4, + "learning_rate": 0.000609452736318408, + "loss": 0.0712, + "theoretical_loss": 3.5876136152020117, + "tokens_seen": 1308884992 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006093724923768256, + "loss": 0.0714, + "theoretical_loss": 3.587549119156276, + "tokens_seen": 1309147136 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006092922484352431, + "loss": 0.0748, + "theoretical_loss": 3.5874846396392472, + "tokens_seen": 1309409280 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006092120044936608, + "loss": 0.0719, + "theoretical_loss": 3.5874201766433815, + "tokens_seen": 1309671424 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006091317605520783, + "loss": 0.0728, + "theoretical_loss": 3.58735573016114, + "tokens_seen": 1309933568 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006090515166104959, + "loss": 0.0724, + "theoretical_loss": 3.5872913001849884, + "tokens_seen": 1310195712 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006089712726689135, + "loss": 0.0693, + "theoretical_loss": 3.5872268867073993, + "tokens_seen": 1310457856 + }, + { + "debugging/Compilability": 0.9, + "debugging/distinct-1-grams": 0.7732995475821178, + "debugging/entropy-1-grams": 5.431960387820077, + "debugging/length": 501.4, + "debugging/num_segments": 10, + "debugging/raw_token_scores_avg": 0.009144442155957222, + "debugging/raw_token_scores_std": 0.03945710137486458, + "debugging/score": 0.007191963134632398, + "debugging/score_std": 0.006508080924945895, + "epoch": 0.4, + "objective/train/advantage_avg": 0.0012943374458700418, + "objective/train/docs_used": 479375, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4710427522659302, + "objective/train/original_loss": 1.4710428714752197, + "objective/train/theoretical_loss": 3.587162489720847, + "objective/train/tokens_used": 1331180000, + "objective/train/value_avg": -0.01043701171875, + "objective/train/value_loss": 0.0005823342362418771, + "objective/train/value_max": -0.00010973215103149414, + "objective/train/value_min": -0.89501953125, + "objective/train/value_reward_corr": 0.7931646191164867, + "objective/train/value_std": 0.029541015625, + "objective/train/weight_avg": 1.0015616416931152, + "objective/train/weighted_lm_loss": 1.4743027687072754, + "objective/train/weights_max": 2.00335693359375, + "objective/train/weights_min": 0.40475231409072876, + "theoretical_loss": 3.587162489720847, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006088910287273311, + "loss": 0.0695, + "theoretical_loss": 3.587162489720847, + "tokens_seen": 1310720000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006088107847857486, + "loss": 0.0699, + "theoretical_loss": 3.5870981092178136, + "tokens_seen": 1310982144 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006087305408441663, + "loss": 0.0718, + "theoretical_loss": 3.5870337451907854, + "tokens_seen": 1311244288 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006086502969025839, + "loss": 0.0724, + "theoretical_loss": 3.5869693976322523, + "tokens_seen": 1311506432 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006085700529610015, + "loss": 0.0677, + "theoretical_loss": 3.586905066534711, + "tokens_seen": 1311768576 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006084898090194191, + "loss": 0.0663, + "theoretical_loss": 3.5868407518906618, + "tokens_seen": 1312030720 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006084095650778366, + "loss": 0.0732, + "theoretical_loss": 3.586776453692611, + "tokens_seen": 1312292864 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006083293211362543, + "loss": 0.0705, + "theoretical_loss": 3.5867121719330677, + "tokens_seen": 1312555008 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006082490771946718, + "loss": 0.0713, + "theoretical_loss": 3.586647906604549, + "tokens_seen": 1312817152 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006081688332530893, + "loss": 0.0709, + "theoretical_loss": 3.5865836576995744, + "tokens_seen": 1313079296 + }, + { + "epoch": 0.4, + "learning_rate": 0.000608088589311507, + "loss": 0.0707, + "theoretical_loss": 3.5865194252106694, + "tokens_seen": 1313341440 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006080083453699246, + "loss": 0.0727, + "theoretical_loss": 3.586455209130364, + "tokens_seen": 1313603584 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006079281014283422, + "loss": 0.0738, + "theoretical_loss": 3.5863910094511935, + "tokens_seen": 1313865728 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0004384119529277086, + "objective/train/docs_used": 480644, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4312394857406616, + "objective/train/original_loss": 1.431239128112793, + "objective/train/theoretical_loss": 3.5863589157597024, + "objective/train/tokens_used": 1334456800, + "objective/train/value_avg": -0.00553131103515625, + "objective/train/value_loss": 0.00011002233804902062, + "objective/train/value_max": -0.00011146068572998047, + "objective/train/value_min": -0.28369140625, + "objective/train/value_reward_corr": 0.5407403599164424, + "objective/train/value_std": 0.00691986083984375, + "objective/train/weight_avg": 1.0004894733428955, + "objective/train/weighted_lm_loss": 1.4322750568389893, + "objective/train/weights_max": 1.328023076057434, + "objective/train/weights_min": 0.384535551071167, + "theoretical_loss": 3.5863589157597024, + "tokens_seen": 1313996800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006078478574867598, + "loss": 0.0713, + "theoretical_loss": 3.586326826165698, + "tokens_seen": 1314127872 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006077676135451774, + "loss": 0.0714, + "theoretical_loss": 3.5862626592664215, + "tokens_seen": 1314390016 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006076873696035949, + "loss": 0.0702, + "theoretical_loss": 3.586198508745915, + "tokens_seen": 1314652160 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006076071256620125, + "loss": 0.0706, + "theoretical_loss": 3.586134374596732, + "tokens_seen": 1314914304 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006075268817204301, + "loss": 0.0721, + "theoretical_loss": 3.586070256811432, + "tokens_seen": 1315176448 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006074466377788477, + "loss": 0.0689, + "theoretical_loss": 3.58600615538258, + "tokens_seen": 1315438592 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006073663938372653, + "loss": 0.0737, + "theoretical_loss": 3.5859420703027447, + "tokens_seen": 1315700736 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006072861498956828, + "loss": 0.0701, + "theoretical_loss": 3.5858780015644998, + "tokens_seen": 1315962880 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006072059059541006, + "loss": 0.075, + "theoretical_loss": 3.585813949160425, + "tokens_seen": 1316225024 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006071256620125181, + "loss": 0.0699, + "theoretical_loss": 3.585749913083103, + "tokens_seen": 1316487168 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006070454180709356, + "loss": 0.0753, + "theoretical_loss": 3.5856858933251234, + "tokens_seen": 1316749312 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006069651741293533, + "loss": 0.0726, + "theoretical_loss": 3.5856218898790786, + "tokens_seen": 1317011456 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0007247466128319502, + "objective/train/docs_used": 481887, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5275864601135254, + "objective/train/original_loss": 1.5275864601135254, + "objective/train/theoretical_loss": 3.585557902737568, + "objective/train/tokens_used": 1337733600, + "objective/train/value_avg": -0.01215362548828125, + "objective/train/value_loss": 0.00025770600768737495, + "objective/train/value_max": -7.599592208862305e-05, + "objective/train/value_min": -0.9384765625, + "objective/train/value_reward_corr": 0.9506689362153705, + "objective/train/value_std": 0.051605224609375, + "objective/train/weight_avg": 1.0008461475372314, + "objective/train/weighted_lm_loss": 1.5283203125, + "objective/train/weights_max": 1.57668137550354, + "objective/train/weights_min": 0.37430042028427124, + "theoretical_loss": 3.585557902737568, + "tokens_seen": 1317273600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006068849301877708, + "loss": 0.0741, + "theoretical_loss": 3.585557902737568, + "tokens_seen": 1317273600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006068046862461884, + "loss": 0.0768, + "theoretical_loss": 3.5854939318931933, + "tokens_seen": 1317535744 + }, + { + "epoch": 0.4, + "learning_rate": 0.000606724442304606, + "loss": 0.0721, + "theoretical_loss": 3.5854299773385634, + "tokens_seen": 1317797888 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006066441983630236, + "loss": 0.0725, + "theoretical_loss": 3.585366039066291, + "tokens_seen": 1318060032 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006065639544214411, + "loss": 0.0716, + "theoretical_loss": 3.585302117068993, + "tokens_seen": 1318322176 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006064837104798588, + "loss": 0.0721, + "theoretical_loss": 3.585238211339292, + "tokens_seen": 1318584320 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006064034665382764, + "loss": 0.0704, + "theoretical_loss": 3.5851743218698156, + "tokens_seen": 1318846464 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006063232225966939, + "loss": 0.0715, + "theoretical_loss": 3.585110448653195, + "tokens_seen": 1319108608 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006062429786551116, + "loss": 0.0698, + "theoretical_loss": 3.585046591682068, + "tokens_seen": 1319370752 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006061627347135291, + "loss": 0.0719, + "theoretical_loss": 3.5849827509490746, + "tokens_seen": 1319632896 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006060824907719468, + "loss": 0.0719, + "theoretical_loss": 3.584918926446863, + "tokens_seen": 1319895040 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006060022468303643, + "loss": 0.0708, + "theoretical_loss": 3.5848551181680826, + "tokens_seen": 1320157184 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006059220028887819, + "loss": 0.0677, + "theoretical_loss": 3.5847913261053908, + "tokens_seen": 1320419328 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0010326327756047249, + "objective/train/docs_used": 483102, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3745527267456055, + "objective/train/original_loss": 1.374552845954895, + "objective/train/theoretical_loss": 3.584759436152784, + "objective/train/tokens_used": 1341010400, + "objective/train/value_avg": -0.008331298828125, + "objective/train/value_loss": 0.00027299131033942103, + "objective/train/value_max": -0.00014770030975341797, + "objective/train/value_min": -0.57470703125, + "objective/train/value_reward_corr": 0.6821128634573694, + "objective/train/value_std": 0.015960693359375, + "objective/train/weight_avg": 1.0011539459228516, + "objective/train/weighted_lm_loss": 1.3762563467025757, + "objective/train/weights_max": 1.522135853767395, + "objective/train/weights_min": 0.3687252402305603, + "theoretical_loss": 3.584759436152784, + "tokens_seen": 1320550400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006058417589471995, + "loss": 0.072, + "theoretical_loss": 3.584727550251447, + "tokens_seen": 1320681472 + }, + { + "epoch": 0.4, + "learning_rate": 0.000605761515005617, + "loss": 0.0734, + "theoretical_loss": 3.5846637905989183, + "tokens_seen": 1320943616 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006056812710640347, + "loss": 0.0709, + "theoretical_loss": 3.5846000471404738, + "tokens_seen": 1321205760 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006056010271224523, + "loss": 0.0762, + "theoretical_loss": 3.5845363198687883, + "tokens_seen": 1321467904 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006055207831808699, + "loss": 0.0727, + "theoretical_loss": 3.584472608776542, + "tokens_seen": 1321730048 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006054405392392874, + "loss": 0.0733, + "theoretical_loss": 3.5844089138564197, + "tokens_seen": 1321992192 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006053602952977051, + "loss": 0.0694, + "theoretical_loss": 3.584345235101111, + "tokens_seen": 1322254336 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006052800513561226, + "loss": 0.071, + "theoretical_loss": 3.584281572503309, + "tokens_seen": 1322516480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006051998074145401, + "loss": 0.0728, + "theoretical_loss": 3.584217926055713, + "tokens_seen": 1322778624 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006051195634729578, + "loss": 0.0733, + "theoretical_loss": 3.584154295751027, + "tokens_seen": 1323040768 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006050393195313753, + "loss": 0.0718, + "theoretical_loss": 3.5840906815819586, + "tokens_seen": 1323302912 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006049590755897931, + "loss": 0.0725, + "theoretical_loss": 3.584027083541222, + "tokens_seen": 1323565056 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": -0.0004341166641097516, + "objective/train/docs_used": 484332, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4634686708450317, + "objective/train/original_loss": 1.463468313217163, + "objective/train/theoretical_loss": 3.583963501621533, + "objective/train/tokens_used": 1344287200, + "objective/train/value_avg": -0.007572174072265625, + "objective/train/value_loss": 0.000452632550150156, + "objective/train/value_max": -7.033348083496094e-05, + "objective/train/value_min": -0.37451171875, + "objective/train/value_reward_corr": 0.6120818484782071, + "objective/train/value_std": 0.01380157470703125, + "objective/train/weight_avg": 0.9997588396072388, + "objective/train/weighted_lm_loss": 1.463590383529663, + "objective/train/weights_max": 1.2186625003814697, + "objective/train/weights_min": 0.3854755163192749, + "theoretical_loss": 3.583963501621533, + "tokens_seen": 1323827200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006048788316482106, + "loss": 0.0722, + "theoretical_loss": 3.583963501621533, + "tokens_seen": 1323827200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006047985877066282, + "loss": 0.0714, + "theoretical_loss": 3.583899935815616, + "tokens_seen": 1324089344 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006047183437650458, + "loss": 0.0715, + "theoretical_loss": 3.583836386116197, + "tokens_seen": 1324351488 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006046380998234633, + "loss": 0.0721, + "theoretical_loss": 3.5837728525160086, + "tokens_seen": 1324613632 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006045578558818809, + "loss": 0.0764, + "theoretical_loss": 3.5837093350077875, + "tokens_seen": 1324875776 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006044776119402985, + "loss": 0.071, + "theoretical_loss": 3.5836458335842747, + "tokens_seen": 1325137920 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006043973679987161, + "loss": 0.0728, + "theoretical_loss": 3.5835823482382163, + "tokens_seen": 1325400064 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006043171240571336, + "loss": 0.0712, + "theoretical_loss": 3.583518878962364, + "tokens_seen": 1325662208 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006042368801155514, + "loss": 0.0709, + "theoretical_loss": 3.583455425749472, + "tokens_seen": 1325924352 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006041566361739689, + "loss": 0.0726, + "theoretical_loss": 3.583391988592301, + "tokens_seen": 1326186496 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006040763922323864, + "loss": 0.0748, + "theoretical_loss": 3.5833285674836164, + "tokens_seen": 1326448640 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006039961482908041, + "loss": 0.0723, + "theoretical_loss": 3.583265162416187, + "tokens_seen": 1326710784 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006039159043492216, + "loss": 0.0723, + "theoretical_loss": 3.583201773382788, + "tokens_seen": 1326972928 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0011210001539438963, + "objective/train/docs_used": 485470, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5233768224716187, + "objective/train/original_loss": 1.5233769416809082, + "objective/train/theoretical_loss": 3.5831700848765924, + "objective/train/tokens_used": 1347564000, + "objective/train/value_avg": -0.0078277587890625, + "objective/train/value_loss": 0.0003868304193019867, + "objective/train/value_max": -7.31348991394043e-05, + "objective/train/value_min": -0.60107421875, + "objective/train/value_reward_corr": 0.5873188182429645, + "objective/train/value_std": 0.0148773193359375, + "objective/train/weight_avg": 1.0012885332107544, + "objective/train/weighted_lm_loss": 1.5247997045516968, + "objective/train/weights_max": 1.3386691808700562, + "objective/train/weights_min": 0.3721534311771393, + "theoretical_loss": 3.5831700848765924, + "tokens_seen": 1327104000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006038356604076392, + "loss": 0.0732, + "theoretical_loss": 3.583138400376197, + "tokens_seen": 1327235072 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006037554164660568, + "loss": 0.071, + "theoretical_loss": 3.583075043389199, + "tokens_seen": 1327497216 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006036751725244744, + "loss": 0.073, + "theoretical_loss": 3.583011702414581, + "tokens_seen": 1327759360 + }, + { + "epoch": 0.4, + "learning_rate": 0.000603594928582892, + "loss": 0.0735, + "theoretical_loss": 3.5829483774451374, + "tokens_seen": 1328021504 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006035146846413096, + "loss": 0.0711, + "theoretical_loss": 3.582885068473665, + "tokens_seen": 1328283648 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006034344406997272, + "loss": 0.0726, + "theoretical_loss": 3.582821775492966, + "tokens_seen": 1328545792 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006033541967581448, + "loss": 0.0711, + "theoretical_loss": 3.5827584984958474, + "tokens_seen": 1328807936 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006032739528165624, + "loss": 0.0733, + "theoretical_loss": 3.582695237475121, + "tokens_seen": 1329070080 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006031937088749799, + "loss": 0.071, + "theoretical_loss": 3.582631992423603, + "tokens_seen": 1329332224 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006031134649333976, + "loss": 0.0708, + "theoretical_loss": 3.582568763334115, + "tokens_seen": 1329594368 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006030332209918151, + "loss": 0.0684, + "theoretical_loss": 3.582505550199481, + "tokens_seen": 1329856512 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006029529770502326, + "loss": 0.0708, + "theoretical_loss": 3.5824423530125324, + "tokens_seen": 1330118656 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0005511230556294322, + "objective/train/docs_used": 486514, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4543170928955078, + "objective/train/original_loss": 1.4543169736862183, + "objective/train/theoretical_loss": 3.582379171766104, + "objective/train/tokens_used": 1350840800, + "objective/train/value_avg": -0.00664520263671875, + "objective/train/value_loss": 0.0001335861161351204, + "objective/train/value_max": -9.918212890625e-05, + "objective/train/value_min": -0.250244140625, + "objective/train/value_reward_corr": 0.6410735725075329, + "objective/train/value_std": 0.01018524169921875, + "objective/train/weight_avg": 1.0006134510040283, + "objective/train/weighted_lm_loss": 1.4551430940628052, + "objective/train/weights_max": 1.1343940496444702, + "objective/train/weights_min": 0.3683467209339142, + "theoretical_loss": 3.582379171766104, + "tokens_seen": 1330380800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006028727331086503, + "loss": 0.0718, + "theoretical_loss": 3.582379171766104, + "tokens_seen": 1330380800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006027924891670679, + "loss": 0.0722, + "theoretical_loss": 3.582316006453034, + "tokens_seen": 1330642944 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006027122452254855, + "loss": 0.0721, + "theoretical_loss": 3.5822528570661683, + "tokens_seen": 1330905088 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006026320012839031, + "loss": 0.0731, + "theoretical_loss": 3.582189723598354, + "tokens_seen": 1331167232 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006025517573423207, + "loss": 0.0722, + "theoretical_loss": 3.582126606042446, + "tokens_seen": 1331429376 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006024715134007382, + "loss": 0.072, + "theoretical_loss": 3.5820635043913005, + "tokens_seen": 1331691520 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006023912694591559, + "loss": 0.0732, + "theoretical_loss": 3.582000418637781, + "tokens_seen": 1331953664 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006023110255175734, + "loss": 0.0686, + "theoretical_loss": 3.581937348774755, + "tokens_seen": 1332215808 + }, + { + "epoch": 0.4, + "learning_rate": 0.000602230781575991, + "loss": 0.0723, + "theoretical_loss": 3.5818742947950932, + "tokens_seen": 1332477952 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006021505376344086, + "loss": 0.0726, + "theoretical_loss": 3.5818112566916724, + "tokens_seen": 1332740096 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006020702936928261, + "loss": 0.0709, + "theoretical_loss": 3.5817482344573746, + "tokens_seen": 1333002240 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006019900497512439, + "loss": 0.0728, + "theoretical_loss": 3.5816852280850835, + "tokens_seen": 1333264384 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006019098058096614, + "loss": 0.0704, + "theoretical_loss": 3.5816222375676903, + "tokens_seen": 1333526528 + }, + { + "epoch": 0.4, + "objective/train/advantage_avg": 0.0011593915987759829, + "objective/train/docs_used": 487665, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3809312582015991, + "objective/train/original_loss": 1.3809311389923096, + "objective/train/theoretical_loss": 3.58159074825236, + "objective/train/tokens_used": 1354117600, + "objective/train/value_avg": -0.00823211669921875, + "objective/train/value_loss": 0.000309052033117041, + "objective/train/value_max": -0.00010889768600463867, + "objective/train/value_min": -0.958984375, + "objective/train/value_reward_corr": 0.7228285026400678, + "objective/train/value_std": 0.0185089111328125, + "objective/train/weight_avg": 1.001293659210205, + "objective/train/weighted_lm_loss": 1.3820528984069824, + "objective/train/weights_max": 1.5919711589813232, + "objective/train/weights_min": 0.38147324323654175, + "theoretical_loss": 3.58159074825236, + "tokens_seen": 1333657600 + }, + { + "epoch": 0.4, + "learning_rate": 0.000601829561868079, + "loss": 0.0721, + "theoretical_loss": 3.58155926289809, + "tokens_seen": 1333788672 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006017493179264966, + "loss": 0.0687, + "theoretical_loss": 3.581496304069181, + "tokens_seen": 1334050816 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006016690739849141, + "loss": 0.0735, + "theoretical_loss": 3.5814333610738673, + "tokens_seen": 1334312960 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006015888300433317, + "loss": 0.0719, + "theoretical_loss": 3.5813704339050583, + "tokens_seen": 1334575104 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006015085861017493, + "loss": 0.0717, + "theoretical_loss": 3.581307522555666, + "tokens_seen": 1334837248 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006014283421601669, + "loss": 0.0694, + "theoretical_loss": 3.5812446270186085, + "tokens_seen": 1335099392 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006013480982185844, + "loss": 0.0709, + "theoretical_loss": 3.5811817472868075, + "tokens_seen": 1335361536 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006012678542770022, + "loss": 0.0704, + "theoretical_loss": 3.5811188833531897, + "tokens_seen": 1335623680 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006011876103354197, + "loss": 0.068, + "theoretical_loss": 3.5810560352106866, + "tokens_seen": 1335885824 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006011073663938372, + "loss": 0.0686, + "theoretical_loss": 3.580993202852234, + "tokens_seen": 1336147968 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006010271224522549, + "loss": 0.0675, + "theoretical_loss": 3.580930386270772, + "tokens_seen": 1336410112 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006009468785106724, + "loss": 0.0718, + "theoretical_loss": 3.5808675854592464, + "tokens_seen": 1336672256 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -8.30290955491364e-05, + "objective/train/docs_used": 488954, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2475985288619995, + "objective/train/original_loss": 1.247598648071289, + "objective/train/theoretical_loss": 3.5808048004106054, + "objective/train/tokens_used": 1357394400, + "objective/train/value_avg": -0.00824737548828125, + "objective/train/value_loss": 0.00022322498261928558, + "objective/train/value_max": -4.1961669921875e-05, + "objective/train/value_min": -0.374755859375, + "objective/train/value_reward_corr": 0.6647625277203212, + "objective/train/value_std": 0.0136566162109375, + "objective/train/weight_avg": 1.0000206232070923, + "objective/train/weighted_lm_loss": 1.2482742071151733, + "objective/train/weights_max": 1.1859358549118042, + "objective/train/weights_min": 0.3940271735191345, + "theoretical_loss": 3.5808048004106054, + "tokens_seen": 1336934400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006008666345690901, + "loss": 0.0717, + "theoretical_loss": 3.5808048004106054, + "tokens_seen": 1336934400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006007863906275076, + "loss": 0.071, + "theoretical_loss": 3.5807420311178033, + "tokens_seen": 1337196544 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006007061466859252, + "loss": 0.071, + "theoretical_loss": 3.580679277573799, + "tokens_seen": 1337458688 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006006259027443429, + "loss": 0.0714, + "theoretical_loss": 3.5806165397715546, + "tokens_seen": 1337720832 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006005456588027604, + "loss": 0.0717, + "theoretical_loss": 3.580553817704039, + "tokens_seen": 1337982976 + }, + { + "epoch": 0.41, + "learning_rate": 0.000600465414861178, + "loss": 0.0696, + "theoretical_loss": 3.580491111364223, + "tokens_seen": 1338245120 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006003851709195956, + "loss": 0.0693, + "theoretical_loss": 3.5804284207450836, + "tokens_seen": 1338507264 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006003049269780132, + "loss": 0.0682, + "theoretical_loss": 3.580365745839602, + "tokens_seen": 1338769408 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006002246830364307, + "loss": 0.0719, + "theoretical_loss": 3.5803030866407637, + "tokens_seen": 1339031552 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006001444390948484, + "loss": 0.0701, + "theoretical_loss": 3.580240443141559, + "tokens_seen": 1339293696 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006000641951532659, + "loss": 0.0715, + "theoretical_loss": 3.5801778153349817, + "tokens_seen": 1339555840 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005999839512116834, + "loss": 0.0722, + "theoretical_loss": 3.5801152032140315, + "tokens_seen": 1339817984 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005999037072701011, + "loss": 0.0698, + "theoretical_loss": 3.580052606771712, + "tokens_seen": 1340080128 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0011167412158101797, + "objective/train/docs_used": 490192, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4070236682891846, + "objective/train/original_loss": 1.4070236682891846, + "objective/train/theoretical_loss": 3.580021314427854, + "objective/train/tokens_used": 1360671200, + "objective/train/value_avg": -0.00506591796875, + "objective/train/value_loss": 0.00011695722059812397, + "objective/train/value_max": -6.157159805297852e-05, + "objective/train/value_min": -0.1492919921875, + "objective/train/value_reward_corr": 0.4796502469239035, + "objective/train/value_std": 0.006679534912109375, + "objective/train/weight_avg": 1.0011686086654663, + "objective/train/weighted_lm_loss": 1.408930778503418, + "objective/train/weights_max": 1.1452467441558838, + "objective/train/weights_min": 0.3836681544780731, + "theoretical_loss": 3.580021314427854, + "tokens_seen": 1340211200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005998234633285187, + "loss": 0.0697, + "theoretical_loss": 3.579990026001031, + "tokens_seen": 1340342272 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005997432193869364, + "loss": 0.072, + "theoretical_loss": 3.579927460895002, + "tokens_seen": 1340604416 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005996629754453539, + "loss": 0.074, + "theoretical_loss": 3.5798649114466405, + "tokens_seen": 1340866560 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005995827315037715, + "loss": 0.0703, + "theoretical_loss": 3.579802377648969, + "tokens_seen": 1341128704 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005995024875621891, + "loss": 0.0724, + "theoretical_loss": 3.579739859495013, + "tokens_seen": 1341390848 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005994222436206067, + "loss": 0.072, + "theoretical_loss": 3.5796773569778026, + "tokens_seen": 1341652992 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005993419996790242, + "loss": 0.0712, + "theoretical_loss": 3.579614870090374, + "tokens_seen": 1341915136 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005992617557374418, + "loss": 0.075, + "theoretical_loss": 3.5795523988257654, + "tokens_seen": 1342177280 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005991815117958594, + "loss": 0.072, + "theoretical_loss": 3.5794899431770215, + "tokens_seen": 1342439424 + }, + { + "epoch": 0.41, + "learning_rate": 0.000599101267854277, + "loss": 0.0732, + "theoretical_loss": 3.5794275031371896, + "tokens_seen": 1342701568 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005990210239126947, + "loss": 0.0694, + "theoretical_loss": 3.579365078699323, + "tokens_seen": 1342963712 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005989407799711122, + "loss": 0.072, + "theoretical_loss": 3.579302669856479, + "tokens_seen": 1343225856 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.000923873158171773, + "objective/train/docs_used": 490931, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.553824543952942, + "objective/train/original_loss": 1.553824543952942, + "objective/train/theoretical_loss": 3.5792402766017197, + "objective/train/tokens_used": 1363948000, + "objective/train/value_avg": -0.007419586181640625, + "objective/train/value_loss": 0.0002518415276426822, + "objective/train/value_max": -8.481740951538086e-05, + "objective/train/value_min": -0.54833984375, + "objective/train/value_reward_corr": 0.6071947482415825, + "objective/train/value_std": 0.01244354248046875, + "objective/train/weight_avg": 1.0010340213775635, + "objective/train/weighted_lm_loss": 1.5558077096939087, + "objective/train/weights_max": 1.2336021661758423, + "objective/train/weights_min": 0.3683885335922241, + "theoretical_loss": 3.5792402766017197, + "tokens_seen": 1343488000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005988605360295298, + "loss": 0.0702, + "theoretical_loss": 3.5792402766017197, + "tokens_seen": 1343488000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005987802920879474, + "loss": 0.0693, + "theoretical_loss": 3.57917789892811, + "tokens_seen": 1343750144 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005987000481463649, + "loss": 0.0692, + "theoretical_loss": 3.579115536828721, + "tokens_seen": 1344012288 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005986198042047825, + "loss": 0.0701, + "theoretical_loss": 3.5790531902966274, + "tokens_seen": 1344274432 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005985395602632001, + "loss": 0.0698, + "theoretical_loss": 3.578990859324909, + "tokens_seen": 1344536576 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005984593163216177, + "loss": 0.0698, + "theoretical_loss": 3.5789285439066494, + "tokens_seen": 1344798720 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005983790723800354, + "loss": 0.0701, + "theoretical_loss": 3.578866244034937, + "tokens_seen": 1345060864 + }, + { + "epoch": 0.41, + "learning_rate": 0.000598298828438453, + "loss": 0.0733, + "theoretical_loss": 3.5788039597028636, + "tokens_seen": 1345323008 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005982185844968705, + "loss": 0.0677, + "theoretical_loss": 3.5787416909035272, + "tokens_seen": 1345585152 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005981383405552881, + "loss": 0.0707, + "theoretical_loss": 3.578679437630029, + "tokens_seen": 1345847296 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005980580966137057, + "loss": 0.072, + "theoretical_loss": 3.5786171998754748, + "tokens_seen": 1346109440 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005979778526721232, + "loss": 0.0697, + "theoretical_loss": 3.5785549776329746, + "tokens_seen": 1346371584 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005978976087305409, + "loss": 0.0697, + "theoretical_loss": 3.578492770895643, + "tokens_seen": 1346633728 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0012662590015679598, + "objective/train/docs_used": 492127, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3743919134140015, + "objective/train/original_loss": 1.3743919134140015, + "objective/train/theoretical_loss": 3.5784616733392656, + "objective/train/tokens_used": 1367224800, + "objective/train/value_avg": -0.0101776123046875, + "objective/train/value_loss": 0.0002954881056211889, + "objective/train/value_max": -7.033348083496094e-05, + "objective/train/value_min": -0.70166015625, + "objective/train/value_reward_corr": 0.7162222528307167, + "objective/train/value_std": 0.0204315185546875, + "objective/train/weight_avg": 1.0014090538024902, + "objective/train/weighted_lm_loss": 1.3758933544158936, + "objective/train/weights_max": 1.8491084575653076, + "objective/train/weights_min": 0.37020933628082275, + "theoretical_loss": 3.5784616733392656, + "tokens_seen": 1346764800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005978173647889584, + "loss": 0.0675, + "theoretical_loss": 3.5784305796566, + "tokens_seen": 1346895872 + }, + { + "epoch": 0.41, + "learning_rate": 0.000597737120847376, + "loss": 0.0706, + "theoretical_loss": 3.5783684039089687, + "tokens_seen": 1347158016 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005976568769057937, + "loss": 0.069, + "theoretical_loss": 3.578306243645876, + "tokens_seen": 1347420160 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005975766329642112, + "loss": 0.0685, + "theoretical_loss": 3.5782440988604547, + "tokens_seen": 1347682304 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005974963890226288, + "loss": 0.07, + "theoretical_loss": 3.5781819695458417, + "tokens_seen": 1347944448 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005974161450810464, + "loss": 0.0718, + "theoretical_loss": 3.578119855695178, + "tokens_seen": 1348206592 + }, + { + "epoch": 0.41, + "learning_rate": 0.000597335901139464, + "loss": 0.0702, + "theoretical_loss": 3.5780577573016084, + "tokens_seen": 1348468736 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005972556571978816, + "loss": 0.0726, + "theoretical_loss": 3.5779956743582835, + "tokens_seen": 1348730880 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005971754132562992, + "loss": 0.0706, + "theoretical_loss": 3.5779336068583563, + "tokens_seen": 1348993024 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005970951693147167, + "loss": 0.0716, + "theoretical_loss": 3.577871554794986, + "tokens_seen": 1349255168 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005970149253731343, + "loss": 0.0679, + "theoretical_loss": 3.5778095181613354, + "tokens_seen": 1349517312 + }, + { + "epoch": 0.41, + "learning_rate": 0.000596934681431552, + "loss": 0.0713, + "theoretical_loss": 3.577747496950572, + "tokens_seen": 1349779456 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.00041352870175614953, + "objective/train/docs_used": 493399, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4604887962341309, + "objective/train/original_loss": 1.46048903465271, + "objective/train/theoretical_loss": 3.577685491155866, + "objective/train/tokens_used": 1370501600, + "objective/train/value_avg": -0.006885528564453125, + "objective/train/value_loss": 0.0001873795554274693, + "objective/train/value_max": -9.608268737792969e-05, + "objective/train/value_min": -0.3330078125, + "objective/train/value_reward_corr": 0.638973367373221, + "objective/train/value_std": 0.0112152099609375, + "objective/train/weight_avg": 1.0004981756210327, + "objective/train/weighted_lm_loss": 1.4614195823669434, + "objective/train/weights_max": 1.1102087497711182, + "objective/train/weights_min": 0.3691481649875641, + "theoretical_loss": 3.577685491155866, + "tokens_seen": 1350041600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005968544374899695, + "loss": 0.0717, + "theoretical_loss": 3.577685491155866, + "tokens_seen": 1350041600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005967741935483872, + "loss": 0.0715, + "theoretical_loss": 3.577623500770394, + "tokens_seen": 1350303744 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005966939496068047, + "loss": 0.0705, + "theoretical_loss": 3.577561525787337, + "tokens_seen": 1350565888 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005966137056652223, + "loss": 0.0696, + "theoretical_loss": 3.5774995661998785, + "tokens_seen": 1350828032 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005965334617236399, + "loss": 0.0701, + "theoretical_loss": 3.5774376220012085, + "tokens_seen": 1351090176 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005964532177820575, + "loss": 0.0732, + "theoretical_loss": 3.5773756931845186, + "tokens_seen": 1351352320 + }, + { + "epoch": 0.41, + "learning_rate": 0.000596372973840475, + "loss": 0.0711, + "theoretical_loss": 3.5773137797430077, + "tokens_seen": 1351614464 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005962927298988926, + "loss": 0.0718, + "theoretical_loss": 3.577251881669877, + "tokens_seen": 1351876608 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005962124859573102, + "loss": 0.0756, + "theoretical_loss": 3.5771899989583336, + "tokens_seen": 1352138752 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005961322420157277, + "loss": 0.0694, + "theoretical_loss": 3.577128131601587, + "tokens_seen": 1352400896 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005960519980741455, + "loss": 0.0677, + "theoretical_loss": 3.5770662795928527, + "tokens_seen": 1352663040 + }, + { + "epoch": 0.41, + "learning_rate": 0.000595971754132563, + "loss": 0.0681, + "theoretical_loss": 3.5770044429253494, + "tokens_seen": 1352925184 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005958915101909807, + "loss": 0.0701, + "theoretical_loss": 3.576942621592301, + "tokens_seen": 1353187328 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -8.931478078011423e-05, + "objective/train/docs_used": 494475, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3972373008728027, + "objective/train/original_loss": 1.3972374200820923, + "objective/train/theoretical_loss": 3.5769117166740805, + "objective/train/tokens_used": 1373778400, + "objective/train/value_avg": -0.00982666015625, + "objective/train/value_loss": 0.00028727849712595344, + "objective/train/value_max": -4.297494888305664e-05, + "objective/train/value_min": -0.77197265625, + "objective/train/value_reward_corr": 0.7621639892533103, + "objective/train/value_std": 0.0201416015625, + "objective/train/weight_avg": 1.0000455379486084, + "objective/train/weighted_lm_loss": 1.3973900079727173, + "objective/train/weights_max": 1.478626012802124, + "objective/train/weights_min": 0.3716852366924286, + "theoretical_loss": 3.5769117166740805, + "tokens_seen": 1353318400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005958112662493982, + "loss": 0.071, + "theoretical_loss": 3.576880815586935, + "tokens_seen": 1353449472 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005957310223078157, + "loss": 0.0714, + "theoretical_loss": 3.576819024902483, + "tokens_seen": 1353711616 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005956507783662334, + "loss": 0.074, + "theoretical_loss": 3.576757249532183, + "tokens_seen": 1353973760 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005955705344246509, + "loss": 0.0697, + "theoretical_loss": 3.576695489469274, + "tokens_seen": 1354235904 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005954902904830685, + "loss": 0.0713, + "theoretical_loss": 3.5766337447070016, + "tokens_seen": 1354498048 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005954100465414862, + "loss": 0.0707, + "theoretical_loss": 3.5765720152386153, + "tokens_seen": 1354760192 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005953298025999038, + "loss": 0.0706, + "theoretical_loss": 3.5765103010573682, + "tokens_seen": 1355022336 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005952495586583213, + "loss": 0.071, + "theoretical_loss": 3.576448602156518, + "tokens_seen": 1355284480 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005951693147167389, + "loss": 0.0694, + "theoretical_loss": 3.5763869185293276, + "tokens_seen": 1355546624 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005950890707751565, + "loss": 0.0729, + "theoretical_loss": 3.576325250169062, + "tokens_seen": 1355808768 + }, + { + "epoch": 0.41, + "learning_rate": 0.000595008826833574, + "loss": 0.0696, + "theoretical_loss": 3.5762635970689933, + "tokens_seen": 1356070912 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005949285828919917, + "loss": 0.0688, + "theoretical_loss": 3.576201959222396, + "tokens_seen": 1356333056 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0009401834686286747, + "objective/train/docs_used": 495730, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5935755968093872, + "objective/train/original_loss": 1.5935757160186768, + "objective/train/theoretical_loss": 3.576140336622548, + "objective/train/tokens_used": 1377055200, + "objective/train/value_avg": -0.01068878173828125, + "objective/train/value_loss": 0.0004906124086119235, + "objective/train/value_max": -7.253885269165039e-05, + "objective/train/value_min": -0.935546875, + "objective/train/value_reward_corr": 0.8641266897975199, + "objective/train/value_std": 0.038726806640625, + "objective/train/weight_avg": 1.0011733770370483, + "objective/train/weighted_lm_loss": 1.594736933708191, + "objective/train/weights_max": 1.7491079568862915, + "objective/train/weights_min": 0.3695010542869568, + "theoretical_loss": 3.576140336622548, + "tokens_seen": 1356595200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005948483389504092, + "loss": 0.07, + "theoretical_loss": 3.576140336622548, + "tokens_seen": 1356595200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005947680950088269, + "loss": 0.0727, + "theoretical_loss": 3.5760787292627345, + "tokens_seen": 1356857344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005946878510672444, + "loss": 0.0726, + "theoretical_loss": 3.576017137136242, + "tokens_seen": 1357119488 + }, + { + "epoch": 0.41, + "learning_rate": 0.000594607607125662, + "loss": 0.071, + "theoretical_loss": 3.5759555602363635, + "tokens_seen": 1357381632 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005945273631840797, + "loss": 0.0679, + "theoretical_loss": 3.5758939985563942, + "tokens_seen": 1357643776 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005944471192424972, + "loss": 0.0708, + "theoretical_loss": 3.5758324520896347, + "tokens_seen": 1357905920 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005943668753009148, + "loss": 0.0717, + "theoretical_loss": 3.57577092082939, + "tokens_seen": 1358168064 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005942866313593324, + "loss": 0.071, + "theoretical_loss": 3.5757094047689684, + "tokens_seen": 1358430208 + }, + { + "epoch": 0.41, + "learning_rate": 0.00059420638741775, + "loss": 0.0695, + "theoretical_loss": 3.575647903901684, + "tokens_seen": 1358692352 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005941261434761675, + "loss": 0.0715, + "theoretical_loss": 3.575586418220853, + "tokens_seen": 1358954496 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005940458995345851, + "loss": 0.0717, + "theoretical_loss": 3.5755249477197983, + "tokens_seen": 1359216640 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005939656555930027, + "loss": 0.0695, + "theoretical_loss": 3.5754634923918447, + "tokens_seen": 1359478784 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005938854116514203, + "loss": 0.0695, + "theoretical_loss": 3.5754020522303227, + "tokens_seen": 1359740928 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0009771112818270922, + "objective/train/docs_used": 496861, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3605506420135498, + "objective/train/original_loss": 1.3605504035949707, + "objective/train/theoretical_loss": 3.5753713378348904, + "objective/train/tokens_used": 1380332000, + "objective/train/value_avg": -0.0107574462890625, + "objective/train/value_loss": 0.0002261370827909559, + "objective/train/value_max": -7.659196853637695e-05, + "objective/train/value_min": -0.440673828125, + "objective/train/value_reward_corr": 0.701051009387965, + "objective/train/value_std": 0.0171356201171875, + "objective/train/weight_avg": 1.001084804534912, + "objective/train/weighted_lm_loss": 1.3605451583862305, + "objective/train/weights_max": 1.1667248010635376, + "objective/train/weights_min": 0.5305235981941223, + "theoretical_loss": 3.5753713378348904, + "tokens_seen": 1359872000 + }, + { + "epoch": 0.41, + "learning_rate": 0.000593805167709838, + "loss": 0.0697, + "theoretical_loss": 3.575340627228566, + "tokens_seen": 1360003072 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005937249237682555, + "loss": 0.0711, + "theoretical_loss": 3.575279217379914, + "tokens_seen": 1360265216 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005936446798266731, + "loss": 0.0737, + "theoretical_loss": 3.575217822677709, + "tokens_seen": 1360527360 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005935644358850907, + "loss": 0.0698, + "theoretical_loss": 3.575156443115297, + "tokens_seen": 1360789504 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005934841919435082, + "loss": 0.0699, + "theoretical_loss": 3.5750950786860307, + "tokens_seen": 1361051648 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005934039480019259, + "loss": 0.0703, + "theoretical_loss": 3.5750337293832644, + "tokens_seen": 1361313792 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005933237040603434, + "loss": 0.0681, + "theoretical_loss": 3.5749723952003576, + "tokens_seen": 1361575936 + }, + { + "epoch": 0.41, + "learning_rate": 0.000593243460118761, + "loss": 0.0741, + "theoretical_loss": 3.5749110761306744, + "tokens_seen": 1361838080 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005931632161771787, + "loss": 0.0693, + "theoretical_loss": 3.5748497721675823, + "tokens_seen": 1362100224 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005930829722355963, + "loss": 0.0722, + "theoretical_loss": 3.574788483304453, + "tokens_seen": 1362362368 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005930027282940138, + "loss": 0.0698, + "theoretical_loss": 3.5747272095346636, + "tokens_seen": 1362624512 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005929224843524315, + "loss": 0.0718, + "theoretical_loss": 3.5746659508515943, + "tokens_seen": 1362886656 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.0009679661598056555, + "objective/train/docs_used": 498147, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4032081365585327, + "objective/train/original_loss": 1.4032080173492432, + "objective/train/theoretical_loss": 3.5746047072486293, + "objective/train/tokens_used": 1383608800, + "objective/train/value_avg": -0.006671905517578125, + "objective/train/value_loss": 0.00034899654565379024, + "objective/train/value_max": -7.486343383789062e-05, + "objective/train/value_min": -0.732421875, + "objective/train/value_reward_corr": 0.5755574112170757, + "objective/train/value_std": 0.0115966796875, + "objective/train/weight_avg": 1.0011147260665894, + "objective/train/weighted_lm_loss": 1.4046744108200073, + "objective/train/weights_max": 1.3812626600265503, + "objective/train/weights_min": 0.3844299912452698, + "theoretical_loss": 3.5746047072486293, + "tokens_seen": 1363148800 + }, + { + "epoch": 0.41, + "learning_rate": 0.000592842240410849, + "loss": 0.0711, + "theoretical_loss": 3.5746047072486293, + "tokens_seen": 1363148800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005927619964692665, + "loss": 0.0691, + "theoretical_loss": 3.574543478719158, + "tokens_seen": 1363410944 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005926817525276842, + "loss": 0.07, + "theoretical_loss": 3.5744822652565724, + "tokens_seen": 1363673088 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005926015085861017, + "loss": 0.0727, + "theoretical_loss": 3.5744210668542706, + "tokens_seen": 1363935232 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005925212646445193, + "loss": 0.0722, + "theoretical_loss": 3.574359883505653, + "tokens_seen": 1364197376 + }, + { + "epoch": 0.41, + "learning_rate": 0.000592441020702937, + "loss": 0.0723, + "theoretical_loss": 3.5742987152041255, + "tokens_seen": 1364459520 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005923607767613546, + "loss": 0.0688, + "theoretical_loss": 3.574237561943098, + "tokens_seen": 1364721664 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005922805328197721, + "loss": 0.0713, + "theoretical_loss": 3.5741764237159837, + "tokens_seen": 1364983808 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005922002888781897, + "loss": 0.0721, + "theoretical_loss": 3.5741153005162003, + "tokens_seen": 1365245952 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005921200449366073, + "loss": 0.073, + "theoretical_loss": 3.57405419233717, + "tokens_seen": 1365508096 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005920398009950249, + "loss": 0.0712, + "theoretical_loss": 3.5739930991723194, + "tokens_seen": 1365770240 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005919595570534425, + "loss": 0.0736, + "theoretical_loss": 3.5739320210150787, + "tokens_seen": 1366032384 + }, + { + "epoch": 0.41, + "learning_rate": 0.00059187931311186, + "loss": 0.0713, + "theoretical_loss": 3.5738709578588814, + "tokens_seen": 1366294528 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": -0.0008065802976489067, + "objective/train/docs_used": 499217, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3456385135650635, + "objective/train/original_loss": 1.345638632774353, + "objective/train/theoretical_loss": 3.573840431904124, + "objective/train/tokens_used": 1386885600, + "objective/train/value_avg": -0.007415771484375, + "objective/train/value_loss": 0.00022081122733652592, + "objective/train/value_max": -5.650520324707031e-05, + "objective/train/value_min": -0.2288818359375, + "objective/train/value_reward_corr": 0.6760963179560076, + "objective/train/value_std": 0.01129913330078125, + "objective/train/weight_avg": 0.9992921948432922, + "objective/train/weighted_lm_loss": 1.3444547653198242, + "objective/train/weights_max": 1.128661870956421, + "objective/train/weights_min": 0.3694700598716736, + "theoretical_loss": 3.573840431904124, + "tokens_seen": 1366425600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005917990691702777, + "loss": 0.0688, + "theoretical_loss": 3.573809909697167, + "tokens_seen": 1366556672 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005917188252286952, + "loss": 0.0701, + "theoretical_loss": 3.573748876523379, + "tokens_seen": 1366818816 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005916385812871128, + "loss": 0.0717, + "theoretical_loss": 3.5736878583309624, + "tokens_seen": 1367080960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005915583373455305, + "loss": 0.0716, + "theoretical_loss": 3.5736268551133694, + "tokens_seen": 1367343104 + }, + { + "epoch": 0.41, + "learning_rate": 0.000591478093403948, + "loss": 0.0725, + "theoretical_loss": 3.5735658668640538, + "tokens_seen": 1367605248 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005913978494623656, + "loss": 0.0699, + "theoretical_loss": 3.573504893576476, + "tokens_seen": 1367867392 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005913176055207832, + "loss": 0.0699, + "theoretical_loss": 3.573443935244099, + "tokens_seen": 1368129536 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005912373615792008, + "loss": 0.0702, + "theoretical_loss": 3.5733829918603903, + "tokens_seen": 1368391680 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005911571176376183, + "loss": 0.0728, + "theoretical_loss": 3.573322063418821, + "tokens_seen": 1368653824 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005910768736960359, + "loss": 0.0711, + "theoretical_loss": 3.5732611499128666, + "tokens_seen": 1368915968 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005909966297544535, + "loss": 0.07, + "theoretical_loss": 3.5732002513360075, + "tokens_seen": 1369178112 + }, + { + "epoch": 0.41, + "learning_rate": 0.0005909163858128712, + "loss": 0.0699, + "theoretical_loss": 3.5731393676817267, + "tokens_seen": 1369440256 + }, + { + "epoch": 0.41, + "objective/train/advantage_avg": 0.00028625421691685915, + "objective/train/docs_used": 500422, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4607715606689453, + "objective/train/original_loss": 1.4607714414596558, + "objective/train/theoretical_loss": 3.573078498943513, + "objective/train/tokens_used": 1390162400, + "objective/train/value_avg": -0.0100250244140625, + "objective/train/value_loss": 0.00033545782207511365, + "objective/train/value_max": -5.519390106201172e-05, + "objective/train/value_min": -0.65869140625, + "objective/train/value_reward_corr": 0.753396177125751, + "objective/train/value_std": 0.0200042724609375, + "objective/train/weight_avg": 1.000441312789917, + "objective/train/weighted_lm_loss": 1.4615808725357056, + "objective/train/weights_max": 1.7887964248657227, + "objective/train/weights_min": 0.40708115696907043, + "theoretical_loss": 3.573078498943513, + "tokens_seen": 1369702400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005908361418712888, + "loss": 0.071, + "theoretical_loss": 3.573078498943513, + "tokens_seen": 1369702400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005907558979297063, + "loss": 0.0715, + "theoretical_loss": 3.5730176451148568, + "tokens_seen": 1369964544 + }, + { + "epoch": 0.42, + "learning_rate": 0.000590675653988124, + "loss": 0.0699, + "theoretical_loss": 3.572956806189256, + "tokens_seen": 1370226688 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005905954100465415, + "loss": 0.071, + "theoretical_loss": 3.5728959821602095, + "tokens_seen": 1370488832 + }, + { + "epoch": 0.42, + "learning_rate": 0.000590515166104959, + "loss": 0.0699, + "theoretical_loss": 3.5728351730212218, + "tokens_seen": 1370750976 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005904349221633767, + "loss": 0.0708, + "theoretical_loss": 3.5727743787658017, + "tokens_seen": 1371013120 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005903546782217942, + "loss": 0.0746, + "theoretical_loss": 3.572713599387461, + "tokens_seen": 1371275264 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005902744342802118, + "loss": 0.0694, + "theoretical_loss": 3.572652834879716, + "tokens_seen": 1371537408 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005901941903386295, + "loss": 0.0691, + "theoretical_loss": 3.5725920852360877, + "tokens_seen": 1371799552 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005901139463970471, + "loss": 0.0687, + "theoretical_loss": 3.5725313504501006, + "tokens_seen": 1372061696 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005900337024554646, + "loss": 0.0725, + "theoretical_loss": 3.5724706305152827, + "tokens_seen": 1372323840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005899534585138823, + "loss": 0.0668, + "theoretical_loss": 3.572409925425167, + "tokens_seen": 1372585984 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005898732145722998, + "loss": 0.0709, + "theoretical_loss": 3.5723492351732906, + "tokens_seen": 1372848128 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 8.659496234031394e-05, + "objective/train/docs_used": 501474, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2795002460479736, + "objective/train/original_loss": 1.2795000076293945, + "objective/train/theoretical_loss": 3.5723188956096736, + "objective/train/tokens_used": 1393439200, + "objective/train/value_avg": -0.0092010498046875, + "objective/train/value_loss": 0.0001278216950595379, + "objective/train/value_max": -8.821487426757812e-05, + "objective/train/value_min": -0.24072265625, + "objective/train/value_reward_corr": 0.8686309210454173, + "objective/train/value_std": 0.0202178955078125, + "objective/train/weight_avg": 1.0001461505889893, + "objective/train/weighted_lm_loss": 1.279416561126709, + "objective/train/weights_max": 1.1410603523254395, + "objective/train/weights_min": 0.38203245401382446, + "theoretical_loss": 3.5723188956096736, + "tokens_seen": 1372979200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005897929706307173, + "loss": 0.0701, + "theoretical_loss": 3.572288559753194, + "tokens_seen": 1373110272 + }, + { + "epoch": 0.42, + "learning_rate": 0.000589712726689135, + "loss": 0.0748, + "theoretical_loss": 3.5722278991584218, + "tokens_seen": 1373372416 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005896324827475525, + "loss": 0.0711, + "theoretical_loss": 3.572167253382523, + "tokens_seen": 1373634560 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005895522388059702, + "loss": 0.0741, + "theoretical_loss": 3.5721066224190503, + "tokens_seen": 1373896704 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005894719948643878, + "loss": 0.0701, + "theoretical_loss": 3.572046006261561, + "tokens_seen": 1374158848 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005893917509228054, + "loss": 0.0678, + "theoretical_loss": 3.5719854049036153, + "tokens_seen": 1374420992 + }, + { + "epoch": 0.42, + "learning_rate": 0.000589311506981223, + "loss": 0.0694, + "theoretical_loss": 3.571924818338779, + "tokens_seen": 1374683136 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005892312630396405, + "loss": 0.0685, + "theoretical_loss": 3.5718642465606214, + "tokens_seen": 1374945280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005891510190980581, + "loss": 0.0685, + "theoretical_loss": 3.571803689562714, + "tokens_seen": 1375207424 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005890707751564757, + "loss": 0.0721, + "theoretical_loss": 3.571743147338635, + "tokens_seen": 1375469568 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005889905312148933, + "loss": 0.0719, + "theoretical_loss": 3.5716826198819653, + "tokens_seen": 1375731712 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005889102872733108, + "loss": 0.0734, + "theoretical_loss": 3.57162210718629, + "tokens_seen": 1375993856 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0006503405747935176, + "objective/train/docs_used": 502650, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2630902528762817, + "objective/train/original_loss": 1.2630900144577026, + "objective/train/theoretical_loss": 3.5715616092451983, + "objective/train/tokens_used": 1396716000, + "objective/train/value_avg": -0.00872039794921875, + "objective/train/value_loss": 0.00023866714036557823, + "objective/train/value_max": -8.749961853027344e-05, + "objective/train/value_min": -0.23876953125, + "objective/train/value_reward_corr": 0.7916243639009954, + "objective/train/value_std": 0.01435089111328125, + "objective/train/weight_avg": 1.000759482383728, + "objective/train/weighted_lm_loss": 1.2627712488174438, + "objective/train/weights_max": 1.1882543563842773, + "objective/train/weights_min": 0.3765632212162018, + "theoretical_loss": 3.5715616092451983, + "tokens_seen": 1376256000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005888300433317285, + "loss": 0.0692, + "theoretical_loss": 3.5715616092451983, + "tokens_seen": 1376256000 + }, + { + "epoch": 0.42, + "learning_rate": 0.000588749799390146, + "loss": 0.0675, + "theoretical_loss": 3.5715011260522824, + "tokens_seen": 1376518144 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005886695554485636, + "loss": 0.0701, + "theoretical_loss": 3.57144065760114, + "tokens_seen": 1376780288 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005885893115069813, + "loss": 0.071, + "theoretical_loss": 3.5713802038853726, + "tokens_seen": 1377042432 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005885090675653988, + "loss": 0.0675, + "theoretical_loss": 3.5713197648985844, + "tokens_seen": 1377304576 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005884288236238165, + "loss": 0.0723, + "theoretical_loss": 3.571259340634385, + "tokens_seen": 1377566720 + }, + { + "epoch": 0.42, + "learning_rate": 0.000588348579682234, + "loss": 0.0682, + "theoretical_loss": 3.5711989310863874, + "tokens_seen": 1377828864 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005882683357406516, + "loss": 0.0664, + "theoretical_loss": 3.571138536248209, + "tokens_seen": 1378091008 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005881880917990692, + "loss": 0.0715, + "theoretical_loss": 3.57107815611347, + "tokens_seen": 1378353152 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005881078478574867, + "loss": 0.072, + "theoretical_loss": 3.571017790675796, + "tokens_seen": 1378615296 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005880276039159043, + "loss": 0.0712, + "theoretical_loss": 3.570957439928815, + "tokens_seen": 1378877440 + }, + { + "epoch": 0.42, + "learning_rate": 0.000587947359974322, + "loss": 0.0708, + "theoretical_loss": 3.5708971038661614, + "tokens_seen": 1379139584 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005878671160327396, + "loss": 0.0694, + "theoretical_loss": 3.5708367824814715, + "tokens_seen": 1379401728 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0012014803942292929, + "objective/train/docs_used": 503826, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4277607202529907, + "objective/train/original_loss": 1.4277606010437012, + "objective/train/theoretical_loss": 3.5708066272913754, + "objective/train/tokens_used": 1399992800, + "objective/train/value_avg": -0.00450897216796875, + "objective/train/value_loss": 6.611572462134063e-05, + "objective/train/value_max": -2.4139881134033203e-05, + "objective/train/value_min": -0.134033203125, + "objective/train/value_reward_corr": 0.5657798775306075, + "objective/train/value_std": 0.006134033203125, + "objective/train/weight_avg": 1.0012338161468506, + "objective/train/weighted_lm_loss": 1.4301097393035889, + "objective/train/weights_max": 1.1097402572631836, + "objective/train/weights_min": 0.7171305418014526, + "theoretical_loss": 3.5708066272913754, + "tokens_seen": 1379532800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005877868720911571, + "loss": 0.0707, + "theoretical_loss": 3.570776475768386, + "tokens_seen": 1379663872 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005877066281495748, + "loss": 0.0714, + "theoretical_loss": 3.57071618372055, + "tokens_seen": 1379926016 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005876263842079923, + "loss": 0.0719, + "theoretical_loss": 3.570655906331612, + "tokens_seen": 1380188160 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005875461402664098, + "loss": 0.0708, + "theoretical_loss": 3.570595643595225, + "tokens_seen": 1380450304 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005874658963248275, + "loss": 0.0708, + "theoretical_loss": 3.570535395505045, + "tokens_seen": 1380712448 + }, + { + "epoch": 0.42, + "learning_rate": 0.000587385652383245, + "loss": 0.0682, + "theoretical_loss": 3.570475162054734, + "tokens_seen": 1380974592 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005873054084416626, + "loss": 0.0705, + "theoretical_loss": 3.570414943237956, + "tokens_seen": 1381236736 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005872251645000803, + "loss": 0.0706, + "theoretical_loss": 3.570354739048379, + "tokens_seen": 1381498880 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005871449205584979, + "loss": 0.0696, + "theoretical_loss": 3.5702945494796765, + "tokens_seen": 1381761024 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005870646766169155, + "loss": 0.0708, + "theoretical_loss": 3.5702343745255236, + "tokens_seen": 1382023168 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005869844326753331, + "loss": 0.0707, + "theoretical_loss": 3.5701742141796022, + "tokens_seen": 1382285312 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005869041887337506, + "loss": 0.0683, + "theoretical_loss": 3.570114068435595, + "tokens_seen": 1382547456 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0009983739582821727, + "objective/train/docs_used": 505055, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3534469604492188, + "objective/train/original_loss": 1.3534469604492188, + "objective/train/theoretical_loss": 3.570053937287192, + "objective/train/tokens_used": 1403269600, + "objective/train/value_avg": -0.01267242431640625, + "objective/train/value_loss": 0.00016829556261654943, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.450927734375, + "objective/train/value_reward_corr": 0.8653068777879614, + "objective/train/value_std": 0.0235748291015625, + "objective/train/weight_avg": 1.0010778903961182, + "objective/train/weighted_lm_loss": 1.354577898979187, + "objective/train/weights_max": 1.1801592111587524, + "objective/train/weights_min": 0.3766666650772095, + "theoretical_loss": 3.570053937287192, + "tokens_seen": 1382809600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005868239447921682, + "loss": 0.0718, + "theoretical_loss": 3.570053937287192, + "tokens_seen": 1382809600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005867437008505858, + "loss": 0.0704, + "theoretical_loss": 3.569993820728084, + "tokens_seen": 1383071744 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005866634569090033, + "loss": 0.0687, + "theoretical_loss": 3.569933718751967, + "tokens_seen": 1383333888 + }, + { + "epoch": 0.42, + "learning_rate": 0.000586583212967421, + "loss": 0.0705, + "theoretical_loss": 3.569873631352542, + "tokens_seen": 1383596032 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005865029690258386, + "loss": 0.0706, + "theoretical_loss": 3.5698135585235122, + "tokens_seen": 1383858176 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005864227250842562, + "loss": 0.0684, + "theoretical_loss": 3.5697535002585856, + "tokens_seen": 1384120320 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005863424811426738, + "loss": 0.0703, + "theoretical_loss": 3.569693456551474, + "tokens_seen": 1384382464 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005862622372010913, + "loss": 0.0722, + "theoretical_loss": 3.5696334273958925, + "tokens_seen": 1384644608 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005861819932595089, + "loss": 0.0702, + "theoretical_loss": 3.569573412785561, + "tokens_seen": 1384906752 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005861017493179265, + "loss": 0.0718, + "theoretical_loss": 3.569513412714203, + "tokens_seen": 1385168896 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005860215053763441, + "loss": 0.073, + "theoretical_loss": 3.569453427175546, + "tokens_seen": 1385431040 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005859412614347616, + "loss": 0.0689, + "theoretical_loss": 3.5693934561633203, + "tokens_seen": 1385693184 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005858610174931793, + "loss": 0.0691, + "theoretical_loss": 3.5693334996712625, + "tokens_seen": 1385955328 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0001342852774541825, + "objective/train/docs_used": 506209, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5595440864562988, + "objective/train/original_loss": 1.5595442056655884, + "objective/train/theoretical_loss": 3.5693035268683393, + "objective/train/tokens_used": 1406546400, + "objective/train/value_avg": -0.007114410400390625, + "objective/train/value_loss": 0.00042897695675492287, + "objective/train/value_max": -7.31348991394043e-05, + "objective/train/value_min": -0.84033203125, + "objective/train/value_reward_corr": 0.7246249964333472, + "objective/train/value_std": 0.0205078125, + "objective/train/weight_avg": 1.0003262758255005, + "objective/train/weighted_lm_loss": 1.5597079992294312, + "objective/train/weights_max": 1.8627805709838867, + "objective/train/weights_min": 0.39913442730903625, + "theoretical_loss": 3.5693035268683393, + "tokens_seen": 1386086400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005857807735515968, + "loss": 0.0709, + "theoretical_loss": 3.5692735576931103, + "tokens_seen": 1386217472 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005857005296100145, + "loss": 0.0675, + "theoretical_loss": 3.569213630222607, + "tokens_seen": 1386479616 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005856202856684321, + "loss": 0.07, + "theoretical_loss": 3.5691537172535, + "tokens_seen": 1386741760 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005855400417268496, + "loss": 0.0676, + "theoretical_loss": 3.569093818779539, + "tokens_seen": 1387003904 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005854597977852673, + "loss": 0.0737, + "theoretical_loss": 3.5690339347944784, + "tokens_seen": 1387266048 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005853795538436848, + "loss": 0.0684, + "theoretical_loss": 3.568974065292077, + "tokens_seen": 1387528192 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005852993099021024, + "loss": 0.0725, + "theoretical_loss": 3.5689142102660973, + "tokens_seen": 1387790336 + }, + { + "epoch": 0.42, + "learning_rate": 0.00058521906596052, + "loss": 0.068, + "theoretical_loss": 3.568854369710305, + "tokens_seen": 1388052480 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005851388220189375, + "loss": 0.0671, + "theoretical_loss": 3.5687945436184703, + "tokens_seen": 1388314624 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005850585780773551, + "loss": 0.0703, + "theoretical_loss": 3.5687347319843665, + "tokens_seen": 1388576768 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005849783341357728, + "loss": 0.0705, + "theoretical_loss": 3.5686749348017726, + "tokens_seen": 1388838912 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005848980901941904, + "loss": 0.0718, + "theoretical_loss": 3.5686151520644684, + "tokens_seen": 1389101056 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0010544790420681238, + "objective/train/docs_used": 507487, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3682609796524048, + "objective/train/original_loss": 1.3682608604431152, + "objective/train/theoretical_loss": 3.56855538376624, + "objective/train/tokens_used": 1409823200, + "objective/train/value_avg": -0.00675201416015625, + "objective/train/value_loss": 8.866895223036408e-05, + "objective/train/value_max": -4.5418739318847656e-05, + "objective/train/value_min": -0.2037353515625, + "objective/train/value_reward_corr": 0.83755308145317, + "objective/train/value_std": 0.012359619140625, + "objective/train/weight_avg": 1.0010948181152344, + "objective/train/weighted_lm_loss": 1.3702481985092163, + "objective/train/weights_max": 1.1099433898925781, + "objective/train/weights_min": 0.3734019696712494, + "theoretical_loss": 3.56855538376624, + "tokens_seen": 1389363200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005848178462526079, + "loss": 0.0704, + "theoretical_loss": 3.56855538376624, + "tokens_seen": 1389363200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005847376023110256, + "loss": 0.0684, + "theoretical_loss": 3.568495629900877, + "tokens_seen": 1389625344 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005846573583694431, + "loss": 0.0729, + "theoretical_loss": 3.5684358904621725, + "tokens_seen": 1389887488 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005845771144278606, + "loss": 0.0676, + "theoretical_loss": 3.5683761654439223, + "tokens_seen": 1390149632 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005844968704862783, + "loss": 0.067, + "theoretical_loss": 3.5683164548399287, + "tokens_seen": 1390411776 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005844166265446958, + "loss": 0.072, + "theoretical_loss": 3.568256758643995, + "tokens_seen": 1390673920 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005843363826031135, + "loss": 0.0671, + "theoretical_loss": 3.5681970768499305, + "tokens_seen": 1390936064 + }, + { + "epoch": 0.42, + "learning_rate": 0.000584256138661531, + "loss": 0.0681, + "theoretical_loss": 3.5681374094515466, + "tokens_seen": 1391198208 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005841758947199487, + "loss": 0.0697, + "theoretical_loss": 3.5680777564426602, + "tokens_seen": 1391460352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005840956507783663, + "loss": 0.0667, + "theoretical_loss": 3.56801811781709, + "tokens_seen": 1391722496 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005840154068367838, + "loss": 0.0687, + "theoretical_loss": 3.5679584935686615, + "tokens_seen": 1391984640 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005839351628952014, + "loss": 0.0681, + "theoretical_loss": 3.5678988836912007, + "tokens_seen": 1392246784 + }, + { + "epoch": 0.42, + "learning_rate": 0.000583854918953619, + "loss": 0.0693, + "theoretical_loss": 3.567839288178539, + "tokens_seen": 1392508928 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0001814004499465227, + "objective/train/docs_used": 508733, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.275675892829895, + "objective/train/original_loss": 1.2756757736206055, + "objective/train/theoretical_loss": 3.567809495807081, + "objective/train/tokens_used": 1413100000, + "objective/train/value_avg": -0.00782012939453125, + "objective/train/value_loss": 0.00013020160258747637, + "objective/train/value_max": -0.00010722875595092773, + "objective/train/value_min": -0.270751953125, + "objective/train/value_reward_corr": 0.7759416009441611, + "objective/train/value_std": 0.01282501220703125, + "objective/train/weight_avg": 1.0002455711364746, + "objective/train/weighted_lm_loss": 1.275399088859558, + "objective/train/weights_max": 1.1240557432174683, + "objective/train/weights_min": 0.7806023359298706, + "theoretical_loss": 3.567809495807081, + "tokens_seen": 1392640000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005837746750120366, + "loss": 0.0704, + "theoretical_loss": 3.5677797070245125, + "tokens_seen": 1392771072 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005836944310704541, + "loss": 0.0688, + "theoretical_loss": 3.567720140222959, + "tokens_seen": 1393033216 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005836141871288718, + "loss": 0.0713, + "theoretical_loss": 3.567660587767722, + "tokens_seen": 1393295360 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005835339431872894, + "loss": 0.0694, + "theoretical_loss": 3.567601049652648, + "tokens_seen": 1393557504 + }, + { + "epoch": 0.42, + "learning_rate": 0.000583453699245707, + "loss": 0.069, + "theoretical_loss": 3.567541525871587, + "tokens_seen": 1393819648 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005833734553041246, + "loss": 0.0691, + "theoretical_loss": 3.5674820164183934, + "tokens_seen": 1394081792 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005832932113625421, + "loss": 0.0718, + "theoretical_loss": 3.567422521286925, + "tokens_seen": 1394343936 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005832129674209598, + "loss": 0.0711, + "theoretical_loss": 3.5673630404710432, + "tokens_seen": 1394606080 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005831327234793773, + "loss": 0.0704, + "theoretical_loss": 3.567303573964614, + "tokens_seen": 1394868224 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005830524795377949, + "loss": 0.0704, + "theoretical_loss": 3.5672441217615063, + "tokens_seen": 1395130368 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005829722355962125, + "loss": 0.0732, + "theoretical_loss": 3.5671846838555936, + "tokens_seen": 1395392512 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005828919916546301, + "loss": 0.0706, + "theoretical_loss": 3.567125260240752, + "tokens_seen": 1395654656 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": -0.0010937025072053075, + "objective/train/docs_used": 509897, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2824382781982422, + "objective/train/original_loss": 1.2824382781982422, + "objective/train/theoretical_loss": 3.5670658509108626, + "objective/train/tokens_used": 1416376800, + "objective/train/value_avg": -0.00798797607421875, + "objective/train/value_loss": 0.0003030859516002238, + "objective/train/value_max": -9.763240814208984e-05, + "objective/train/value_min": -0.5146484375, + "objective/train/value_reward_corr": 0.868826981243496, + "objective/train/value_std": 0.0196075439453125, + "objective/train/weight_avg": 0.9990524053573608, + "objective/train/weighted_lm_loss": 1.2816001176834106, + "objective/train/weights_max": 1.214937686920166, + "objective/train/weights_min": 0.6117776036262512, + "theoretical_loss": 3.5670658509108626, + "tokens_seen": 1395916800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005828117477130476, + "loss": 0.0684, + "theoretical_loss": 3.5670658509108626, + "tokens_seen": 1395916800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005827315037714653, + "loss": 0.0727, + "theoretical_loss": 3.5670064558598096, + "tokens_seen": 1396178944 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005826512598298829, + "loss": 0.0704, + "theoretical_loss": 3.5669470750814813, + "tokens_seen": 1396441088 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005825710158883004, + "loss": 0.0713, + "theoretical_loss": 3.5668877085697694, + "tokens_seen": 1396703232 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005824907719467181, + "loss": 0.0731, + "theoretical_loss": 3.5668283563185703, + "tokens_seen": 1396965376 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005824105280051356, + "loss": 0.0695, + "theoretical_loss": 3.566769018321782, + "tokens_seen": 1397227520 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005823302840635532, + "loss": 0.0731, + "theoretical_loss": 3.5667096945733086, + "tokens_seen": 1397489664 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005822500401219708, + "loss": 0.0695, + "theoretical_loss": 3.566650385067057, + "tokens_seen": 1397751808 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005821697961803883, + "loss": 0.0699, + "theoretical_loss": 3.5665910897969377, + "tokens_seen": 1398013952 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005820895522388059, + "loss": 0.0723, + "theoretical_loss": 3.5665318087568645, + "tokens_seen": 1398276096 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005820093082972236, + "loss": 0.0711, + "theoretical_loss": 3.5664725419407564, + "tokens_seen": 1398538240 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005819290643556412, + "loss": 0.0704, + "theoretical_loss": 3.566413289342535, + "tokens_seen": 1398800384 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005818488204140588, + "loss": 0.0703, + "theoretical_loss": 3.566354050956126, + "tokens_seen": 1399062528 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.000327935500536114, + "objective/train/docs_used": 511124, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4136905670166016, + "objective/train/original_loss": 1.4136905670166016, + "objective/train/theoretical_loss": 3.5663244370904543, + "objective/train/tokens_used": 1419653600, + "objective/train/value_avg": -0.0080413818359375, + "objective/train/value_loss": 0.00012046914343954995, + "objective/train/value_max": -0.00011593103408813477, + "objective/train/value_min": -0.3408203125, + "objective/train/value_reward_corr": 0.7663032114662961, + "objective/train/value_std": 0.013214111328125, + "objective/train/weight_avg": 1.0003875494003296, + "objective/train/weighted_lm_loss": 1.4141696691513062, + "objective/train/weights_max": 1.1405415534973145, + "objective/train/weights_min": 0.8112593293190002, + "theoretical_loss": 3.5663244370904543, + "tokens_seen": 1399193600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005817685764724764, + "loss": 0.0721, + "theoretical_loss": 3.566294826775459, + "tokens_seen": 1399324672 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005816883325308939, + "loss": 0.0682, + "theoretical_loss": 3.566235616794466, + "tokens_seen": 1399586816 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005816080885893115, + "loss": 0.0698, + "theoretical_loss": 3.566176421007085, + "tokens_seen": 1399848960 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005815278446477291, + "loss": 0.0691, + "theoretical_loss": 3.566117239407256, + "tokens_seen": 1400111104 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005814476007061466, + "loss": 0.0718, + "theoretical_loss": 3.5660580719889237, + "tokens_seen": 1400373248 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005813673567645643, + "loss": 0.0727, + "theoretical_loss": 3.5659989187460353, + "tokens_seen": 1400635392 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005812871128229819, + "loss": 0.0741, + "theoretical_loss": 3.5659397796725427, + "tokens_seen": 1400897536 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005812068688813995, + "loss": 0.0672, + "theoretical_loss": 3.565880654762402, + "tokens_seen": 1401159680 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005811266249398171, + "loss": 0.0685, + "theoretical_loss": 3.5658215440095717, + "tokens_seen": 1401421824 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005810463809982346, + "loss": 0.0699, + "theoretical_loss": 3.565762447408015, + "tokens_seen": 1401683968 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005809661370566522, + "loss": 0.0723, + "theoretical_loss": 3.5657033649516974, + "tokens_seen": 1401946112 + }, + { + "epoch": 0.42, + "learning_rate": 0.0005808858931150698, + "loss": 0.0693, + "theoretical_loss": 3.5656442966345905, + "tokens_seen": 1402208256 + }, + { + "epoch": 0.42, + "objective/train/advantage_avg": 0.0005382169038057327, + "objective/train/docs_used": 512300, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5377522706985474, + "objective/train/original_loss": 1.537752389907837, + "objective/train/theoretical_loss": 3.565585242450667, + "objective/train/tokens_used": 1422930400, + "objective/train/value_avg": -0.006023406982421875, + "objective/train/value_loss": 0.0002111362264258787, + "objective/train/value_max": -6.711483001708984e-05, + "objective/train/value_min": -0.61865234375, + "objective/train/value_reward_corr": 0.7056385404610528, + "objective/train/value_std": 0.013519287109375, + "objective/train/weight_avg": 1.0006349086761475, + "objective/train/weighted_lm_loss": 1.538962721824646, + "objective/train/weights_max": 1.5594542026519775, + "objective/train/weights_min": 0.37144145369529724, + "theoretical_loss": 3.565585242450667, + "tokens_seen": 1402470400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005808056491734874, + "loss": 0.0743, + "theoretical_loss": 3.565585242450667, + "tokens_seen": 1402470400 + }, + { + "epoch": 0.43, + "learning_rate": 0.000580725405231905, + "loss": 0.0716, + "theoretical_loss": 3.5655262023939054, + "tokens_seen": 1402732544 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005806451612903226, + "loss": 0.0687, + "theoretical_loss": 3.5654671764582866, + "tokens_seen": 1402994688 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005805649173487401, + "loss": 0.0703, + "theoretical_loss": 3.5654081646377955, + "tokens_seen": 1403256832 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005804846734071579, + "loss": 0.0705, + "theoretical_loss": 3.5653491669264215, + "tokens_seen": 1403518976 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005804044294655754, + "loss": 0.071, + "theoretical_loss": 3.565290183318156, + "tokens_seen": 1403781120 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005803241855239929, + "loss": 0.0706, + "theoretical_loss": 3.565231213806995, + "tokens_seen": 1404043264 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005802439415824106, + "loss": 0.0692, + "theoretical_loss": 3.5651722583869394, + "tokens_seen": 1404305408 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005801636976408281, + "loss": 0.0724, + "theoretical_loss": 3.565113317051991, + "tokens_seen": 1404567552 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005800834536992457, + "loss": 0.0699, + "theoretical_loss": 3.5650543897961584, + "tokens_seen": 1404829696 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005800032097576633, + "loss": 0.0724, + "theoretical_loss": 3.5649954766134515, + "tokens_seen": 1405091840 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005799229658160809, + "loss": 0.0724, + "theoretical_loss": 3.5649365774978845, + "tokens_seen": 1405353984 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005798427218744984, + "loss": 0.07, + "theoretical_loss": 3.5648776924434755, + "tokens_seen": 1405616128 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0011828236747533083, + "objective/train/docs_used": 513528, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.369804859161377, + "objective/train/original_loss": 1.369804859161377, + "objective/train/theoretical_loss": 3.5648482551873366, + "objective/train/tokens_used": 1426207200, + "objective/train/value_avg": -0.00914764404296875, + "objective/train/value_loss": 0.00022888151579536498, + "objective/train/value_max": -0.00011962652206420898, + "objective/train/value_min": -0.6591796875, + "objective/train/value_reward_corr": 0.6871334576079523, + "objective/train/value_std": 0.01441192626953125, + "objective/train/weight_avg": 1.0012850761413574, + "objective/train/weighted_lm_loss": 1.371004581451416, + "objective/train/weights_max": 1.3404009342193604, + "objective/train/weights_min": 0.3783082664012909, + "theoretical_loss": 3.5648482551873366, + "tokens_seen": 1405747200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005797624779329161, + "loss": 0.0726, + "theoretical_loss": 3.5648188214442467, + "tokens_seen": 1405878272 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005796822339913337, + "loss": 0.0735, + "theoretical_loss": 3.5647599644942227, + "tokens_seen": 1406140416 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005796019900497512, + "loss": 0.0713, + "theoretical_loss": 3.564701121587434, + "tokens_seen": 1406402560 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005795217461081689, + "loss": 0.067, + "theoretical_loss": 3.5646422927179113, + "tokens_seen": 1406664704 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005794415021665864, + "loss": 0.0693, + "theoretical_loss": 3.564583477879692, + "tokens_seen": 1406926848 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005793612582250041, + "loss": 0.0707, + "theoretical_loss": 3.5645246770668164, + "tokens_seen": 1407188992 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005792810142834216, + "loss": 0.0717, + "theoretical_loss": 3.5644658902733273, + "tokens_seen": 1407451136 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005792007703418391, + "loss": 0.0714, + "theoretical_loss": 3.564407117493272, + "tokens_seen": 1407713280 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005791205264002569, + "loss": 0.0695, + "theoretical_loss": 3.564348358720702, + "tokens_seen": 1407975424 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005790402824586744, + "loss": 0.0732, + "theoretical_loss": 3.564289613949671, + "tokens_seen": 1408237568 + }, + { + "epoch": 0.43, + "learning_rate": 0.000578960038517092, + "loss": 0.0717, + "theoretical_loss": 3.5642308831742384, + "tokens_seen": 1408499712 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005788797945755096, + "loss": 0.0708, + "theoretical_loss": 3.564172166388465, + "tokens_seen": 1408761856 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.000899914710316807, + "objective/train/docs_used": 514827, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.595373511314392, + "objective/train/original_loss": 1.5953733921051025, + "objective/train/theoretical_loss": 3.5641134635864153, + "objective/train/tokens_used": 1429484000, + "objective/train/value_avg": -0.007175445556640625, + "objective/train/value_loss": 0.00010781797755043954, + "objective/train/value_max": -6.35385513305664e-05, + "objective/train/value_min": -0.2237548828125, + "objective/train/value_reward_corr": 0.6808584583299038, + "objective/train/value_std": 0.00986480712890625, + "objective/train/weight_avg": 1.000953197479248, + "objective/train/weighted_lm_loss": 1.5966917276382446, + "objective/train/weights_max": 1.1473335027694702, + "objective/train/weights_min": 0.6740972399711609, + "theoretical_loss": 3.5641134635864153, + "tokens_seen": 1409024000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005787995506339272, + "loss": 0.0708, + "theoretical_loss": 3.5641134635864153, + "tokens_seen": 1409024000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005787193066923447, + "loss": 0.0691, + "theoretical_loss": 3.56405477476216, + "tokens_seen": 1409286144 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005786390627507623, + "loss": 0.0711, + "theoretical_loss": 3.563996099909771, + "tokens_seen": 1409548288 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005785588188091799, + "loss": 0.0728, + "theoretical_loss": 3.5639374390233245, + "tokens_seen": 1409810432 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005784785748675974, + "loss": 0.0706, + "theoretical_loss": 3.563878792096901, + "tokens_seen": 1410072576 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005783983309260151, + "loss": 0.0707, + "theoretical_loss": 3.5638201591245826, + "tokens_seen": 1410334720 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005783180869844327, + "loss": 0.0696, + "theoretical_loss": 3.563761540100457, + "tokens_seen": 1410596864 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005782378430428504, + "loss": 0.0722, + "theoretical_loss": 3.5637029350186156, + "tokens_seen": 1410859008 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005781575991012679, + "loss": 0.0715, + "theoretical_loss": 3.563644343873152, + "tokens_seen": 1411121152 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005780773551596854, + "loss": 0.0742, + "theoretical_loss": 3.5635857666581643, + "tokens_seen": 1411383296 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005779971112181031, + "loss": 0.0723, + "theoretical_loss": 3.5635272033677534, + "tokens_seen": 1411645440 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005779168672765206, + "loss": 0.0735, + "theoretical_loss": 3.5634686539960247, + "tokens_seen": 1411907584 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005778366233349382, + "loss": 0.0723, + "theoretical_loss": 3.5634101185370874, + "tokens_seen": 1412169728 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0011907402658835053, + "objective/train/docs_used": 516049, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4614386558532715, + "objective/train/original_loss": 1.4614386558532715, + "objective/train/theoretical_loss": 3.563380856023075, + "objective/train/tokens_used": 1432760800, + "objective/train/value_avg": -0.0091552734375, + "objective/train/value_loss": 0.00017796926840674132, + "objective/train/value_max": -5.9664249420166016e-05, + "objective/train/value_min": -0.1868896484375, + "objective/train/value_reward_corr": 0.6773004524352086, + "objective/train/value_std": 0.01293182373046875, + "objective/train/weight_avg": 1.0012750625610352, + "objective/train/weighted_lm_loss": 1.4639524221420288, + "objective/train/weights_max": 1.1568671464920044, + "objective/train/weights_min": 0.37346750497817993, + "theoretical_loss": 3.563380856023075, + "tokens_seen": 1412300800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005777563793933558, + "loss": 0.0709, + "theoretical_loss": 3.5633515969850533, + "tokens_seen": 1412431872 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005776761354517734, + "loss": 0.0711, + "theoretical_loss": 3.5632930893340378, + "tokens_seen": 1412694016 + }, + { + "epoch": 0.43, + "learning_rate": 0.000577595891510191, + "loss": 0.0722, + "theoretical_loss": 3.5632345955781606, + "tokens_seen": 1412956160 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005775156475686087, + "loss": 0.0697, + "theoretical_loss": 3.5631761157115456, + "tokens_seen": 1413218304 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005774354036270262, + "loss": 0.0696, + "theoretical_loss": 3.5631176497283175, + "tokens_seen": 1413480448 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005773551596854437, + "loss": 0.0722, + "theoretical_loss": 3.563059197622608, + "tokens_seen": 1413742592 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005772749157438614, + "loss": 0.0722, + "theoretical_loss": 3.56300075938855, + "tokens_seen": 1414004736 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005771946718022789, + "loss": 0.072, + "theoretical_loss": 3.5629423350202813, + "tokens_seen": 1414266880 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005771144278606965, + "loss": 0.0708, + "theoretical_loss": 3.5628839245119424, + "tokens_seen": 1414529024 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005770341839191141, + "loss": 0.0735, + "theoretical_loss": 3.5628255278576777, + "tokens_seen": 1414791168 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005769539399775317, + "loss": 0.0721, + "theoretical_loss": 3.5627671450516347, + "tokens_seen": 1415053312 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005768736960359494, + "loss": 0.0721, + "theoretical_loss": 3.5627087760879657, + "tokens_seen": 1415315456 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0009819098049774766, + "objective/train/docs_used": 517252, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.468359112739563, + "objective/train/original_loss": 1.4683589935302734, + "objective/train/theoretical_loss": 3.5626504209608254, + "objective/train/tokens_used": 1436037600, + "objective/train/value_avg": -0.006824493408203125, + "objective/train/value_loss": 0.00015597307356074452, + "objective/train/value_max": -7.200241088867188e-05, + "objective/train/value_min": -0.5869140625, + "objective/train/value_reward_corr": 0.6682466763435388, + "objective/train/value_std": 0.0118560791015625, + "objective/train/weight_avg": 1.001055121421814, + "objective/train/weighted_lm_loss": 1.4700490236282349, + "objective/train/weights_max": 1.603075385093689, + "objective/train/weights_min": 0.37400925159454346, + "theoretical_loss": 3.5626504209608254, + "tokens_seen": 1415577600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005767934520943669, + "loss": 0.0719, + "theoretical_loss": 3.5626504209608254, + "tokens_seen": 1415577600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005767132081527845, + "loss": 0.0677, + "theoretical_loss": 3.5625920796643724, + "tokens_seen": 1415839744 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005766329642112021, + "loss": 0.074, + "theoretical_loss": 3.5625337521927687, + "tokens_seen": 1416101888 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005765527202696197, + "loss": 0.0725, + "theoretical_loss": 3.5624754385401802, + "tokens_seen": 1416364032 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005764724763280372, + "loss": 0.0704, + "theoretical_loss": 3.562417138700776, + "tokens_seen": 1416626176 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005763922323864549, + "loss": 0.0727, + "theoretical_loss": 3.5623588526687295, + "tokens_seen": 1416888320 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005763119884448724, + "loss": 0.0683, + "theoretical_loss": 3.562300580438216, + "tokens_seen": 1417150464 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005762317445032899, + "loss": 0.0731, + "theoretical_loss": 3.562242322003416, + "tokens_seen": 1417412608 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005761515005617077, + "loss": 0.0712, + "theoretical_loss": 3.5621840773585127, + "tokens_seen": 1417674752 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005760712566201252, + "loss": 0.0694, + "theoretical_loss": 3.5621258464976924, + "tokens_seen": 1417936896 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005759910126785428, + "loss": 0.069, + "theoretical_loss": 3.5620676294151465, + "tokens_seen": 1418199040 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005759107687369604, + "loss": 0.0696, + "theoretical_loss": 3.562009426105069, + "tokens_seen": 1418461184 + }, + { + "epoch": 0.43, + "learning_rate": 0.000575830524795378, + "loss": 0.0714, + "theoretical_loss": 3.561951236561656, + "tokens_seen": 1418723328 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.00033617427106946707, + "objective/train/docs_used": 518371, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4012682437896729, + "objective/train/original_loss": 1.4012682437896729, + "objective/train/theoretical_loss": 3.5619221469506375, + "objective/train/tokens_used": 1439314400, + "objective/train/value_avg": -0.006404876708984375, + "objective/train/value_loss": 0.0001416690502082929, + "objective/train/value_max": -6.014108657836914e-05, + "objective/train/value_min": -0.350341796875, + "objective/train/value_reward_corr": 0.7427841776207642, + "objective/train/value_std": 0.013153076171875, + "objective/train/weight_avg": 1.000402808189392, + "objective/train/weighted_lm_loss": 1.4016001224517822, + "objective/train/weights_max": 1.1936334371566772, + "objective/train/weights_min": 0.38919898867607117, + "theoretical_loss": 3.5619221469506375, + "tokens_seen": 1418854400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005757502808537955, + "loss": 0.0728, + "theoretical_loss": 3.56189306077911, + "tokens_seen": 1418985472 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005756700369122131, + "loss": 0.072, + "theoretical_loss": 3.561834898751635, + "tokens_seen": 1419247616 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005755897929706307, + "loss": 0.0709, + "theoretical_loss": 3.561776750473439, + "tokens_seen": 1419509760 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005755095490290483, + "loss": 0.073, + "theoretical_loss": 3.561718615938733, + "tokens_seen": 1419771904 + }, + { + "epoch": 0.43, + "learning_rate": 0.000575429305087466, + "loss": 0.0707, + "theoretical_loss": 3.5616604951417328, + "tokens_seen": 1420034048 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005753490611458835, + "loss": 0.0708, + "theoretical_loss": 3.561602388076656, + "tokens_seen": 1420296192 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005752688172043012, + "loss": 0.0699, + "theoretical_loss": 3.5615442947377254, + "tokens_seen": 1420558336 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005751885732627187, + "loss": 0.0722, + "theoretical_loss": 3.5614862151191664, + "tokens_seen": 1420820480 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005751083293211362, + "loss": 0.0721, + "theoretical_loss": 3.561428149215208, + "tokens_seen": 1421082624 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005750280853795539, + "loss": 0.0694, + "theoretical_loss": 3.561370097020083, + "tokens_seen": 1421344768 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005749478414379714, + "loss": 0.0712, + "theoretical_loss": 3.561312058528026, + "tokens_seen": 1421606912 + }, + { + "epoch": 0.43, + "learning_rate": 0.000574867597496389, + "loss": 0.0709, + "theoretical_loss": 3.5612540337332783, + "tokens_seen": 1421869056 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0010773309040814638, + "objective/train/docs_used": 519513, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5569469928741455, + "objective/train/original_loss": 1.556947112083435, + "objective/train/theoretical_loss": 3.5611960226300816, + "objective/train/tokens_used": 1442591200, + "objective/train/value_avg": -0.007076263427734375, + "objective/train/value_loss": 0.00012166771193733439, + "objective/train/value_max": -4.9114227294921875e-05, + "objective/train/value_min": -0.2486572265625, + "objective/train/value_reward_corr": 0.7132574980049912, + "objective/train/value_std": 0.011260986328125, + "objective/train/weight_avg": 1.001137375831604, + "objective/train/weighted_lm_loss": 1.5595605373382568, + "objective/train/weights_max": 1.176132321357727, + "objective/train/weights_min": 0.8112593293190002, + "theoretical_loss": 3.5611960226300816, + "tokens_seen": 1422131200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005747873535548066, + "loss": 0.0724, + "theoretical_loss": 3.5611960226300816, + "tokens_seen": 1422131200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005747071096132242, + "loss": 0.0713, + "theoretical_loss": 3.561138025212683, + "tokens_seen": 1422393344 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005746268656716417, + "loss": 0.0699, + "theoretical_loss": 3.561080041475332, + "tokens_seen": 1422655488 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005745466217300594, + "loss": 0.0692, + "theoretical_loss": 3.5610220714122827, + "tokens_seen": 1422917632 + }, + { + "epoch": 0.43, + "learning_rate": 0.000574466377788477, + "loss": 0.0697, + "theoretical_loss": 3.560964115017791, + "tokens_seen": 1423179776 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005743861338468946, + "loss": 0.0701, + "theoretical_loss": 3.560906172286118, + "tokens_seen": 1423441920 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005743058899053122, + "loss": 0.0703, + "theoretical_loss": 3.5608482432115265, + "tokens_seen": 1423704064 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005742256459637297, + "loss": 0.0699, + "theoretical_loss": 3.5607903277882853, + "tokens_seen": 1423966208 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005741454020221474, + "loss": 0.0714, + "theoretical_loss": 3.560732426010664, + "tokens_seen": 1424228352 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005740651580805649, + "loss": 0.0727, + "theoretical_loss": 3.560674537872937, + "tokens_seen": 1424490496 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005739849141389825, + "loss": 0.0694, + "theoretical_loss": 3.560616663369382, + "tokens_seen": 1424752640 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005739046701974002, + "loss": 0.0703, + "theoretical_loss": 3.5605588024942803, + "tokens_seen": 1425014784 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005738244262558177, + "loss": 0.0708, + "theoretical_loss": 3.560500955241916, + "tokens_seen": 1425276928 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0009654366294853389, + "objective/train/docs_used": 520731, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3049242496490479, + "objective/train/original_loss": 1.3049242496490479, + "objective/train/theoretical_loss": 3.5604720367224756, + "objective/train/tokens_used": 1445868000, + "objective/train/value_avg": -0.007049560546875, + "objective/train/value_loss": 0.00032756561995483935, + "objective/train/value_max": -3.534555435180664e-05, + "objective/train/value_min": -0.97900390625, + "objective/train/value_reward_corr": 0.657158229851628, + "objective/train/value_std": 0.01511383056640625, + "objective/train/weight_avg": 1.0011066198349, + "objective/train/weighted_lm_loss": 1.3057200908660889, + "objective/train/weights_max": 1.2917603254318237, + "objective/train/weights_min": 0.36948272585868835, + "theoretical_loss": 3.5604720367224756, + "tokens_seen": 1425408000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005737441823142353, + "loss": 0.0713, + "theoretical_loss": 3.5604431216065775, + "tokens_seen": 1425539072 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005736639383726529, + "loss": 0.0705, + "theoretical_loss": 3.5603853015825564, + "tokens_seen": 1425801216 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005735836944310705, + "loss": 0.0692, + "theoretical_loss": 3.560327495164147, + "tokens_seen": 1426063360 + }, + { + "epoch": 0.43, + "learning_rate": 0.000573503450489488, + "loss": 0.0682, + "theoretical_loss": 3.5602697023456473, + "tokens_seen": 1426325504 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005734232065479057, + "loss": 0.0706, + "theoretical_loss": 3.5602119231213605, + "tokens_seen": 1426587648 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005733429626063232, + "loss": 0.0703, + "theoretical_loss": 3.5601541574855906, + "tokens_seen": 1426849792 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005732627186647407, + "loss": 0.0709, + "theoretical_loss": 3.5600964054326463, + "tokens_seen": 1427111936 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005731824747231584, + "loss": 0.0721, + "theoretical_loss": 3.5600386669568405, + "tokens_seen": 1427374080 + }, + { + "epoch": 0.43, + "learning_rate": 0.000573102230781576, + "loss": 0.0753, + "theoretical_loss": 3.559980942052488, + "tokens_seen": 1427636224 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005730219868399937, + "loss": 0.0696, + "theoretical_loss": 3.559923230713907, + "tokens_seen": 1427898368 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005729417428984112, + "loss": 0.0702, + "theoretical_loss": 3.5598655329354214, + "tokens_seen": 1428160512 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005728614989568288, + "loss": 0.0721, + "theoretical_loss": 3.5598078487113556, + "tokens_seen": 1428422656 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0017608533380553126, + "objective/train/docs_used": 521864, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.409778118133545, + "objective/train/original_loss": 1.409778118133545, + "objective/train/theoretical_loss": 3.55975017803604, + "objective/train/tokens_used": 1449144800, + "objective/train/value_avg": -0.0076446533203125, + "objective/train/value_loss": 0.00018956906569655985, + "objective/train/value_max": -5.692243576049805e-05, + "objective/train/value_min": -0.73388671875, + "objective/train/value_reward_corr": 0.678791158290969, + "objective/train/value_std": 0.013763427734375, + "objective/train/weight_avg": 1.001844882965088, + "objective/train/weighted_lm_loss": 1.4125566482543945, + "objective/train/weights_max": 1.1568920612335205, + "objective/train/weights_min": 0.36942073702812195, + "theoretical_loss": 3.55975017803604, + "tokens_seen": 1428684800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005727812550152464, + "loss": 0.0731, + "theoretical_loss": 3.55975017803604, + "tokens_seen": 1428684800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005727010110736639, + "loss": 0.0751, + "theoretical_loss": 3.5596925209038055, + "tokens_seen": 1428946944 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005726207671320815, + "loss": 0.0721, + "theoretical_loss": 3.55963487730899, + "tokens_seen": 1429209088 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005725405231904991, + "loss": 0.0697, + "theoretical_loss": 3.5595772472459313, + "tokens_seen": 1429471232 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005724602792489167, + "loss": 0.0707, + "theoretical_loss": 3.559519630708973, + "tokens_seen": 1429733376 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005723800353073343, + "loss": 0.071, + "theoretical_loss": 3.5594620276924607, + "tokens_seen": 1429995520 + }, + { + "epoch": 0.43, + "learning_rate": 0.000572299791365752, + "loss": 0.0703, + "theoretical_loss": 3.559404438190745, + "tokens_seen": 1430257664 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005722195474241695, + "loss": 0.0705, + "theoretical_loss": 3.559346862198178, + "tokens_seen": 1430519808 + }, + { + "epoch": 0.43, + "learning_rate": 0.000572139303482587, + "loss": 0.0718, + "theoretical_loss": 3.559289299709116, + "tokens_seen": 1430781952 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005720590595410047, + "loss": 0.0709, + "theoretical_loss": 3.5592317507179194, + "tokens_seen": 1431044096 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005719788155994222, + "loss": 0.072, + "theoretical_loss": 3.5591742152189507, + "tokens_seen": 1431306240 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005718985716578399, + "loss": 0.0694, + "theoretical_loss": 3.559116693206577, + "tokens_seen": 1431568384 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005718183277162574, + "loss": 0.0735, + "theoretical_loss": 3.5590591846751685, + "tokens_seen": 1431830528 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0009012027876451612, + "objective/train/docs_used": 522998, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3030763864517212, + "objective/train/original_loss": 1.3030766248703003, + "objective/train/theoretical_loss": 3.5590304354630673, + "objective/train/tokens_used": 1452421600, + "objective/train/value_avg": -0.00653076171875, + "objective/train/value_loss": 0.00013989253784529865, + "objective/train/value_max": -8.350610733032227e-05, + "objective/train/value_min": -0.347900390625, + "objective/train/value_reward_corr": 0.6877286111560412, + "objective/train/value_std": 0.01165008544921875, + "objective/train/weight_avg": 1.0009663105010986, + "objective/train/weighted_lm_loss": 1.304845929145813, + "objective/train/weights_max": 1.297332525253296, + "objective/train/weights_min": 0.37335923314094543, + "theoretical_loss": 3.5590304354630673, + "tokens_seen": 1431961600 + }, + { + "epoch": 0.43, + "learning_rate": 0.000571738083774675, + "loss": 0.0701, + "theoretical_loss": 3.5590016896190977, + "tokens_seen": 1432092672 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005716578398330927, + "loss": 0.07, + "theoretical_loss": 3.5589442080327416, + "tokens_seen": 1432354816 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005715775958915102, + "loss": 0.0709, + "theoretical_loss": 3.5588867399104798, + "tokens_seen": 1432616960 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005714973519499278, + "loss": 0.0718, + "theoretical_loss": 3.5588292852466967, + "tokens_seen": 1432879104 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005714171080083454, + "loss": 0.0669, + "theoretical_loss": 3.558771844035779, + "tokens_seen": 1433141248 + }, + { + "epoch": 0.43, + "learning_rate": 0.000571336864066763, + "loss": 0.0682, + "theoretical_loss": 3.5587144162721156, + "tokens_seen": 1433403392 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005712566201251805, + "loss": 0.0687, + "theoretical_loss": 3.5586570019501016, + "tokens_seen": 1433665536 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005711763761835982, + "loss": 0.074, + "theoretical_loss": 3.558599601064133, + "tokens_seen": 1433927680 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005710961322420157, + "loss": 0.072, + "theoretical_loss": 3.5585422136086104, + "tokens_seen": 1434189824 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005710158883004333, + "loss": 0.072, + "theoretical_loss": 3.558484839577937, + "tokens_seen": 1434451968 + }, + { + "epoch": 0.43, + "learning_rate": 0.000570935644358851, + "loss": 0.071, + "theoretical_loss": 3.55842747896652, + "tokens_seen": 1434714112 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005708554004172685, + "loss": 0.068, + "theoretical_loss": 3.55837013176877, + "tokens_seen": 1434976256 + }, + { + "epoch": 0.43, + "objective/train/advantage_avg": 0.0011591583024710417, + "objective/train/docs_used": 524207, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4704546928405762, + "objective/train/original_loss": 1.470454454421997, + "objective/train/theoretical_loss": 3.5583127979791005, + "objective/train/tokens_used": 1455698400, + "objective/train/value_avg": -0.005870819091796875, + "objective/train/value_loss": 0.00017229585500899702, + "objective/train/value_max": -8.219480514526367e-05, + "objective/train/value_min": -0.263671875, + "objective/train/value_reward_corr": 0.5902054650726603, + "objective/train/value_std": 0.01013946533203125, + "objective/train/weight_avg": 1.0012328624725342, + "objective/train/weighted_lm_loss": 1.4722777605056763, + "objective/train/weights_max": 1.1787195205688477, + "objective/train/weights_min": 0.3683807849884033, + "theoretical_loss": 3.5583127979791005, + "tokens_seen": 1435238400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0005707751564756861, + "loss": 0.0697, + "theoretical_loss": 3.5583127979791005, + "tokens_seen": 1435238400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005706949125341037, + "loss": 0.0712, + "theoretical_loss": 3.558255477591928, + "tokens_seen": 1435500544 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005706146685925213, + "loss": 0.0695, + "theoretical_loss": 3.558198170601674, + "tokens_seen": 1435762688 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005705344246509389, + "loss": 0.0695, + "theoretical_loss": 3.558140877002761, + "tokens_seen": 1436024832 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005704541807093565, + "loss": 0.0728, + "theoretical_loss": 3.5580835967896167, + "tokens_seen": 1436286976 + }, + { + "epoch": 0.44, + "learning_rate": 0.000570373936767774, + "loss": 0.0696, + "theoretical_loss": 3.5580263299566712, + "tokens_seen": 1436549120 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005702936928261916, + "loss": 0.0726, + "theoretical_loss": 3.5579690764983587, + "tokens_seen": 1436811264 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005702134488846092, + "loss": 0.0669, + "theoretical_loss": 3.557911836409115, + "tokens_seen": 1437073408 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005701332049430268, + "loss": 0.0694, + "theoretical_loss": 3.557854609683382, + "tokens_seen": 1437335552 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005700529610014445, + "loss": 0.0694, + "theoretical_loss": 3.5577973963156024, + "tokens_seen": 1437597696 + }, + { + "epoch": 0.44, + "learning_rate": 0.000569972717059862, + "loss": 0.0693, + "theoretical_loss": 3.557740196300224, + "tokens_seen": 1437859840 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005698924731182796, + "loss": 0.0726, + "theoretical_loss": 3.5576830096316963, + "tokens_seen": 1438121984 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005698122291766972, + "loss": 0.0715, + "theoretical_loss": 3.5576258363044735, + "tokens_seen": 1438384128 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.001051856903359294, + "objective/train/docs_used": 525418, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4021375179290771, + "objective/train/original_loss": 1.4021375179290771, + "objective/train/theoretical_loss": 3.5575972546421184, + "objective/train/tokens_used": 1458975200, + "objective/train/value_avg": -0.00624847412109375, + "objective/train/value_loss": 0.00013370224041864276, + "objective/train/value_max": -2.8431415557861328e-05, + "objective/train/value_min": -0.401123046875, + "objective/train/value_reward_corr": 0.6913211097079406, + "objective/train/value_std": 0.0120086669921875, + "objective/train/weight_avg": 1.001117467880249, + "objective/train/weighted_lm_loss": 1.4049772024154663, + "objective/train/weights_max": 1.2057886123657227, + "objective/train/weights_min": 0.7243527770042419, + "theoretical_loss": 3.5575972546421184, + "tokens_seen": 1438515200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005697319852351147, + "loss": 0.073, + "theoretical_loss": 3.5575686763130117, + "tokens_seen": 1438646272 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005696517412935323, + "loss": 0.0716, + "theoretical_loss": 3.5575115296517725, + "tokens_seen": 1438908416 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005695714973519499, + "loss": 0.0706, + "theoretical_loss": 3.5574543963152188, + "tokens_seen": 1439170560 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005694912534103675, + "loss": 0.0689, + "theoretical_loss": 3.5573972762978174, + "tokens_seen": 1439432704 + }, + { + "epoch": 0.44, + "learning_rate": 0.000569411009468785, + "loss": 0.074, + "theoretical_loss": 3.5573401695940383, + "tokens_seen": 1439694848 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005693307655272028, + "loss": 0.0705, + "theoretical_loss": 3.557283076198356, + "tokens_seen": 1439956992 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005692505215856203, + "loss": 0.0725, + "theoretical_loss": 3.557225996105246, + "tokens_seen": 1440219136 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005691702776440379, + "loss": 0.0734, + "theoretical_loss": 3.5571689293091895, + "tokens_seen": 1440481280 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005690900337024555, + "loss": 0.0692, + "theoretical_loss": 3.5571118758046696, + "tokens_seen": 1440743424 + }, + { + "epoch": 0.44, + "learning_rate": 0.000569009789760873, + "loss": 0.0683, + "theoretical_loss": 3.5570548355861726, + "tokens_seen": 1441005568 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005689295458192907, + "loss": 0.0734, + "theoretical_loss": 3.5569978086481884, + "tokens_seen": 1441267712 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005688493018777082, + "loss": 0.0682, + "theoretical_loss": 3.556940794985211, + "tokens_seen": 1441529856 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0003429764765314758, + "objective/train/docs_used": 526671, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.310866355895996, + "objective/train/original_loss": 1.310866355895996, + "objective/train/theoretical_loss": 3.5568837945917364, + "objective/train/tokens_used": 1462252000, + "objective/train/value_avg": -0.005947113037109375, + "objective/train/value_loss": 0.0002256840671179816, + "objective/train/value_max": -4.267692565917969e-05, + "objective/train/value_min": -0.268798828125, + "objective/train/value_reward_corr": 0.5540173147606325, + "objective/train/value_std": 0.00830841064453125, + "objective/train/weight_avg": 1.0004374980926514, + "objective/train/weighted_lm_loss": 1.3115313053131104, + "objective/train/weights_max": 1.1662676334381104, + "objective/train/weights_min": 0.3735102415084839, + "theoretical_loss": 3.5568837945917364, + "tokens_seen": 1441792000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005687690579361258, + "loss": 0.0702, + "theoretical_loss": 3.5568837945917364, + "tokens_seen": 1441792000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005686888139945435, + "loss": 0.0733, + "theoretical_loss": 3.556826807462264, + "tokens_seen": 1442054144 + }, + { + "epoch": 0.44, + "learning_rate": 0.000568608570052961, + "loss": 0.071, + "theoretical_loss": 3.5567698335912983, + "tokens_seen": 1442316288 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005685283261113786, + "loss": 0.0725, + "theoretical_loss": 3.5567128729733444, + "tokens_seen": 1442578432 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005684480821697962, + "loss": 0.0702, + "theoretical_loss": 3.5566559256029118, + "tokens_seen": 1442840576 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005683678382282138, + "loss": 0.0687, + "theoretical_loss": 3.556598991474515, + "tokens_seen": 1443102720 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005682875942866313, + "loss": 0.0709, + "theoretical_loss": 3.556542070582669, + "tokens_seen": 1443364864 + }, + { + "epoch": 0.44, + "learning_rate": 0.000568207350345049, + "loss": 0.0706, + "theoretical_loss": 3.5564851629218928, + "tokens_seen": 1443627008 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005681271064034665, + "loss": 0.0687, + "theoretical_loss": 3.55642826848671, + "tokens_seen": 1443889152 + }, + { + "epoch": 0.44, + "learning_rate": 0.000568046862461884, + "loss": 0.0681, + "theoretical_loss": 3.5563713872716467, + "tokens_seen": 1444151296 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005679666185203018, + "loss": 0.0719, + "theoretical_loss": 3.5563145192712318, + "tokens_seen": 1444413440 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005678863745787193, + "loss": 0.0722, + "theoretical_loss": 3.5562576644799977, + "tokens_seen": 1444675584 + }, + { + "epoch": 0.44, + "learning_rate": 0.000567806130637137, + "loss": 0.0717, + "theoretical_loss": 3.5562008228924804, + "tokens_seen": 1444937728 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0019295045640319586, + "objective/train/docs_used": 528002, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3192483186721802, + "objective/train/original_loss": 1.3192481994628906, + "objective/train/theoretical_loss": 3.556172407048409, + "objective/train/tokens_used": 1465528800, + "objective/train/value_avg": -0.00946807861328125, + "objective/train/value_loss": 0.0003503127081785351, + "objective/train/value_max": -8.416175842285156e-05, + "objective/train/value_min": -0.499267578125, + "objective/train/value_reward_corr": 0.6022576954413182, + "objective/train/value_std": 0.015655517578125, + "objective/train/weight_avg": 1.002081036567688, + "objective/train/weighted_lm_loss": 1.3213697671890259, + "objective/train/weights_max": 1.389220118522644, + "objective/train/weights_min": 0.37109869718551636, + "theoretical_loss": 3.556172407048409, + "tokens_seen": 1445068800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005677258866955545, + "loss": 0.0699, + "theoretical_loss": 3.556143994503219, + "tokens_seen": 1445199872 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005676456427539721, + "loss": 0.0672, + "theoretical_loss": 3.5560871793067554, + "tokens_seen": 1445462016 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005675653988123897, + "loss": 0.0694, + "theoretical_loss": 3.556030377297635, + "tokens_seen": 1445724160 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005674851548708073, + "loss": 0.0714, + "theoretical_loss": 3.5559735884704073, + "tokens_seen": 1445986304 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005674049109292248, + "loss": 0.0703, + "theoretical_loss": 3.5559168128196235, + "tokens_seen": 1446248448 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005673246669876424, + "loss": 0.0696, + "theoretical_loss": 3.555860050339839, + "tokens_seen": 1446510592 + }, + { + "epoch": 0.44, + "learning_rate": 0.00056724442304606, + "loss": 0.0704, + "theoretical_loss": 3.555803301025613, + "tokens_seen": 1446772736 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005671641791044776, + "loss": 0.0695, + "theoretical_loss": 3.555746564871506, + "tokens_seen": 1447034880 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005670839351628953, + "loss": 0.0681, + "theoretical_loss": 3.5556898418720837, + "tokens_seen": 1447297024 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005670036912213128, + "loss": 0.0674, + "theoretical_loss": 3.555633132021914, + "tokens_seen": 1447559168 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005669234472797304, + "loss": 0.0699, + "theoretical_loss": 3.5555764353155688, + "tokens_seen": 1447821312 + }, + { + "epoch": 0.44, + "learning_rate": 0.000566843203338148, + "loss": 0.0701, + "theoretical_loss": 3.555519751747622, + "tokens_seen": 1448083456 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0014572564978152514, + "objective/train/docs_used": 529166, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.432005763053894, + "objective/train/original_loss": 1.432005763053894, + "objective/train/theoretical_loss": 3.5554630813126513, + "objective/train/tokens_used": 1468805600, + "objective/train/value_avg": -0.00937652587890625, + "objective/train/value_loss": 0.00045499225961975753, + "objective/train/value_max": -0.0001366138458251953, + "objective/train/value_min": -0.97021484375, + "objective/train/value_reward_corr": 0.7644421545995016, + "objective/train/value_std": 0.0241241455078125, + "objective/train/weight_avg": 1.0016591548919678, + "objective/train/weighted_lm_loss": 1.4349960088729858, + "objective/train/weights_max": 1.7320685386657715, + "objective/train/weights_min": 0.369626522064209, + "theoretical_loss": 3.5554630813126513, + "tokens_seen": 1448345600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005667629593965655, + "loss": 0.0709, + "theoretical_loss": 3.5554630813126513, + "tokens_seen": 1448345600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005666827154549832, + "loss": 0.0717, + "theoretical_loss": 3.5554064240052385, + "tokens_seen": 1448607744 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005666024715134007, + "loss": 0.0693, + "theoretical_loss": 3.5553497798199674, + "tokens_seen": 1448869888 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005665222275718183, + "loss": 0.0724, + "theoretical_loss": 3.555293148751426, + "tokens_seen": 1449132032 + }, + { + "epoch": 0.44, + "learning_rate": 0.000566441983630236, + "loss": 0.0719, + "theoretical_loss": 3.555236530794204, + "tokens_seen": 1449394176 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005663617396886536, + "loss": 0.07, + "theoretical_loss": 3.5551799259428964, + "tokens_seen": 1449656320 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005662814957470711, + "loss": 0.0696, + "theoretical_loss": 3.5551233341920994, + "tokens_seen": 1449918464 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005662012518054887, + "loss": 0.0717, + "theoretical_loss": 3.555066755536414, + "tokens_seen": 1450180608 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005661210078639063, + "loss": 0.0737, + "theoretical_loss": 3.555010189970443, + "tokens_seen": 1450442752 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005660407639223238, + "loss": 0.0681, + "theoretical_loss": 3.5549536374887936, + "tokens_seen": 1450704896 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005659605199807415, + "loss": 0.0709, + "theoretical_loss": 3.5548970980860757, + "tokens_seen": 1450967040 + }, + { + "epoch": 0.44, + "learning_rate": 0.000565880276039159, + "loss": 0.0697, + "theoretical_loss": 3.5548405717569023, + "tokens_seen": 1451229184 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005658000320975766, + "loss": 0.0719, + "theoretical_loss": 3.5547840584958896, + "tokens_seen": 1451491328 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": -0.004492069128900766, + "objective/train/docs_used": 530451, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.529040813446045, + "objective/train/original_loss": 1.529041051864624, + "objective/train/theoretical_loss": 3.5547558067642617, + "objective/train/tokens_used": 1472082400, + "objective/train/value_avg": -0.007659912109375, + "objective/train/value_loss": 0.0013438891619443893, + "objective/train/value_max": -7.253885269165039e-05, + "objective/train/value_min": -0.2027587890625, + "objective/train/value_reward_corr": 0.6128778178033915, + "objective/train/value_std": 0.01111602783203125, + "objective/train/weight_avg": 0.9961403012275696, + "objective/train/weighted_lm_loss": 1.5288517475128174, + "objective/train/weights_max": 1.1460858583450317, + "objective/train/weights_min": 0.79996258020401, + "theoretical_loss": 3.5547558067642617, + "tokens_seen": 1451622400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005657197881559943, + "loss": 0.0724, + "theoretical_loss": 3.554727558297657, + "tokens_seen": 1451753472 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005656395442144118, + "loss": 0.0701, + "theoretical_loss": 3.554671071156828, + "tokens_seen": 1452015616 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005655593002728294, + "loss": 0.0708, + "theoretical_loss": 3.554614597068027, + "tokens_seen": 1452277760 + }, + { + "epoch": 0.44, + "learning_rate": 0.000565479056331247, + "loss": 0.0716, + "theoretical_loss": 3.554558136025884, + "tokens_seen": 1452539904 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005653988123896646, + "loss": 0.0721, + "theoretical_loss": 3.554501688025031, + "tokens_seen": 1452802048 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005653185684480822, + "loss": 0.0703, + "theoretical_loss": 3.554445253060103, + "tokens_seen": 1453064192 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005652383245064998, + "loss": 0.0699, + "theoretical_loss": 3.5543888311257397, + "tokens_seen": 1453326336 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005651580805649173, + "loss": 0.0716, + "theoretical_loss": 3.5543324222165813, + "tokens_seen": 1453588480 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005650778366233349, + "loss": 0.0693, + "theoretical_loss": 3.554276026327274, + "tokens_seen": 1453850624 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005649975926817526, + "loss": 0.0715, + "theoretical_loss": 3.5542196434524653, + "tokens_seen": 1454112768 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005649173487401701, + "loss": 0.0721, + "theoretical_loss": 3.554163273586806, + "tokens_seen": 1454374912 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005648371047985878, + "loss": 0.0698, + "theoretical_loss": 3.554106916724951, + "tokens_seen": 1454637056 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0006120117614045739, + "objective/train/docs_used": 531642, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4688389301300049, + "objective/train/original_loss": 1.4688386917114258, + "objective/train/theoretical_loss": 3.5540505728615583, + "objective/train/tokens_used": 1475359200, + "objective/train/value_avg": -0.00594329833984375, + "objective/train/value_loss": 0.0004463873337954283, + "objective/train/value_max": -6.35385513305664e-05, + "objective/train/value_min": -0.556640625, + "objective/train/value_reward_corr": 0.45180356973675206, + "objective/train/value_std": 0.0121307373046875, + "objective/train/weight_avg": 1.0007911920547485, + "objective/train/weighted_lm_loss": 1.469335675239563, + "objective/train/weights_max": 1.7006314992904663, + "objective/train/weights_min": 0.22718757390975952, + "theoretical_loss": 3.5540505728615583, + "tokens_seen": 1454899200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005647568608570053, + "loss": 0.07, + "theoretical_loss": 3.5540505728615583, + "tokens_seen": 1454899200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005646766169154229, + "loss": 0.0704, + "theoretical_loss": 3.5539942419912878, + "tokens_seen": 1455161344 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005645963729738405, + "loss": 0.0703, + "theoretical_loss": 3.553937924108804, + "tokens_seen": 1455423488 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005645161290322581, + "loss": 0.0687, + "theoretical_loss": 3.5538816192087728, + "tokens_seen": 1455685632 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005644358850906756, + "loss": 0.0692, + "theoretical_loss": 3.5538253272858658, + "tokens_seen": 1455947776 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005643556411490932, + "loss": 0.0699, + "theoretical_loss": 3.5537690483347557, + "tokens_seen": 1456209920 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005642753972075108, + "loss": 0.0682, + "theoretical_loss": 3.5537127823501184, + "tokens_seen": 1456472064 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005641951532659285, + "loss": 0.0695, + "theoretical_loss": 3.5536565293266342, + "tokens_seen": 1456734208 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005641149093243461, + "loss": 0.0754, + "theoretical_loss": 3.553600289258986, + "tokens_seen": 1456996352 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005640346653827636, + "loss": 0.0712, + "theoretical_loss": 3.553544062141859, + "tokens_seen": 1457258496 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005639544214411813, + "loss": 0.0705, + "theoretical_loss": 3.5534878479699423, + "tokens_seen": 1457520640 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005638741774995988, + "loss": 0.0731, + "theoretical_loss": 3.5534316467379288, + "tokens_seen": 1457782784 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005637939335580163, + "loss": 0.0727, + "theoretical_loss": 3.5533754584405126, + "tokens_seen": 1458044928 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.00047437482862733305, + "objective/train/docs_used": 532886, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3345245122909546, + "objective/train/original_loss": 1.3345245122909546, + "objective/train/theoretical_loss": 3.553347369140622, + "objective/train/tokens_used": 1478636000, + "objective/train/value_avg": -0.00730133056640625, + "objective/train/value_loss": 0.00017969703185372055, + "objective/train/value_max": -5.227327346801758e-05, + "objective/train/value_min": -0.3125, + "objective/train/value_reward_corr": 0.6709926357986172, + "objective/train/value_std": 0.01160430908203125, + "objective/train/weight_avg": 1.000558853149414, + "objective/train/weighted_lm_loss": 1.335375189781189, + "objective/train/weights_max": 1.3342957496643066, + "objective/train/weights_min": 0.37208810448646545, + "theoretical_loss": 3.553347369140622, + "tokens_seen": 1458176000 + }, + { + "epoch": 0.44, + "learning_rate": 0.000563713689616434, + "loss": 0.0698, + "theoretical_loss": 3.553319283072393, + "tokens_seen": 1458307072 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005636334456748515, + "loss": 0.0725, + "theoretical_loss": 3.553263120628271, + "tokens_seen": 1458569216 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005635532017332691, + "loss": 0.0708, + "theoretical_loss": 3.553206971102852, + "tokens_seen": 1458831360 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005634729577916868, + "loss": 0.0699, + "theoretical_loss": 3.5531508344908436, + "tokens_seen": 1459093504 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005633927138501044, + "loss": 0.0711, + "theoretical_loss": 3.5530947107869557, + "tokens_seen": 1459355648 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005633124699085219, + "loss": 0.0722, + "theoretical_loss": 3.5530385999859035, + "tokens_seen": 1459617792 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005632322259669395, + "loss": 0.071, + "theoretical_loss": 3.5529825020824033, + "tokens_seen": 1459879936 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005631519820253571, + "loss": 0.0712, + "theoretical_loss": 3.5529264170711756, + "tokens_seen": 1460142080 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005630717380837746, + "loss": 0.0715, + "theoretical_loss": 3.552870344946944, + "tokens_seen": 1460404224 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005629914941421923, + "loss": 0.0714, + "theoretical_loss": 3.5528142857044345, + "tokens_seen": 1460666368 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005629112502006098, + "loss": 0.0707, + "theoretical_loss": 3.5527582393383765, + "tokens_seen": 1460928512 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005628310062590275, + "loss": 0.0727, + "theoretical_loss": 3.5527022058435036, + "tokens_seen": 1461190656 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0010733278468251228, + "objective/train/docs_used": 534190, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3928134441375732, + "objective/train/original_loss": 1.3928134441375732, + "objective/train/theoretical_loss": 3.552646185214551, + "objective/train/tokens_used": 1481912800, + "objective/train/value_avg": -0.00820159912109375, + "objective/train/value_loss": 0.00018559301679488271, + "objective/train/value_max": -8.821487426757812e-05, + "objective/train/value_min": -0.344482421875, + "objective/train/value_reward_corr": 0.6832127209726154, + "objective/train/value_std": 0.01428985595703125, + "objective/train/weight_avg": 1.0011622905731201, + "objective/train/weighted_lm_loss": 1.393998146057129, + "objective/train/weights_max": 1.2060829401016235, + "objective/train/weights_min": 0.6082649827003479, + "theoretical_loss": 3.552646185214551, + "tokens_seen": 1461452800 + }, + { + "epoch": 0.44, + "learning_rate": 0.000562750762317445, + "loss": 0.0703, + "theoretical_loss": 3.552646185214551, + "tokens_seen": 1461452800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005626705183758626, + "loss": 0.0699, + "theoretical_loss": 3.552590177446257, + "tokens_seen": 1461714944 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005625902744342803, + "loss": 0.0701, + "theoretical_loss": 3.5525341825333645, + "tokens_seen": 1461977088 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005625100304926978, + "loss": 0.0728, + "theoretical_loss": 3.552478200470618, + "tokens_seen": 1462239232 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005624297865511154, + "loss": 0.072, + "theoretical_loss": 3.552422231252766, + "tokens_seen": 1462501376 + }, + { + "epoch": 0.44, + "learning_rate": 0.000562349542609533, + "loss": 0.0698, + "theoretical_loss": 3.552366274874559, + "tokens_seen": 1462763520 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005622692986679506, + "loss": 0.0707, + "theoretical_loss": 3.5523103313307516, + "tokens_seen": 1463025664 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005621890547263681, + "loss": 0.0729, + "theoretical_loss": 3.5522544006161016, + "tokens_seen": 1463287808 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005621088107847857, + "loss": 0.0703, + "theoretical_loss": 3.5521984827253688, + "tokens_seen": 1463549952 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005620285668432034, + "loss": 0.0685, + "theoretical_loss": 3.5521425776533175, + "tokens_seen": 1463812096 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005619483229016209, + "loss": 0.0697, + "theoretical_loss": 3.5520866853947135, + "tokens_seen": 1464074240 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005618680789600386, + "loss": 0.0688, + "theoretical_loss": 3.5520308059443275, + "tokens_seen": 1464336384 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005617878350184561, + "loss": 0.0722, + "theoretical_loss": 3.5519749392969313, + "tokens_seen": 1464598528 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.000920631573535502, + "objective/train/docs_used": 534938, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3900043964385986, + "objective/train/original_loss": 1.3900043964385986, + "objective/train/theoretical_loss": 3.5519470107727216, + "objective/train/tokens_used": 1485189600, + "objective/train/value_avg": -0.00879669189453125, + "objective/train/value_loss": 0.0005025153513997793, + "objective/train/value_max": -9.763240814208984e-05, + "objective/train/value_min": -0.7587890625, + "objective/train/value_reward_corr": 0.6102940318855143, + "objective/train/value_std": 0.0169677734375, + "objective/train/weight_avg": 1.0011345148086548, + "objective/train/weighted_lm_loss": 1.391117811203003, + "objective/train/weights_max": 2.0519468784332275, + "objective/train/weights_min": 0.3684299886226654, + "theoretical_loss": 3.5519470107727216, + "tokens_seen": 1464729600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005617075910768738, + "loss": 0.0715, + "theoretical_loss": 3.5519190854473006, + "tokens_seen": 1464860672 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005616273471352913, + "loss": 0.0708, + "theoretical_loss": 3.5518632443902156, + "tokens_seen": 1465122816 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005615471031937089, + "loss": 0.0703, + "theoretical_loss": 3.5518074161204565, + "tokens_seen": 1465384960 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005614668592521265, + "loss": 0.0732, + "theoretical_loss": 3.5517516006328096, + "tokens_seen": 1465647104 + }, + { + "epoch": 0.44, + "learning_rate": 0.000561386615310544, + "loss": 0.0708, + "theoretical_loss": 3.5516957979220627, + "tokens_seen": 1465909248 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005613063713689616, + "loss": 0.071, + "theoretical_loss": 3.551640007983007, + "tokens_seen": 1466171392 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005612261274273793, + "loss": 0.0683, + "theoretical_loss": 3.551584230810436, + "tokens_seen": 1466433536 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005611458834857969, + "loss": 0.0709, + "theoretical_loss": 3.551528466399148, + "tokens_seen": 1466695680 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005610656395442144, + "loss": 0.0711, + "theoretical_loss": 3.551472714743942, + "tokens_seen": 1466957824 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005609853956026321, + "loss": 0.069, + "theoretical_loss": 3.551416975839623, + "tokens_seen": 1467219968 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005609051516610496, + "loss": 0.07, + "theoretical_loss": 3.551361249680996, + "tokens_seen": 1467482112 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005608249077194671, + "loss": 0.069, + "theoretical_loss": 3.5513055362628707, + "tokens_seen": 1467744256 + }, + { + "epoch": 0.44, + "objective/train/advantage_avg": 0.0007431964622810483, + "objective/train/docs_used": 535989, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3608063459396362, + "objective/train/original_loss": 1.3608062267303467, + "objective/train/theoretical_loss": 3.5512498355800597, + "objective/train/tokens_used": 1488466400, + "objective/train/value_avg": -0.0081329345703125, + "objective/train/value_loss": 0.0004426943778526038, + "objective/train/value_max": -4.6133995056152344e-05, + "objective/train/value_min": -0.53125, + "objective/train/value_reward_corr": 0.5491775466311715, + "objective/train/value_std": 0.015716552734375, + "objective/train/weight_avg": 1.0009335279464722, + "objective/train/weighted_lm_loss": 1.3623521327972412, + "objective/train/weights_max": 1.7010573148727417, + "objective/train/weights_min": 0.36846092343330383, + "theoretical_loss": 3.5512498355800597, + "tokens_seen": 1468006400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005607446637778848, + "loss": 0.0725, + "theoretical_loss": 3.5512498355800597, + "tokens_seen": 1468006400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0005606644198363023, + "loss": 0.0696, + "theoretical_loss": 3.5511941476273785, + "tokens_seen": 1468268544 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005605841758947199, + "loss": 0.0687, + "theoretical_loss": 3.551138472399646, + "tokens_seen": 1468530688 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005605039319531376, + "loss": 0.0703, + "theoretical_loss": 3.5510828098916836, + "tokens_seen": 1468792832 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005604236880115552, + "loss": 0.0706, + "theoretical_loss": 3.5510271600983154, + "tokens_seen": 1469054976 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005603434440699728, + "loss": 0.0703, + "theoretical_loss": 3.5509715230143692, + "tokens_seen": 1469317120 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005602632001283903, + "loss": 0.0701, + "theoretical_loss": 3.550915898634676, + "tokens_seen": 1469579264 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005601829561868079, + "loss": 0.0678, + "theoretical_loss": 3.550860286954069, + "tokens_seen": 1469841408 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005601027122452255, + "loss": 0.068, + "theoretical_loss": 3.5508046879673856, + "tokens_seen": 1470103552 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005600224683036431, + "loss": 0.0721, + "theoretical_loss": 3.550749101669465, + "tokens_seen": 1470365696 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005599422243620606, + "loss": 0.0678, + "theoretical_loss": 3.5506935280551497, + "tokens_seen": 1470627840 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005598619804204783, + "loss": 0.0712, + "theoretical_loss": 3.5506379671192865, + "tokens_seen": 1470889984 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005597817364788959, + "loss": 0.0711, + "theoretical_loss": 3.550582418856723, + "tokens_seen": 1471152128 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0005571788060478866, + "objective/train/docs_used": 537210, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3439960479736328, + "objective/train/original_loss": 1.3439958095550537, + "objective/train/theoretical_loss": 3.55055464947632, + "objective/train/tokens_used": 1491743200, + "objective/train/value_avg": -0.00795745849609375, + "objective/train/value_loss": 0.00031730145565234125, + "objective/train/value_max": -6.657838821411133e-05, + "objective/train/value_min": -0.343505859375, + "objective/train/value_reward_corr": 0.6187407854038999, + "objective/train/value_std": 0.01346588134765625, + "objective/train/weight_avg": 1.0006963014602661, + "objective/train/weighted_lm_loss": 1.345666766166687, + "objective/train/weights_max": 1.2610359191894531, + "objective/train/weights_min": 0.3889971375465393, + "theoretical_loss": 3.55055464947632, + "tokens_seen": 1471283200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005597014925373134, + "loss": 0.0692, + "theoretical_loss": 3.550526883262312, + "tokens_seen": 1471414272 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005596212485957311, + "loss": 0.0711, + "theoretical_loss": 3.550471360330907, + "tokens_seen": 1471676416 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005595410046541486, + "loss": 0.0691, + "theoretical_loss": 3.550415850057367, + "tokens_seen": 1471938560 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005594607607125662, + "loss": 0.0696, + "theoretical_loss": 3.5503603524365523, + "tokens_seen": 1472200704 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005593805167709838, + "loss": 0.0735, + "theoretical_loss": 3.5503048674633266, + "tokens_seen": 1472462848 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005593002728294014, + "loss": 0.0694, + "theoretical_loss": 3.5502493951325564, + "tokens_seen": 1472724992 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005592200288878189, + "loss": 0.0739, + "theoretical_loss": 3.550193935439112, + "tokens_seen": 1472987136 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005591397849462365, + "loss": 0.0696, + "theoretical_loss": 3.5501384883778666, + "tokens_seen": 1473249280 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005590595410046541, + "loss": 0.0729, + "theoretical_loss": 3.5500830539436956, + "tokens_seen": 1473511424 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005589792970630718, + "loss": 0.0694, + "theoretical_loss": 3.550027632131477, + "tokens_seen": 1473773568 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005588990531214894, + "loss": 0.0709, + "theoretical_loss": 3.549972222936094, + "tokens_seen": 1474035712 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005588188091799069, + "loss": 0.0696, + "theoretical_loss": 3.5499168263524297, + "tokens_seen": 1474297856 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0007825845386832952, + "objective/train/docs_used": 538469, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.366462230682373, + "objective/train/original_loss": 1.366462230682373, + "objective/train/theoretical_loss": 3.549861442375373, + "objective/train/tokens_used": 1495020000, + "objective/train/value_avg": -0.005584716796875, + "objective/train/value_loss": 0.00012209409032948315, + "objective/train/value_max": -4.9114227294921875e-05, + "objective/train/value_min": -0.50048828125, + "objective/train/value_reward_corr": 0.7176460320886918, + "objective/train/value_std": 0.01194000244140625, + "objective/train/weight_avg": 1.0008388757705688, + "objective/train/weighted_lm_loss": 1.3676908016204834, + "objective/train/weights_max": 1.2191088199615479, + "objective/train/weights_min": 0.372718870639801, + "theoretical_loss": 3.549861442375373, + "tokens_seen": 1474560000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005587385652383246, + "loss": 0.0717, + "theoretical_loss": 3.549861442375373, + "tokens_seen": 1474560000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005586583212967421, + "loss": 0.068, + "theoretical_loss": 3.5498060709998143, + "tokens_seen": 1474822144 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005585780773551596, + "loss": 0.0711, + "theoretical_loss": 3.5497507122206473, + "tokens_seen": 1475084288 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005584978334135773, + "loss": 0.0705, + "theoretical_loss": 3.5496953660327684, + "tokens_seen": 1475346432 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005584175894719948, + "loss": 0.0704, + "theoretical_loss": 3.5496400324310775, + "tokens_seen": 1475608576 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005583373455304124, + "loss": 0.0668, + "theoretical_loss": 3.5495847114104766, + "tokens_seen": 1475870720 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005582571015888301, + "loss": 0.0719, + "theoretical_loss": 3.549529402965873, + "tokens_seen": 1476132864 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005581768576472477, + "loss": 0.0699, + "theoretical_loss": 3.549474107092173, + "tokens_seen": 1476395008 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005580966137056652, + "loss": 0.0735, + "theoretical_loss": 3.5494188237842894, + "tokens_seen": 1476657152 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005580163697640829, + "loss": 0.0715, + "theoretical_loss": 3.5493635530371366, + "tokens_seen": 1476919296 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005579361258225004, + "loss": 0.0695, + "theoretical_loss": 3.5493082948456314, + "tokens_seen": 1477181440 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005578558818809179, + "loss": 0.0723, + "theoretical_loss": 3.549253049204695, + "tokens_seen": 1477443584 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005577756379393356, + "loss": 0.0679, + "theoretical_loss": 3.549197816109251, + "tokens_seen": 1477705728 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0006546726217493415, + "objective/train/docs_used": 539614, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3523414134979248, + "objective/train/original_loss": 1.3523411750793457, + "objective/train/theoretical_loss": 3.549170204264502, + "objective/train/tokens_used": 1498296800, + "objective/train/value_avg": -0.0098419189453125, + "objective/train/value_loss": 0.00043507429654709995, + "objective/train/value_max": -5.829334259033203e-05, + "objective/train/value_min": -0.2685546875, + "objective/train/value_reward_corr": 0.6779971392819804, + "objective/train/value_std": 0.017822265625, + "objective/train/weight_avg": 1.0008503198623657, + "objective/train/weighted_lm_loss": 1.3519009351730347, + "objective/train/weights_max": 1.2550468444824219, + "objective/train/weights_min": 0.39393097162246704, + "theoretical_loss": 3.549170204264502, + "tokens_seen": 1477836800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005576953939977531, + "loss": 0.0694, + "theoretical_loss": 3.549142595554224, + "tokens_seen": 1477967872 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005576151500561709, + "loss": 0.0721, + "theoretical_loss": 3.5490873875345446, + "tokens_seen": 1478230016 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005575349061145884, + "loss": 0.0691, + "theoretical_loss": 3.5490321920451446, + "tokens_seen": 1478492160 + }, + { + "epoch": 0.45, + "learning_rate": 0.000557454662173006, + "loss": 0.0703, + "theoretical_loss": 3.54897700908096, + "tokens_seen": 1478754304 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005573744182314236, + "loss": 0.0725, + "theoretical_loss": 3.548921838636927, + "tokens_seen": 1479016448 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005572941742898411, + "loss": 0.0726, + "theoretical_loss": 3.5488666807079885, + "tokens_seen": 1479278592 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005572139303482587, + "loss": 0.0704, + "theoretical_loss": 3.5488115352890874, + "tokens_seen": 1479540736 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005571336864066763, + "loss": 0.071, + "theoretical_loss": 3.5487564023751714, + "tokens_seen": 1479802880 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005570534424650939, + "loss": 0.0715, + "theoretical_loss": 3.5487012819611894, + "tokens_seen": 1480065024 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005569731985235114, + "loss": 0.0716, + "theoretical_loss": 3.548646174042095, + "tokens_seen": 1480327168 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005568929545819291, + "loss": 0.0729, + "theoretical_loss": 3.5485910786128434, + "tokens_seen": 1480589312 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005568127106403467, + "loss": 0.0722, + "theoretical_loss": 3.5485359956683933, + "tokens_seen": 1480851456 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0007702909060753882, + "objective/train/docs_used": 540822, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.462890863418579, + "objective/train/original_loss": 1.462890625, + "objective/train/theoretical_loss": 3.548480925203706, + "objective/train/tokens_used": 1501573600, + "objective/train/value_avg": -0.01175689697265625, + "objective/train/value_loss": 0.0004899628693237901, + "objective/train/value_max": -9.173154830932617e-05, + "objective/train/value_min": -0.472900390625, + "objective/train/value_reward_corr": 0.6486511891603499, + "objective/train/value_std": 0.0182952880859375, + "objective/train/weight_avg": 1.0009918212890625, + "objective/train/weighted_lm_loss": 1.4647504091262817, + "objective/train/weights_max": 1.530020833015442, + "objective/train/weights_min": 0.37112417817115784, + "theoretical_loss": 3.548480925203706, + "tokens_seen": 1481113600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005567324666987642, + "loss": 0.0724, + "theoretical_loss": 3.548480925203706, + "tokens_seen": 1481113600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005566522227571819, + "loss": 0.0712, + "theoretical_loss": 3.548425867213747, + "tokens_seen": 1481375744 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005565719788155994, + "loss": 0.0705, + "theoretical_loss": 3.5483708216934833, + "tokens_seen": 1481637888 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005564917348740171, + "loss": 0.0705, + "theoretical_loss": 3.5483157886378844, + "tokens_seen": 1481900032 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005564114909324346, + "loss": 0.0724, + "theoretical_loss": 3.5482607680419243, + "tokens_seen": 1482162176 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005563312469908522, + "loss": 0.0706, + "theoretical_loss": 3.548205759900579, + "tokens_seen": 1482424320 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005562510030492698, + "loss": 0.0708, + "theoretical_loss": 3.548150764208828, + "tokens_seen": 1482686464 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005561707591076873, + "loss": 0.07, + "theoretical_loss": 3.5480957809616527, + "tokens_seen": 1482948608 + }, + { + "epoch": 0.45, + "learning_rate": 0.000556090515166105, + "loss": 0.0726, + "theoretical_loss": 3.548040810154038, + "tokens_seen": 1483210752 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005560102712245226, + "loss": 0.074, + "theoretical_loss": 3.5479858517809717, + "tokens_seen": 1483472896 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005559300272829402, + "loss": 0.0721, + "theoretical_loss": 3.547930905837445, + "tokens_seen": 1483735040 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005558497833413577, + "loss": 0.0748, + "theoretical_loss": 3.547875972318451, + "tokens_seen": 1483997184 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005557695393997754, + "loss": 0.0691, + "theoretical_loss": 3.547821051218987, + "tokens_seen": 1484259328 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.00014683479093946517, + "objective/train/docs_used": 542078, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4164698123931885, + "objective/train/original_loss": 1.4164698123931885, + "objective/train/theoretical_loss": 3.5477935953250155, + "objective/train/tokens_used": 1504850400, + "objective/train/value_avg": -0.006717681884765625, + "objective/train/value_loss": 0.000441345531726256, + "objective/train/value_max": -4.363059997558594e-05, + "objective/train/value_min": -0.91796875, + "objective/train/value_reward_corr": 0.6474719816157412, + "objective/train/value_std": 0.01513671875, + "objective/train/weight_avg": 1.000320315361023, + "objective/train/weighted_lm_loss": 1.4166218042373657, + "objective/train/weights_max": 1.835614800453186, + "objective/train/weights_min": 0.23298361897468567, + "theoretical_loss": 3.5477935953250155, + "tokens_seen": 1484390400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005556892954581929, + "loss": 0.071, + "theoretical_loss": 3.5477661425340514, + "tokens_seen": 1484521472 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005556090515166104, + "loss": 0.0718, + "theoretical_loss": 3.547711246258647, + "tokens_seen": 1484783616 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005555288075750281, + "loss": 0.0732, + "theoretical_loss": 3.547656362387779, + "tokens_seen": 1485045760 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005554485636334456, + "loss": 0.0733, + "theoretical_loss": 3.5476014909164553, + "tokens_seen": 1485307904 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005553683196918632, + "loss": 0.0696, + "theoretical_loss": 3.547546631839687, + "tokens_seen": 1485570048 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005552880757502809, + "loss": 0.0704, + "theoretical_loss": 3.547491785152488, + "tokens_seen": 1485832192 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005552078318086985, + "loss": 0.0715, + "theoretical_loss": 3.5474369508498755, + "tokens_seen": 1486094336 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005551275878671161, + "loss": 0.0716, + "theoretical_loss": 3.547382128926868, + "tokens_seen": 1486356480 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005550473439255337, + "loss": 0.0697, + "theoretical_loss": 3.5473273193784896, + "tokens_seen": 1486618624 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005549670999839512, + "loss": 0.0697, + "theoretical_loss": 3.547272522199764, + "tokens_seen": 1486880768 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005548868560423688, + "loss": 0.0721, + "theoretical_loss": 3.5472177373857208, + "tokens_seen": 1487142912 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005548066121007864, + "loss": 0.0681, + "theoretical_loss": 3.5471629649313905, + "tokens_seen": 1487405056 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": -0.0009506873902864754, + "objective/train/docs_used": 543335, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3594411611557007, + "objective/train/original_loss": 1.3594409227371216, + "objective/train/theoretical_loss": 3.547108204831807, + "objective/train/tokens_used": 1508127200, + "objective/train/value_avg": -0.0094451904296875, + "objective/train/value_loss": 0.000862235261593014, + "objective/train/value_max": -8.612871170043945e-05, + "objective/train/value_min": -0.98388671875, + "objective/train/value_reward_corr": 0.6495069759170901, + "objective/train/value_std": 0.0212860107421875, + "objective/train/weight_avg": 0.9994162917137146, + "objective/train/weighted_lm_loss": 1.3578301668167114, + "objective/train/weights_max": 1.7093836069107056, + "objective/train/weights_min": 0.23241551220417023, + "theoretical_loss": 3.547108204831807, + "tokens_seen": 1487667200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005547263681592039, + "loss": 0.0688, + "theoretical_loss": 3.547108204831807, + "tokens_seen": 1487667200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005546461242176217, + "loss": 0.0726, + "theoretical_loss": 3.547053457082008, + "tokens_seen": 1487929344 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005545658802760392, + "loss": 0.0717, + "theoretical_loss": 3.5469987216770322, + "tokens_seen": 1488191488 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005544856363344568, + "loss": 0.0725, + "theoretical_loss": 3.5469439986119227, + "tokens_seen": 1488453632 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005544053923928744, + "loss": 0.0701, + "theoretical_loss": 3.5468892878817253, + "tokens_seen": 1488715776 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005543251484512919, + "loss": 0.0714, + "theoretical_loss": 3.546834589481488, + "tokens_seen": 1488977920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005542449045097095, + "loss": 0.07, + "theoretical_loss": 3.5467799034062617, + "tokens_seen": 1489240064 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005541646605681271, + "loss": 0.0688, + "theoretical_loss": 3.546725229651101, + "tokens_seen": 1489502208 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005540844166265447, + "loss": 0.0711, + "theoretical_loss": 3.5466705682110633, + "tokens_seen": 1489764352 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005540041726849623, + "loss": 0.0716, + "theoretical_loss": 3.546615919081207, + "tokens_seen": 1490026496 + }, + { + "epoch": 0.45, + "learning_rate": 0.00055392392874338, + "loss": 0.0679, + "theoretical_loss": 3.546561282256596, + "tokens_seen": 1490288640 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005538436848017975, + "loss": 0.0678, + "theoretical_loss": 3.546506657732295, + "tokens_seen": 1490550784 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005537634408602151, + "loss": 0.0713, + "theoretical_loss": 3.546452045503372, + "tokens_seen": 1490812928 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.001109226606786251, + "objective/train/docs_used": 544526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4318077564239502, + "objective/train/original_loss": 1.4318077564239502, + "objective/train/theoretical_loss": 3.546424743998138, + "objective/train/tokens_used": 1511404000, + "objective/train/value_avg": -0.006862640380859375, + "objective/train/value_loss": 0.0001357761793769896, + "objective/train/value_max": -7.140636444091797e-05, + "objective/train/value_min": -0.798828125, + "objective/train/value_reward_corr": 0.6639085992244163, + "objective/train/value_std": 0.01186370849609375, + "objective/train/weight_avg": 1.0011723041534424, + "objective/train/weighted_lm_loss": 1.4328300952911377, + "objective/train/weights_max": 1.399935007095337, + "objective/train/weights_min": 0.37172776460647583, + "theoretical_loss": 3.546424743998138, + "tokens_seen": 1490944000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005536831969186327, + "loss": 0.0702, + "theoretical_loss": 3.5463974455648994, + "tokens_seen": 1491075072 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005536029529770502, + "loss": 0.0697, + "theoretical_loss": 3.5463428579119505, + "tokens_seen": 1491337216 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005535227090354679, + "loss": 0.0693, + "theoretical_loss": 3.546288282539602, + "tokens_seen": 1491599360 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005534424650938854, + "loss": 0.0703, + "theoretical_loss": 3.5462337194429336, + "tokens_seen": 1491861504 + }, + { + "epoch": 0.45, + "learning_rate": 0.000553362221152303, + "loss": 0.0723, + "theoretical_loss": 3.546179168617028, + "tokens_seen": 1492123648 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005532819772107206, + "loss": 0.069, + "theoretical_loss": 3.54612463005697, + "tokens_seen": 1492385792 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005532017332691381, + "loss": 0.073, + "theoretical_loss": 3.546070103757849, + "tokens_seen": 1492647936 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005531214893275557, + "loss": 0.0682, + "theoretical_loss": 3.546015589714755, + "tokens_seen": 1492910080 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005530412453859734, + "loss": 0.0714, + "theoretical_loss": 3.545961087922782, + "tokens_seen": 1493172224 + }, + { + "epoch": 0.45, + "learning_rate": 0.000552961001444391, + "loss": 0.0698, + "theoretical_loss": 3.5459065983770266, + "tokens_seen": 1493434368 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005528807575028085, + "loss": 0.0668, + "theoretical_loss": 3.5458521210725893, + "tokens_seen": 1493696512 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005528005135612262, + "loss": 0.069, + "theoretical_loss": 3.545797656004571, + "tokens_seen": 1493958656 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 0.0012045002076774836, + "objective/train/docs_used": 545793, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2948263883590698, + "objective/train/original_loss": 1.2948261499404907, + "objective/train/theoretical_loss": 3.545743203168077, + "objective/train/tokens_used": 1514680800, + "objective/train/value_avg": -0.0071563720703125, + "objective/train/value_loss": 0.0002446006110403687, + "objective/train/value_max": -7.843971252441406e-05, + "objective/train/value_min": -0.2481689453125, + "objective/train/value_reward_corr": 0.6621380128230953, + "objective/train/value_std": 0.0140380859375, + "objective/train/weight_avg": 1.0013121366500854, + "objective/train/weighted_lm_loss": 1.2966434955596924, + "objective/train/weights_max": 1.1413390636444092, + "objective/train/weights_min": 0.3839024305343628, + "theoretical_loss": 3.545743203168077, + "tokens_seen": 1494220800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005527202696196437, + "loss": 0.0681, + "theoretical_loss": 3.545743203168077, + "tokens_seen": 1494220800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005526400256780613, + "loss": 0.0693, + "theoretical_loss": 3.545688762558216, + "tokens_seen": 1494482944 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005525597817364789, + "loss": 0.0703, + "theoretical_loss": 3.5456343341700984, + "tokens_seen": 1494745088 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005524795377948964, + "loss": 0.0701, + "theoretical_loss": 3.545579917998838, + "tokens_seen": 1495007232 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005523992938533142, + "loss": 0.069, + "theoretical_loss": 3.5455255140395505, + "tokens_seen": 1495269376 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005523190499117317, + "loss": 0.071, + "theoretical_loss": 3.5454711222873554, + "tokens_seen": 1495531520 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005522388059701493, + "loss": 0.0712, + "theoretical_loss": 3.545416742737375, + "tokens_seen": 1495793664 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005521585620285669, + "loss": 0.0714, + "theoretical_loss": 3.5453623753847343, + "tokens_seen": 1496055808 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005520783180869845, + "loss": 0.0687, + "theoretical_loss": 3.54530802022456, + "tokens_seen": 1496317952 + }, + { + "epoch": 0.45, + "learning_rate": 0.000551998074145402, + "loss": 0.0745, + "theoretical_loss": 3.545253677251983, + "tokens_seen": 1496580096 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005519178302038196, + "loss": 0.0698, + "theoretical_loss": 3.5451993464621365, + "tokens_seen": 1496842240 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005518375862622372, + "loss": 0.0689, + "theoretical_loss": 3.5451450278501566, + "tokens_seen": 1497104384 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005517573423206547, + "loss": 0.0737, + "theoretical_loss": 3.545090721411182, + "tokens_seen": 1497366528 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": 9.254212636733428e-05, + "objective/train/docs_used": 547043, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3230732679367065, + "objective/train/original_loss": 1.323073387145996, + "objective/train/theoretical_loss": 3.545063572755053, + "objective/train/tokens_used": 1517957600, + "objective/train/value_avg": -0.0081787109375, + "objective/train/value_loss": 0.000149422645336017, + "objective/train/value_max": -5.519390106201172e-05, + "objective/train/value_min": -0.55078125, + "objective/train/value_reward_corr": 0.7170197682857611, + "objective/train/value_std": 0.01326751708984375, + "objective/train/weight_avg": 1.0001667737960815, + "objective/train/weighted_lm_loss": 1.3230582475662231, + "objective/train/weights_max": 1.6947335004806519, + "objective/train/weights_min": 0.6550559401512146, + "theoretical_loss": 3.545063572755053, + "tokens_seen": 1497497600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005516770983790724, + "loss": 0.071, + "theoretical_loss": 3.545036427140354, + "tokens_seen": 1497628672 + }, + { + "epoch": 0.45, + "learning_rate": 0.00055159685443749, + "loss": 0.0727, + "theoretical_loss": 3.544982145032817, + "tokens_seen": 1497890816 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005515166104959077, + "loss": 0.0696, + "theoretical_loss": 3.5449278750837188, + "tokens_seen": 1498152960 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005514363665543252, + "loss": 0.0734, + "theoretical_loss": 3.5448736172882085, + "tokens_seen": 1498415104 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005513561226127427, + "loss": 0.069, + "theoretical_loss": 3.5448193716414393, + "tokens_seen": 1498677248 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005512758786711604, + "loss": 0.0711, + "theoretical_loss": 3.5447651381385668, + "tokens_seen": 1498939392 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005511956347295779, + "loss": 0.0707, + "theoretical_loss": 3.544710916774749, + "tokens_seen": 1499201536 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005511153907879955, + "loss": 0.07, + "theoretical_loss": 3.5446567075451463, + "tokens_seen": 1499463680 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005510351468464131, + "loss": 0.0713, + "theoretical_loss": 3.544602510444924, + "tokens_seen": 1499725824 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005509549029048307, + "loss": 0.0701, + "theoretical_loss": 3.544548325469247, + "tokens_seen": 1499987968 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005508746589632483, + "loss": 0.0738, + "theoretical_loss": 3.5444941526132863, + "tokens_seen": 1500250112 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005507944150216659, + "loss": 0.0723, + "theoretical_loss": 3.5444399918722134, + "tokens_seen": 1500512256 + }, + { + "epoch": 0.45, + "objective/train/advantage_avg": -5.096226232126355e-05, + "objective/train/docs_used": 548178, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.424842357635498, + "objective/train/original_loss": 1.424842357635498, + "objective/train/theoretical_loss": 3.5443858432412028, + "objective/train/tokens_used": 1521234400, + "objective/train/value_avg": -0.007396697998046875, + "objective/train/value_loss": 0.0001681852008914575, + "objective/train/value_max": -4.869699478149414e-05, + "objective/train/value_min": -0.2218017578125, + "objective/train/value_reward_corr": 0.7244656461983854, + "objective/train/value_std": 0.01325225830078125, + "objective/train/weight_avg": 1.0000311136245728, + "objective/train/weighted_lm_loss": 1.4248158931732178, + "objective/train/weights_max": 1.162267804145813, + "objective/train/weights_min": 0.7341392636299133, + "theoretical_loss": 3.5443858432412028, + "tokens_seen": 1500774400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005507141710800835, + "loss": 0.0683, + "theoretical_loss": 3.5443858432412028, + "tokens_seen": 1500774400 + }, + { + "epoch": 0.45, + "learning_rate": 0.000550633927138501, + "loss": 0.0693, + "theoretical_loss": 3.5443317067154325, + "tokens_seen": 1501036544 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005505536831969187, + "loss": 0.0718, + "theoretical_loss": 3.544277582290083, + "tokens_seen": 1501298688 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005504734392553362, + "loss": 0.0713, + "theoretical_loss": 3.544223469960337, + "tokens_seen": 1501560832 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005503931953137538, + "loss": 0.0722, + "theoretical_loss": 3.5441693697213816, + "tokens_seen": 1501822976 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005503129513721714, + "loss": 0.0713, + "theoretical_loss": 3.5441152815684043, + "tokens_seen": 1502085120 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005502327074305889, + "loss": 0.0697, + "theoretical_loss": 3.5440612054965968, + "tokens_seen": 1502347264 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005501524634890067, + "loss": 0.0722, + "theoretical_loss": 3.544007141501154, + "tokens_seen": 1502609408 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005500722195474242, + "loss": 0.0697, + "theoretical_loss": 3.543953089577272, + "tokens_seen": 1502871552 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005499919756058418, + "loss": 0.0716, + "theoretical_loss": 3.5438990497201512, + "tokens_seen": 1503133696 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005499117316642594, + "loss": 0.0693, + "theoretical_loss": 3.5438450219249935, + "tokens_seen": 1503395840 + }, + { + "epoch": 0.46, + "learning_rate": 0.000549831487722677, + "loss": 0.0687, + "theoretical_loss": 3.5437910061870044, + "tokens_seen": 1503657984 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005497512437810945, + "loss": 0.0709, + "theoretical_loss": 3.543737002501392, + "tokens_seen": 1503920128 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0003146275703329593, + "objective/train/docs_used": 549351, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3942029476165771, + "objective/train/original_loss": 1.3942029476165771, + "objective/train/theoretical_loss": 3.54371000517673, + "objective/train/tokens_used": 1524511200, + "objective/train/value_avg": -0.006191253662109375, + "objective/train/value_loss": 0.00012938177678734064, + "objective/train/value_max": -5.346536636352539e-05, + "objective/train/value_min": -0.2010498046875, + "objective/train/value_reward_corr": 0.6965117820617344, + "objective/train/value_std": 0.01032257080078125, + "objective/train/weight_avg": 1.0003749132156372, + "objective/train/weighted_lm_loss": 1.3946812152862549, + "objective/train/weights_max": 1.142175316810608, + "objective/train/weights_min": 0.3683358430862427, + "theoretical_loss": 3.54371000517673, + "tokens_seen": 1504051200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005496709998395121, + "loss": 0.0703, + "theoretical_loss": 3.5436830108633663, + "tokens_seen": 1504182272 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005495907558979297, + "loss": 0.068, + "theoretical_loss": 3.5436290312681415, + "tokens_seen": 1504444416 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005495105119563472, + "loss": 0.0685, + "theoretical_loss": 3.543575063710933, + "tokens_seen": 1504706560 + }, + { + "epoch": 0.46, + "learning_rate": 0.000549430268014765, + "loss": 0.0703, + "theoretical_loss": 3.543521108186961, + "tokens_seen": 1504968704 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005493500240731825, + "loss": 0.0732, + "theoretical_loss": 3.543467164691445, + "tokens_seen": 1505230848 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005492697801316001, + "loss": 0.0678, + "theoretical_loss": 3.5434132332196113, + "tokens_seen": 1505492992 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005491895361900177, + "loss": 0.0698, + "theoretical_loss": 3.543359313766686, + "tokens_seen": 1505755136 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005491092922484352, + "loss": 0.0704, + "theoretical_loss": 3.543305406327899, + "tokens_seen": 1506017280 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005490290483068528, + "loss": 0.0681, + "theoretical_loss": 3.5432515108984832, + "tokens_seen": 1506279424 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005489488043652704, + "loss": 0.0673, + "theoretical_loss": 3.543197627473673, + "tokens_seen": 1506541568 + }, + { + "epoch": 0.46, + "learning_rate": 0.000548868560423688, + "loss": 0.0713, + "theoretical_loss": 3.543143756048708, + "tokens_seen": 1506803712 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005487883164821056, + "loss": 0.0694, + "theoretical_loss": 3.5430898966188265, + "tokens_seen": 1507065856 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0005419878871180117, + "objective/train/docs_used": 550589, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4911402463912964, + "objective/train/original_loss": 1.491140365600586, + "objective/train/theoretical_loss": 3.543036049179274, + "objective/train/tokens_used": 1527788000, + "objective/train/value_avg": -0.0102386474609375, + "objective/train/value_loss": 0.00021217716857790947, + "objective/train/value_max": -0.00013446807861328125, + "objective/train/value_min": -0.215576171875, + "objective/train/value_reward_corr": 0.7336762770578672, + "objective/train/value_std": 0.01538848876953125, + "objective/train/weight_avg": 1.0006433725357056, + "objective/train/weighted_lm_loss": 1.4918407201766968, + "objective/train/weights_max": 1.1528605222702026, + "objective/train/weights_min": 0.382254034280777, + "theoretical_loss": 3.543036049179274, + "tokens_seen": 1507328000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005487080725405232, + "loss": 0.0707, + "theoretical_loss": 3.543036049179274, + "tokens_seen": 1507328000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005486278285989408, + "loss": 0.0731, + "theoretical_loss": 3.5429822137252955, + "tokens_seen": 1507590144 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005485475846573585, + "loss": 0.0696, + "theoretical_loss": 3.54292839025214, + "tokens_seen": 1507852288 + }, + { + "epoch": 0.46, + "learning_rate": 0.000548467340715776, + "loss": 0.0697, + "theoretical_loss": 3.5428745787550593, + "tokens_seen": 1508114432 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005483870967741935, + "loss": 0.0715, + "theoretical_loss": 3.5428207792293076, + "tokens_seen": 1508376576 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005483068528326112, + "loss": 0.0698, + "theoretical_loss": 3.542766991670142, + "tokens_seen": 1508638720 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005482266088910287, + "loss": 0.0705, + "theoretical_loss": 3.542713216072821, + "tokens_seen": 1508900864 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005481463649494463, + "loss": 0.0722, + "theoretical_loss": 3.542659452432608, + "tokens_seen": 1509163008 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005480661210078639, + "loss": 0.0704, + "theoretical_loss": 3.542605700744768, + "tokens_seen": 1509425152 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005479858770662815, + "loss": 0.0693, + "theoretical_loss": 3.542551961004568, + "tokens_seen": 1509687296 + }, + { + "epoch": 0.46, + "learning_rate": 0.000547905633124699, + "loss": 0.0717, + "theoretical_loss": 3.5424982332072794, + "tokens_seen": 1509949440 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005478253891831167, + "loss": 0.0721, + "theoretical_loss": 3.5424445173481756, + "tokens_seen": 1510211584 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005477451452415343, + "loss": 0.0711, + "theoretical_loss": 3.5423908134225304, + "tokens_seen": 1510473728 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0009087743819691241, + "objective/train/docs_used": 551665, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4157395362854004, + "objective/train/original_loss": 1.4157395362854004, + "objective/train/theoretical_loss": 3.54236396593328, + "objective/train/tokens_used": 1531064800, + "objective/train/value_avg": -0.00485992431640625, + "objective/train/value_loss": 8.174213144229725e-05, + "objective/train/value_max": -3.0934810638427734e-05, + "objective/train/value_min": -0.196044921875, + "objective/train/value_reward_corr": 0.6648075748225439, + "objective/train/value_std": 0.0084686279296875, + "objective/train/weight_avg": 1.0009490251541138, + "objective/train/weighted_lm_loss": 1.417286992073059, + "objective/train/weights_max": 1.1846336126327515, + "objective/train/weights_min": 0.6891753673553467, + "theoretical_loss": 3.54236396593328, + "tokens_seen": 1510604800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005476649012999519, + "loss": 0.0708, + "theoretical_loss": 3.5423371214256245, + "tokens_seen": 1510735872 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005475846573583695, + "loss": 0.0708, + "theoretical_loss": 3.5422834413527378, + "tokens_seen": 1510998016 + }, + { + "epoch": 0.46, + "learning_rate": 0.000547504413416787, + "loss": 0.0717, + "theoretical_loss": 3.5422297731991548, + "tokens_seen": 1511260160 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005474241694752047, + "loss": 0.0687, + "theoretical_loss": 3.542176116960162, + "tokens_seen": 1511522304 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005473439255336222, + "loss": 0.0717, + "theoretical_loss": 3.542122472631048, + "tokens_seen": 1511784448 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005472636815920397, + "loss": 0.0718, + "theoretical_loss": 3.542068840207105, + "tokens_seen": 1512046592 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005471834376504575, + "loss": 0.0695, + "theoretical_loss": 3.5420152196836288, + "tokens_seen": 1512308736 + }, + { + "epoch": 0.46, + "learning_rate": 0.000547103193708875, + "loss": 0.0682, + "theoretical_loss": 3.541961611055915, + "tokens_seen": 1512570880 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005470229497672926, + "loss": 0.07, + "theoretical_loss": 3.5419080143192643, + "tokens_seen": 1512833024 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005469427058257102, + "loss": 0.0704, + "theoretical_loss": 3.541854429468979, + "tokens_seen": 1513095168 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005468624618841278, + "loss": 0.071, + "theoretical_loss": 3.5418008565003647, + "tokens_seen": 1513357312 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005467822179425453, + "loss": 0.0718, + "theoretical_loss": 3.541747295408729, + "tokens_seen": 1513619456 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0005864388658665121, + "objective/train/docs_used": 552897, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2440524101257324, + "objective/train/original_loss": 1.2440524101257324, + "objective/train/theoretical_loss": 3.541693746189383, + "objective/train/tokens_used": 1534341600, + "objective/train/value_avg": -0.0083770751953125, + "objective/train/value_loss": 0.0004337378195486963, + "objective/train/value_max": -5.8770179748535156e-05, + "objective/train/value_min": -0.755859375, + "objective/train/value_reward_corr": 0.6793665458707917, + "objective/train/value_std": 0.017364501953125, + "objective/train/weight_avg": 1.000771403312683, + "objective/train/weighted_lm_loss": 1.244329571723938, + "objective/train/weights_max": 1.7658472061157227, + "objective/train/weights_min": 0.37000179290771484, + "theoretical_loss": 3.541693746189383, + "tokens_seen": 1513881600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005467019740009629, + "loss": 0.07, + "theoretical_loss": 3.541693746189383, + "tokens_seen": 1513881600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005466217300593805, + "loss": 0.0687, + "theoretical_loss": 3.5416402088376397, + "tokens_seen": 1514143744 + }, + { + "epoch": 0.46, + "learning_rate": 0.000546541486117798, + "loss": 0.0701, + "theoretical_loss": 3.5415866833488154, + "tokens_seen": 1514405888 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005464612421762158, + "loss": 0.0707, + "theoretical_loss": 3.541533169718228, + "tokens_seen": 1514668032 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005463809982346333, + "loss": 0.0701, + "theoretical_loss": 3.541479667941199, + "tokens_seen": 1514930176 + }, + { + "epoch": 0.46, + "learning_rate": 0.000546300754293051, + "loss": 0.0707, + "theoretical_loss": 3.5414261780130527, + "tokens_seen": 1515192320 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005462205103514685, + "loss": 0.0699, + "theoretical_loss": 3.5413726999291155, + "tokens_seen": 1515454464 + }, + { + "epoch": 0.46, + "learning_rate": 0.000546140266409886, + "loss": 0.0726, + "theoretical_loss": 3.5413192336847166, + "tokens_seen": 1515716608 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005460600224683037, + "loss": 0.0685, + "theoretical_loss": 3.5412657792751876, + "tokens_seen": 1515978752 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005459797785267212, + "loss": 0.0698, + "theoretical_loss": 3.541212336695863, + "tokens_seen": 1516240896 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005458995345851388, + "loss": 0.0703, + "theoretical_loss": 3.54115890594208, + "tokens_seen": 1516503040 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005458192906435564, + "loss": 0.0677, + "theoretical_loss": 3.5411054870091787, + "tokens_seen": 1516765184 + }, + { + "epoch": 0.46, + "learning_rate": 0.000545739046701974, + "loss": 0.07, + "theoretical_loss": 3.541052079892502, + "tokens_seen": 1517027328 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0009824709268286824, + "objective/train/docs_used": 554075, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4332348108291626, + "objective/train/original_loss": 1.4332348108291626, + "objective/train/theoretical_loss": 3.5410253807637924, + "objective/train/tokens_used": 1537618400, + "objective/train/value_avg": -0.006805419921875, + "objective/train/value_loss": 0.00014581528375856578, + "objective/train/value_max": -9.173154830932617e-05, + "objective/train/value_min": -0.273193359375, + "objective/train/value_reward_corr": 0.6536788814320686, + "objective/train/value_std": 0.01123809814453125, + "objective/train/weight_avg": 1.0010511875152588, + "objective/train/weighted_lm_loss": 1.4342068433761597, + "objective/train/weights_max": 1.1899962425231934, + "objective/train/weights_min": 0.38768166303634644, + "theoretical_loss": 3.5410253807637924, + "tokens_seen": 1517158400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005456588027603916, + "loss": 0.0708, + "theoretical_loss": 3.540998684587394, + "tokens_seen": 1517289472 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005455785588188093, + "loss": 0.0712, + "theoretical_loss": 3.5409453010892022, + "tokens_seen": 1517551616 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005454983148772268, + "loss": 0.0696, + "theoretical_loss": 3.540891929393278, + "tokens_seen": 1517813760 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005454180709356443, + "loss": 0.0703, + "theoretical_loss": 3.540838569494974, + "tokens_seen": 1518075904 + }, + { + "epoch": 0.46, + "learning_rate": 0.000545337826994062, + "loss": 0.0677, + "theoretical_loss": 3.540785221389646, + "tokens_seen": 1518338048 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005452575830524795, + "loss": 0.0719, + "theoretical_loss": 3.5407318850726517, + "tokens_seen": 1518600192 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005451773391108972, + "loss": 0.0713, + "theoretical_loss": 3.5406785605393525, + "tokens_seen": 1518862336 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005450970951693147, + "loss": 0.0714, + "theoretical_loss": 3.540625247785111, + "tokens_seen": 1519124480 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005450168512277323, + "loss": 0.0702, + "theoretical_loss": 3.5405719468052945, + "tokens_seen": 1519386624 + }, + { + "epoch": 0.46, + "learning_rate": 0.00054493660728615, + "loss": 0.0682, + "theoretical_loss": 3.5405186575952716, + "tokens_seen": 1519648768 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005448563633445675, + "loss": 0.0697, + "theoretical_loss": 3.5404653801504127, + "tokens_seen": 1519910912 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005447761194029851, + "loss": 0.0713, + "theoretical_loss": 3.540412114466093, + "tokens_seen": 1520173056 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.001241142163053155, + "objective/train/docs_used": 555119, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.443703293800354, + "objective/train/original_loss": 1.443703532218933, + "objective/train/theoretical_loss": 3.5403588605376886, + "objective/train/tokens_used": 1540895200, + "objective/train/value_avg": -0.01012420654296875, + "objective/train/value_loss": 0.00023096564109437168, + "objective/train/value_max": -6.109476089477539e-05, + "objective/train/value_min": -0.2802734375, + "objective/train/value_reward_corr": 0.8002673033354555, + "objective/train/value_std": 0.019866943359375, + "objective/train/weight_avg": 1.001347541809082, + "objective/train/weighted_lm_loss": 1.4455903768539429, + "objective/train/weights_max": 1.1403095722198486, + "objective/train/weights_min": 0.36870554089546204, + "theoretical_loss": 3.5403588605376886, + "tokens_seen": 1520435200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005446958754614027, + "loss": 0.0681, + "theoretical_loss": 3.5403588605376886, + "tokens_seen": 1520435200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005446156315198203, + "loss": 0.0682, + "theoretical_loss": 3.540305618360578, + "tokens_seen": 1520697344 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005445353875782378, + "loss": 0.0733, + "theoretical_loss": 3.540252387930144, + "tokens_seen": 1520959488 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005444551436366555, + "loss": 0.0689, + "theoretical_loss": 3.540199169241771, + "tokens_seen": 1521221632 + }, + { + "epoch": 0.46, + "learning_rate": 0.000544374899695073, + "loss": 0.0711, + "theoretical_loss": 3.540145962290845, + "tokens_seen": 1521483776 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005442946557534905, + "loss": 0.0693, + "theoretical_loss": 3.5400927670727573, + "tokens_seen": 1521745920 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005442144118119083, + "loss": 0.0708, + "theoretical_loss": 3.5400395835828986, + "tokens_seen": 1522008064 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005441341678703258, + "loss": 0.0726, + "theoretical_loss": 3.539986411816665, + "tokens_seen": 1522270208 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005440539239287434, + "loss": 0.0725, + "theoretical_loss": 3.5399332517694533, + "tokens_seen": 1522532352 + }, + { + "epoch": 0.46, + "learning_rate": 0.000543973679987161, + "loss": 0.0686, + "theoretical_loss": 3.539880103436664, + "tokens_seen": 1522794496 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005438934360455786, + "loss": 0.0696, + "theoretical_loss": 3.5398269668136986, + "tokens_seen": 1523056640 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005438131921039962, + "loss": 0.0677, + "theoretical_loss": 3.539773841895964, + "tokens_seen": 1523318784 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005437329481624137, + "loss": 0.0707, + "theoretical_loss": 3.5397207286788666, + "tokens_seen": 1523580928 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.001080079935491085, + "objective/train/docs_used": 556367, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4472988843917847, + "objective/train/original_loss": 1.4472988843917847, + "objective/train/theoretical_loss": 3.5396941764566225, + "objective/train/tokens_used": 1544172000, + "objective/train/value_avg": -0.006328582763671875, + "objective/train/value_loss": 0.00018607915262691677, + "objective/train/value_max": -0.00011414289474487305, + "objective/train/value_min": -0.2391357421875, + "objective/train/value_reward_corr": 0.6196534618081238, + "objective/train/value_std": 0.01094818115234375, + "objective/train/weight_avg": 1.0011615753173828, + "objective/train/weighted_lm_loss": 1.449265956878662, + "objective/train/weights_max": 1.1805914640426636, + "objective/train/weights_min": 0.3763621747493744, + "theoretical_loss": 3.5396941764566225, + "tokens_seen": 1523712000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005436527042208313, + "loss": 0.0736, + "theoretical_loss": 3.539667627157818, + "tokens_seen": 1523843072 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005435724602792489, + "loss": 0.0704, + "theoretical_loss": 3.53961453732823, + "tokens_seen": 1524105216 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005434922163376666, + "loss": 0.0697, + "theoretical_loss": 3.53956145918552, + "tokens_seen": 1524367360 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005434119723960841, + "loss": 0.0698, + "theoretical_loss": 3.5395083927251045, + "tokens_seen": 1524629504 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005433317284545018, + "loss": 0.0737, + "theoretical_loss": 3.539455337942405, + "tokens_seen": 1524891648 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005432514845129193, + "loss": 0.0679, + "theoretical_loss": 3.5394022948328447, + "tokens_seen": 1525153792 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005431712405713368, + "loss": 0.069, + "theoretical_loss": 3.5393492633918497, + "tokens_seen": 1525415936 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005430909966297545, + "loss": 0.0705, + "theoretical_loss": 3.5392962436148485, + "tokens_seen": 1525678080 + }, + { + "epoch": 0.46, + "learning_rate": 0.000543010752688172, + "loss": 0.0682, + "theoretical_loss": 3.5392432354972723, + "tokens_seen": 1525940224 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005429305087465896, + "loss": 0.0719, + "theoretical_loss": 3.5391902390345544, + "tokens_seen": 1526202368 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005428502648050072, + "loss": 0.0696, + "theoretical_loss": 3.5391372542221315, + "tokens_seen": 1526464512 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005427700208634248, + "loss": 0.0692, + "theoretical_loss": 3.539084281055443, + "tokens_seen": 1526726656 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0010113557800650597, + "objective/train/docs_used": 557485, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3238643407821655, + "objective/train/original_loss": 1.323864459991455, + "objective/train/theoretical_loss": 3.5390313195299283, + "objective/train/tokens_used": 1547448800, + "objective/train/value_avg": -0.00528717041015625, + "objective/train/value_loss": 0.0001374837738694623, + "objective/train/value_max": -6.711483001708984e-05, + "objective/train/value_min": -0.223876953125, + "objective/train/value_reward_corr": 0.6038054569835163, + "objective/train/value_std": 0.00836944580078125, + "objective/train/weight_avg": 1.001071572303772, + "objective/train/weighted_lm_loss": 1.3252609968185425, + "objective/train/weights_max": 1.0974810123443604, + "objective/train/weights_min": 0.368222713470459, + "theoretical_loss": 3.5390313195299283, + "tokens_seen": 1526988800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005426897769218424, + "loss": 0.0711, + "theoretical_loss": 3.5390313195299283, + "tokens_seen": 1526988800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005426095329802601, + "loss": 0.0671, + "theoretical_loss": 3.538978369641033, + "tokens_seen": 1527250944 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005425292890386776, + "loss": 0.0698, + "theoretical_loss": 3.538925431384203, + "tokens_seen": 1527513088 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005424490450970952, + "loss": 0.0703, + "theoretical_loss": 3.538872504754888, + "tokens_seen": 1527775232 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005423688011555128, + "loss": 0.0698, + "theoretical_loss": 3.538819589748539, + "tokens_seen": 1528037376 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005422885572139303, + "loss": 0.0689, + "theoretical_loss": 3.5387666863606104, + "tokens_seen": 1528299520 + }, + { + "epoch": 0.46, + "learning_rate": 0.000542208313272348, + "loss": 0.072, + "theoretical_loss": 3.5387137945865588, + "tokens_seen": 1528561664 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005421280693307655, + "loss": 0.0703, + "theoretical_loss": 3.538660914421844, + "tokens_seen": 1528823808 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005420478253891831, + "loss": 0.072, + "theoretical_loss": 3.5386080458619276, + "tokens_seen": 1529085952 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005419675814476008, + "loss": 0.0709, + "theoretical_loss": 3.538555188902274, + "tokens_seen": 1529348096 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005418873375060183, + "loss": 0.0701, + "theoretical_loss": 3.53850234353835, + "tokens_seen": 1529610240 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005418070935644359, + "loss": 0.0691, + "theoretical_loss": 3.5384495097656252, + "tokens_seen": 1529872384 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005417268496228535, + "loss": 0.0724, + "theoretical_loss": 3.5383966875795716, + "tokens_seen": 1530134528 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.0014327875105664134, + "objective/train/docs_used": 558616, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3117499351501465, + "objective/train/original_loss": 1.3117499351501465, + "objective/train/theoretical_loss": 3.5383702808301325, + "objective/train/tokens_used": 1550725600, + "objective/train/value_avg": -0.00811767578125, + "objective/train/value_loss": 0.0003342017880640924, + "objective/train/value_max": -9.03010368347168e-05, + "objective/train/value_min": -0.2138671875, + "objective/train/value_reward_corr": 0.653793128617924, + "objective/train/value_std": 0.01491546630859375, + "objective/train/weight_avg": 1.0015684366226196, + "objective/train/weighted_lm_loss": 1.3147151470184326, + "objective/train/weights_max": 1.1417571306228638, + "objective/train/weights_min": 0.13709571957588196, + "theoretical_loss": 3.5383702808301325, + "tokens_seen": 1530265600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005416466056812711, + "loss": 0.0719, + "theoretical_loss": 3.538343876975664, + "tokens_seen": 1530396672 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005415663617396886, + "loss": 0.0714, + "theoretical_loss": 3.5382910779493795, + "tokens_seen": 1530658816 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005414861177981063, + "loss": 0.07, + "theoretical_loss": 3.538238290496198, + "tokens_seen": 1530920960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005414058738565238, + "loss": 0.0689, + "theoretical_loss": 3.5381855146116017, + "tokens_seen": 1531183104 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005413256299149413, + "loss": 0.0717, + "theoretical_loss": 3.5381327502910747, + "tokens_seen": 1531445248 + }, + { + "epoch": 0.46, + "learning_rate": 0.000541245385973359, + "loss": 0.071, + "theoretical_loss": 3.538079997530105, + "tokens_seen": 1531707392 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005411651420317766, + "loss": 0.0719, + "theoretical_loss": 3.538027256324182, + "tokens_seen": 1531969536 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005410848980901943, + "loss": 0.0696, + "theoretical_loss": 3.5379745266687985, + "tokens_seen": 1532231680 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005410046541486118, + "loss": 0.0675, + "theoretical_loss": 3.537921808559449, + "tokens_seen": 1532493824 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005409244102070294, + "loss": 0.0699, + "theoretical_loss": 3.537869101991631, + "tokens_seen": 1532755968 + }, + { + "epoch": 0.46, + "learning_rate": 0.000540844166265447, + "loss": 0.0731, + "theoretical_loss": 3.537816406960845, + "tokens_seen": 1533018112 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005407639223238645, + "loss": 0.0714, + "theoretical_loss": 3.537763723462593, + "tokens_seen": 1533280256 + }, + { + "epoch": 0.46, + "objective/train/advantage_avg": 0.00042100262362509966, + "objective/train/docs_used": 559697, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4558749198913574, + "objective/train/original_loss": 1.4558748006820679, + "objective/train/theoretical_loss": 3.537711051492379, + "objective/train/tokens_used": 1554002400, + "objective/train/value_avg": -0.00927734375, + "objective/train/value_loss": 0.0001727818016661331, + "objective/train/value_max": -6.109476089477539e-05, + "objective/train/value_min": -0.333984375, + "objective/train/value_reward_corr": 0.7564583381485481, + "objective/train/value_std": 0.0146026611328125, + "objective/train/weight_avg": 1.0005054473876953, + "objective/train/weighted_lm_loss": 1.4559611082077026, + "objective/train/weights_max": 1.1561311483383179, + "objective/train/weights_min": 0.6095831990242004, + "theoretical_loss": 3.537711051492379, + "tokens_seen": 1533542400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005406836783822821, + "loss": 0.0736, + "theoretical_loss": 3.537711051492379, + "tokens_seen": 1533542400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005406034344406997, + "loss": 0.0709, + "theoretical_loss": 3.5376583910457127, + "tokens_seen": 1533804544 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005405231904991174, + "loss": 0.0711, + "theoretical_loss": 3.537605742118102, + "tokens_seen": 1534066688 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005404429465575349, + "loss": 0.0721, + "theoretical_loss": 3.537553104705061, + "tokens_seen": 1534328832 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005403627026159526, + "loss": 0.0747, + "theoretical_loss": 3.5375004788021043, + "tokens_seen": 1534590976 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005402824586743701, + "loss": 0.0724, + "theoretical_loss": 3.537447864404749, + "tokens_seen": 1534853120 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005402022147327876, + "loss": 0.0713, + "theoretical_loss": 3.5373952615085154, + "tokens_seen": 1535115264 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005401219707912053, + "loss": 0.0696, + "theoretical_loss": 3.5373426701089263, + "tokens_seen": 1535377408 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005400417268496228, + "loss": 0.0728, + "theoretical_loss": 3.5372900902015063, + "tokens_seen": 1535639552 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005399614829080405, + "loss": 0.0733, + "theoretical_loss": 3.537237521781784, + "tokens_seen": 1535901696 + }, + { + "epoch": 0.47, + "learning_rate": 0.000539881238966458, + "loss": 0.072, + "theoretical_loss": 3.537184964845289, + "tokens_seen": 1536163840 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005398009950248756, + "loss": 0.0758, + "theoretical_loss": 3.5371324193875533, + "tokens_seen": 1536425984 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005397207510832933, + "loss": 0.0701, + "theoretical_loss": 3.537079885404113, + "tokens_seen": 1536688128 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.001109464792534709, + "objective/train/docs_used": 561026, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.287007451057434, + "objective/train/original_loss": 1.2870075702667236, + "objective/train/theoretical_loss": 3.5370536227138585, + "objective/train/tokens_used": 1557279200, + "objective/train/value_avg": -0.005340576171875, + "objective/train/value_loss": 0.0006102016777731478, + "objective/train/value_max": -6.109476089477539e-05, + "objective/train/value_min": -0.71337890625, + "objective/train/value_reward_corr": 0.6549939678459968, + "objective/train/value_std": 0.0154876708984375, + "objective/train/weight_avg": 1.0012873411178589, + "objective/train/weighted_lm_loss": 1.2889665365219116, + "objective/train/weights_max": 1.3794907331466675, + "objective/train/weights_min": 0.029097316786646843, + "theoretical_loss": 3.5370536227138585, + "tokens_seen": 1536819200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005396405071417108, + "loss": 0.0732, + "theoretical_loss": 3.5370273628905045, + "tokens_seen": 1536950272 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005395602632001284, + "loss": 0.0706, + "theoretical_loss": 3.5369748518422695, + "tokens_seen": 1537212416 + }, + { + "epoch": 0.47, + "learning_rate": 0.000539480019258546, + "loss": 0.0735, + "theoretical_loss": 3.5369223522549493, + "tokens_seen": 1537474560 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005393997753169636, + "loss": 0.0715, + "theoretical_loss": 3.53686986412409, + "tokens_seen": 1537736704 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005393195313753811, + "loss": 0.0712, + "theoretical_loss": 3.5368173874452378, + "tokens_seen": 1537998848 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005392392874337988, + "loss": 0.0716, + "theoretical_loss": 3.536764922213944, + "tokens_seen": 1538260992 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005391590434922163, + "loss": 0.0711, + "theoretical_loss": 3.536712468425761, + "tokens_seen": 1538523136 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005390787995506339, + "loss": 0.0705, + "theoretical_loss": 3.5366600260762433, + "tokens_seen": 1538785280 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005389985556090516, + "loss": 0.0689, + "theoretical_loss": 3.5366075951609486, + "tokens_seen": 1539047424 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005389183116674691, + "loss": 0.0713, + "theoretical_loss": 3.5365551756754376, + "tokens_seen": 1539309568 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005388380677258867, + "loss": 0.0711, + "theoretical_loss": 3.5365027676152714, + "tokens_seen": 1539571712 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005387578237843043, + "loss": 0.0727, + "theoretical_loss": 3.5364503709760164, + "tokens_seen": 1539833856 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0008831744198687375, + "objective/train/docs_used": 562258, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.411902904510498, + "objective/train/original_loss": 1.4119031429290771, + "objective/train/theoretical_loss": 3.536397985753239, + "objective/train/tokens_used": 1560556000, + "objective/train/value_avg": -0.006740570068359375, + "objective/train/value_loss": 0.00011778049520216882, + "objective/train/value_max": -9.459257125854492e-05, + "objective/train/value_min": -0.2239990234375, + "objective/train/value_reward_corr": 0.6256750582579078, + "objective/train/value_std": 0.010040283203125, + "objective/train/weight_avg": 1.0009410381317139, + "objective/train/weighted_lm_loss": 1.4133871793746948, + "objective/train/weights_max": 1.2288204431533813, + "objective/train/weights_min": 0.6369590759277344, + "theoretical_loss": 3.536397985753239, + "tokens_seen": 1540096000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005386775798427219, + "loss": 0.0733, + "theoretical_loss": 3.536397985753239, + "tokens_seen": 1540096000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005385973359011395, + "loss": 0.0762, + "theoretical_loss": 3.53634561194251, + "tokens_seen": 1540358144 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005385170919595571, + "loss": 0.0726, + "theoretical_loss": 3.5362932495394013, + "tokens_seen": 1540620288 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005384368480179746, + "loss": 0.0697, + "theoretical_loss": 3.5362408985394875, + "tokens_seen": 1540882432 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005383566040763922, + "loss": 0.0712, + "theoretical_loss": 3.5361885589383464, + "tokens_seen": 1541144576 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005382763601348099, + "loss": 0.0723, + "theoretical_loss": 3.5361362307315574, + "tokens_seen": 1541406720 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005381961161932274, + "loss": 0.0728, + "theoretical_loss": 3.5360839139147036, + "tokens_seen": 1541668864 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005381158722516451, + "loss": 0.0715, + "theoretical_loss": 3.5360316084833685, + "tokens_seen": 1541931008 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005380356283100626, + "loss": 0.0705, + "theoretical_loss": 3.53597931443314, + "tokens_seen": 1542193152 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005379553843684802, + "loss": 0.0724, + "theoretical_loss": 3.535927031759608, + "tokens_seen": 1542455296 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005378751404268978, + "loss": 0.0721, + "theoretical_loss": 3.5358747604583636, + "tokens_seen": 1542717440 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005377948964853153, + "loss": 0.0706, + "theoretical_loss": 3.5358225005250024, + "tokens_seen": 1542979584 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005377146525437329, + "loss": 0.0736, + "theoretical_loss": 3.535770251955121, + "tokens_seen": 1543241728 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0005263619241304696, + "objective/train/docs_used": 563510, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4112048149108887, + "objective/train/original_loss": 1.4112045764923096, + "objective/train/theoretical_loss": 3.5357441319301106, + "objective/train/tokens_used": 1563832800, + "objective/train/value_avg": -0.0087127685546875, + "objective/train/value_loss": 0.000459319562651217, + "objective/train/value_max": -3.820657730102539e-05, + "objective/train/value_min": -0.329833984375, + "objective/train/value_reward_corr": 0.6188310861340889, + "objective/train/value_std": 0.01538848876953125, + "objective/train/weight_avg": 1.0007210969924927, + "objective/train/weighted_lm_loss": 1.4122984409332275, + "objective/train/weights_max": 1.3907371759414673, + "objective/train/weights_min": 0.3745975196361542, + "theoretical_loss": 3.5357441319301106, + "tokens_seen": 1543372800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005376344086021505, + "loss": 0.0701, + "theoretical_loss": 3.5357180147443197, + "tokens_seen": 1543503872 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005375541646605681, + "loss": 0.073, + "theoretical_loss": 3.5356657888881986, + "tokens_seen": 1543766016 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005374739207189858, + "loss": 0.0703, + "theoretical_loss": 3.5356135743823636, + "tokens_seen": 1544028160 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005373936767774034, + "loss": 0.0724, + "theoretical_loss": 3.5355613712224203, + "tokens_seen": 1544290304 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005373134328358209, + "loss": 0.0745, + "theoretical_loss": 3.53550917940398, + "tokens_seen": 1544552448 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005372331888942385, + "loss": 0.0692, + "theoretical_loss": 3.535456998922652, + "tokens_seen": 1544814592 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005371529449526561, + "loss": 0.0712, + "theoretical_loss": 3.535404829774052, + "tokens_seen": 1545076736 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005370727010110736, + "loss": 0.0698, + "theoretical_loss": 3.535352671953796, + "tokens_seen": 1545338880 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005369924570694913, + "loss": 0.0725, + "theoretical_loss": 3.5353005254575027, + "tokens_seen": 1545601024 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005369122131279088, + "loss": 0.0769, + "theoretical_loss": 3.5352483902807945, + "tokens_seen": 1545863168 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005368319691863264, + "loss": 0.0719, + "theoretical_loss": 3.535196266419295, + "tokens_seen": 1546125312 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005367517252447441, + "loss": 0.0723, + "theoretical_loss": 3.5351441538686306, + "tokens_seen": 1546387456 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.00033845813595689833, + "objective/train/docs_used": 564726, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3939182758331299, + "objective/train/original_loss": 1.3939182758331299, + "objective/train/theoretical_loss": 3.535092052624429, + "objective/train/tokens_used": 1567109600, + "objective/train/value_avg": -0.00481414794921875, + "objective/train/value_loss": 0.0001568766892887652, + "objective/train/value_max": -4.83393669128418e-05, + "objective/train/value_min": -0.64111328125, + "objective/train/value_reward_corr": 0.6628277956239046, + "objective/train/value_std": 0.01015472412109375, + "objective/train/weight_avg": 1.000410556793213, + "objective/train/weighted_lm_loss": 1.3948057889938354, + "objective/train/weights_max": 1.4327837228775024, + "objective/train/weights_min": 0.3745289444923401, + "theoretical_loss": 3.535092052624429, + "tokens_seen": 1546649600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005366714813031616, + "loss": 0.0688, + "theoretical_loss": 3.535092052624429, + "tokens_seen": 1546649600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005365912373615792, + "loss": 0.0735, + "theoretical_loss": 3.5350399626823226, + "tokens_seen": 1546911744 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005365109934199968, + "loss": 0.0729, + "theoretical_loss": 3.534987884037945, + "tokens_seen": 1547173888 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005364307494784144, + "loss": 0.072, + "theoretical_loss": 3.5349358166869314, + "tokens_seen": 1547436032 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005363505055368319, + "loss": 0.072, + "theoretical_loss": 3.534883760624921, + "tokens_seen": 1547698176 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005362702615952496, + "loss": 0.0764, + "theoretical_loss": 3.534831715847555, + "tokens_seen": 1547960320 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005361900176536671, + "loss": 0.0732, + "theoretical_loss": 3.534779682350475, + "tokens_seen": 1548222464 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005361097737120849, + "loss": 0.0747, + "theoretical_loss": 3.534727660129329, + "tokens_seen": 1548484608 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005360295297705024, + "loss": 0.0711, + "theoretical_loss": 3.534675649179764, + "tokens_seen": 1548746752 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005359492858289199, + "loss": 0.0735, + "theoretical_loss": 3.53462364949743, + "tokens_seen": 1549008896 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005358690418873376, + "loss": 0.0707, + "theoretical_loss": 3.5345716610779814, + "tokens_seen": 1549271040 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005357887979457551, + "loss": 0.072, + "theoretical_loss": 3.5345196839170723, + "tokens_seen": 1549533184 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005357085540041727, + "loss": 0.0738, + "theoretical_loss": 3.534467718010361, + "tokens_seen": 1549795328 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0002879749226849526, + "objective/train/docs_used": 565981, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.505765438079834, + "objective/train/original_loss": 1.5057655572891235, + "objective/train/theoretical_loss": 3.534441739275974, + "objective/train/tokens_used": 1570386400, + "objective/train/value_avg": -0.007205963134765625, + "objective/train/value_loss": 0.00035114213824272156, + "objective/train/value_max": -6.35385513305664e-05, + "objective/train/value_min": -0.79736328125, + "objective/train/value_reward_corr": 0.6967833657591274, + "objective/train/value_std": 0.0176544189453125, + "objective/train/weight_avg": 1.0004457235336304, + "objective/train/weighted_lm_loss": 1.5057909488677979, + "objective/train/weights_max": 1.3775582313537598, + "objective/train/weights_min": 0.3902336657047272, + "theoretical_loss": 3.534441739275974, + "tokens_seen": 1549926400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005356283100625903, + "loss": 0.0716, + "theoretical_loss": 3.5344157633535085, + "tokens_seen": 1550057472 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005355480661210079, + "loss": 0.0723, + "theoretical_loss": 3.5343638199421763, + "tokens_seen": 1550319616 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005354678221794254, + "loss": 0.0732, + "theoretical_loss": 3.5343118877720294, + "tokens_seen": 1550581760 + }, + { + "epoch": 0.47, + "learning_rate": 0.000535387578237843, + "loss": 0.0716, + "theoretical_loss": 3.534259966838736, + "tokens_seen": 1550843904 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005353073342962607, + "loss": 0.0697, + "theoretical_loss": 3.534208057137966, + "tokens_seen": 1551106048 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005352270903546782, + "loss": 0.0728, + "theoretical_loss": 3.5341561586653905, + "tokens_seen": 1551368192 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005351468464130959, + "loss": 0.0693, + "theoretical_loss": 3.5341042714166853, + "tokens_seen": 1551630336 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005350666024715134, + "loss": 0.0703, + "theoretical_loss": 3.5340523953875267, + "tokens_seen": 1551892480 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005349863585299311, + "loss": 0.0707, + "theoretical_loss": 3.5340005305735946, + "tokens_seen": 1552154624 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005349061145883486, + "loss": 0.0686, + "theoretical_loss": 3.533948676970571, + "tokens_seen": 1552416768 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005348258706467661, + "loss": 0.0723, + "theoretical_loss": 3.533896834574139, + "tokens_seen": 1552678912 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005347456267051838, + "loss": 0.0723, + "theoretical_loss": 3.5338450033799864, + "tokens_seen": 1552941056 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": -0.000528849835973233, + "objective/train/docs_used": 567156, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4279303550720215, + "objective/train/original_loss": 1.4279303550720215, + "objective/train/theoretical_loss": 3.533793183383802, + "objective/train/tokens_used": 1573663200, + "objective/train/value_avg": -0.007747650146484375, + "objective/train/value_loss": 0.00032320283935405314, + "objective/train/value_max": -5.650520324707031e-05, + "objective/train/value_min": -0.2115478515625, + "objective/train/value_reward_corr": 0.5905321513810176, + "objective/train/value_std": 0.012298583984375, + "objective/train/weight_avg": 0.999616265296936, + "objective/train/weighted_lm_loss": 1.4269369840621948, + "objective/train/weights_max": 1.101507544517517, + "objective/train/weights_min": 0.3709147274494171, + "theoretical_loss": 3.533793183383802, + "tokens_seen": 1553203200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005346653827636013, + "loss": 0.0695, + "theoretical_loss": 3.533793183383802, + "tokens_seen": 1553203200 + }, + { + "epoch": 0.47, + "learning_rate": 0.000534585138822019, + "loss": 0.0685, + "theoretical_loss": 3.5337413745812767, + "tokens_seen": 1553465344 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005345048948804366, + "loss": 0.0717, + "theoretical_loss": 3.5336895769681043, + "tokens_seen": 1553727488 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005344246509388542, + "loss": 0.0697, + "theoretical_loss": 3.533637790539981, + "tokens_seen": 1553989632 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005343444069972717, + "loss": 0.0672, + "theoretical_loss": 3.533586015292606, + "tokens_seen": 1554251776 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005342641630556893, + "loss": 0.0707, + "theoretical_loss": 3.5335342512216794, + "tokens_seen": 1554513920 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005341839191141069, + "loss": 0.073, + "theoretical_loss": 3.5334824983229045, + "tokens_seen": 1554776064 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005341036751725244, + "loss": 0.0721, + "theoretical_loss": 3.5334307565919874, + "tokens_seen": 1555038208 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005340234312309421, + "loss": 0.0695, + "theoretical_loss": 3.533379026024636, + "tokens_seen": 1555300352 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005339431872893596, + "loss": 0.072, + "theoretical_loss": 3.53332730661656, + "tokens_seen": 1555562496 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005338629433477772, + "loss": 0.0717, + "theoretical_loss": 3.533275598363473, + "tokens_seen": 1555824640 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005337826994061949, + "loss": 0.0677, + "theoretical_loss": 3.53322390126109, + "tokens_seen": 1556086784 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005337024554646124, + "loss": 0.0731, + "theoretical_loss": 3.533172215305129, + "tokens_seen": 1556348928 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.0005209646187722683, + "objective/train/docs_used": 568457, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3047668933868408, + "objective/train/original_loss": 1.3047666549682617, + "objective/train/theoretical_loss": 3.5331463765057185, + "objective/train/tokens_used": 1576940000, + "objective/train/value_avg": -0.006496429443359375, + "objective/train/value_loss": 0.00021889428899157792, + "objective/train/value_max": -7.086992263793945e-05, + "objective/train/value_min": -0.93701171875, + "objective/train/value_reward_corr": 0.6957019665112554, + "objective/train/value_std": 0.013275146484375, + "objective/train/weight_avg": 1.0006130933761597, + "objective/train/weighted_lm_loss": 1.305483102798462, + "objective/train/weights_max": 1.1495888233184814, + "objective/train/weights_min": 0.23035363852977753, + "theoretical_loss": 3.5331463765057185, + "tokens_seen": 1556480000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005336222115230301, + "loss": 0.0681, + "theoretical_loss": 3.533120540491309, + "tokens_seen": 1556611072 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005335419675814476, + "loss": 0.0728, + "theoretical_loss": 3.533068876815352, + "tokens_seen": 1556873216 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005334617236398652, + "loss": 0.0686, + "theoretical_loss": 3.5330172242729834, + "tokens_seen": 1557135360 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005333814796982828, + "loss": 0.072, + "theoretical_loss": 3.53296558285993, + "tokens_seen": 1557397504 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005333012357567004, + "loss": 0.0671, + "theoretical_loss": 3.532913952571921, + "tokens_seen": 1557659648 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005332209918151179, + "loss": 0.0699, + "theoretical_loss": 3.5328623334046885, + "tokens_seen": 1557921792 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005331407478735357, + "loss": 0.0698, + "theoretical_loss": 3.532810725353966, + "tokens_seen": 1558183936 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005330605039319532, + "loss": 0.0722, + "theoretical_loss": 3.5327591284154893, + "tokens_seen": 1558446080 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005329802599903707, + "loss": 0.0692, + "theoretical_loss": 3.5327075425849985, + "tokens_seen": 1558708224 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005329000160487884, + "loss": 0.0703, + "theoretical_loss": 3.5326559678582337, + "tokens_seen": 1558970368 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005328197721072059, + "loss": 0.0701, + "theoretical_loss": 3.532604404230939, + "tokens_seen": 1559232512 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005327395281656235, + "loss": 0.0688, + "theoretical_loss": 3.53255285169886, + "tokens_seen": 1559494656 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 7.476971950381994e-05, + "objective/train/docs_used": 569616, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4254400730133057, + "objective/train/original_loss": 1.4254400730133057, + "objective/train/theoretical_loss": 3.5325013102577443, + "objective/train/tokens_used": 1580216800, + "objective/train/value_avg": -0.008270263671875, + "objective/train/value_loss": 0.00038079096702858806, + "objective/train/value_max": -8.028745651245117e-05, + "objective/train/value_min": -0.393310546875, + "objective/train/value_reward_corr": 0.7088766731470348, + "objective/train/value_std": 0.0188140869140625, + "objective/train/weight_avg": 1.0002461671829224, + "objective/train/weighted_lm_loss": 1.4250798225402832, + "objective/train/weights_max": 1.437957763671875, + "objective/train/weights_min": 0.36907142400741577, + "theoretical_loss": 3.5325013102577443, + "tokens_seen": 1559756800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005326592842240411, + "loss": 0.072, + "theoretical_loss": 3.5325013102577443, + "tokens_seen": 1559756800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005325790402824587, + "loss": 0.0729, + "theoretical_loss": 3.532449779903343, + "tokens_seen": 1560018944 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005324987963408762, + "loss": 0.0693, + "theoretical_loss": 3.5323982606314086, + "tokens_seen": 1560281088 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005324185523992938, + "loss": 0.0721, + "theoretical_loss": 3.5323467524376966, + "tokens_seen": 1560543232 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005323383084577115, + "loss": 0.0715, + "theoretical_loss": 3.532295255317964, + "tokens_seen": 1560805376 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005322580645161291, + "loss": 0.0673, + "theoretical_loss": 3.532243769267971, + "tokens_seen": 1561067520 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005321778205745467, + "loss": 0.0702, + "theoretical_loss": 3.5321922942834796, + "tokens_seen": 1561329664 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005320975766329642, + "loss": 0.0739, + "theoretical_loss": 3.5321408303602544, + "tokens_seen": 1561591808 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005320173326913819, + "loss": 0.0702, + "theoretical_loss": 3.532089377494062, + "tokens_seen": 1561853952 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005319370887497994, + "loss": 0.0754, + "theoretical_loss": 3.532037935680672, + "tokens_seen": 1562116096 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005318568448082169, + "loss": 0.0713, + "theoretical_loss": 3.5319865049158556, + "tokens_seen": 1562378240 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005317766008666346, + "loss": 0.0685, + "theoretical_loss": 3.5319350851953866, + "tokens_seen": 1562640384 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005316963569250521, + "loss": 0.0706, + "theoretical_loss": 3.531883676515041, + "tokens_seen": 1562902528 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": 0.00038564883288927376, + "objective/train/docs_used": 570864, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2100456953048706, + "objective/train/original_loss": 1.210045576095581, + "objective/train/theoretical_loss": 3.5318579763135953, + "objective/train/tokens_used": 1583493600, + "objective/train/value_avg": -0.01094818115234375, + "objective/train/value_loss": 0.00040198001079261303, + "objective/train/value_max": -0.00010389089584350586, + "objective/train/value_min": -0.431884765625, + "objective/train/value_reward_corr": 0.6607573354620048, + "objective/train/value_std": 0.0176239013671875, + "objective/train/weight_avg": 1.0005624294281006, + "objective/train/weighted_lm_loss": 1.2105127573013306, + "objective/train/weights_max": 1.5052146911621094, + "objective/train/weights_min": 0.3692784309387207, + "theoretical_loss": 3.5318579763135953, + "tokens_seen": 1563033600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005316161129834697, + "loss": 0.073, + "theoretical_loss": 3.5318322788705974, + "tokens_seen": 1563164672 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005315358690418874, + "loss": 0.0679, + "theoretical_loss": 3.531780892257837, + "tokens_seen": 1563426816 + }, + { + "epoch": 0.47, + "learning_rate": 0.000531455625100305, + "loss": 0.0692, + "theoretical_loss": 3.5317295166725424, + "tokens_seen": 1563688960 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005313753811587225, + "loss": 0.0689, + "theoretical_loss": 3.5316781521104996, + "tokens_seen": 1563951104 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005312951372171401, + "loss": 0.0687, + "theoretical_loss": 3.5316267985674954, + "tokens_seen": 1564213248 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005312148932755577, + "loss": 0.0694, + "theoretical_loss": 3.5315754560393207, + "tokens_seen": 1564475392 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005311346493339753, + "loss": 0.0687, + "theoretical_loss": 3.5315241245217672, + "tokens_seen": 1564737536 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005310544053923929, + "loss": 0.0698, + "theoretical_loss": 3.5314728040106305, + "tokens_seen": 1564999680 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005309741614508104, + "loss": 0.0729, + "theoretical_loss": 3.531421494501707, + "tokens_seen": 1565261824 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005308939175092282, + "loss": 0.0712, + "theoretical_loss": 3.5313701959907955, + "tokens_seen": 1565523968 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005308136735676457, + "loss": 0.0716, + "theoretical_loss": 3.5313189084736987, + "tokens_seen": 1565786112 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005307334296260632, + "loss": 0.0714, + "theoretical_loss": 3.5312676319462195, + "tokens_seen": 1566048256 + }, + { + "epoch": 0.47, + "objective/train/advantage_avg": -0.000856450293213129, + "objective/train/docs_used": 572070, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4109565019607544, + "objective/train/original_loss": 1.4109562635421753, + "objective/train/theoretical_loss": 3.531216366404165, + "objective/train/tokens_used": 1586770400, + "objective/train/value_avg": -0.0079193115234375, + "objective/train/value_loss": 0.0001714797253953293, + "objective/train/value_max": -3.707408905029297e-05, + "objective/train/value_min": -0.79638671875, + "objective/train/value_reward_corr": 0.8089750589892425, + "objective/train/value_std": 0.01605224609375, + "objective/train/weight_avg": 0.999224066734314, + "objective/train/weighted_lm_loss": 1.409802794456482, + "objective/train/weights_max": 1.2366453409194946, + "objective/train/weights_min": 0.3772245943546295, + "theoretical_loss": 3.531216366404165, + "tokens_seen": 1566310400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005306531856844809, + "loss": 0.0684, + "theoretical_loss": 3.531216366404165, + "tokens_seen": 1566310400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005305729417428984, + "loss": 0.0682, + "theoretical_loss": 3.531165111843343, + "tokens_seen": 1566572544 + }, + { + "epoch": 0.47, + "learning_rate": 0.000530492697801316, + "loss": 0.0709, + "theoretical_loss": 3.531113868259565, + "tokens_seen": 1566834688 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005304124538597336, + "loss": 0.0686, + "theoretical_loss": 3.5310626356486434, + "tokens_seen": 1567096832 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005303322099181512, + "loss": 0.069, + "theoretical_loss": 3.5310114140063944, + "tokens_seen": 1567358976 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005302519659765687, + "loss": 0.0702, + "theoretical_loss": 3.530960203328635, + "tokens_seen": 1567621120 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005301717220349863, + "loss": 0.0703, + "theoretical_loss": 3.5309090036111854, + "tokens_seen": 1567883264 + }, + { + "epoch": 0.48, + "learning_rate": 0.000530091478093404, + "loss": 0.0705, + "theoretical_loss": 3.5308578148498686, + "tokens_seen": 1568145408 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005300112341518215, + "loss": 0.0706, + "theoretical_loss": 3.5308066370405076, + "tokens_seen": 1568407552 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005299309902102392, + "loss": 0.0695, + "theoretical_loss": 3.530755470178931, + "tokens_seen": 1568669696 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005298507462686567, + "loss": 0.0697, + "theoretical_loss": 3.5307043142609666, + "tokens_seen": 1568931840 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005297705023270744, + "loss": 0.0694, + "theoretical_loss": 3.530653169282447, + "tokens_seen": 1569193984 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005296902583854919, + "loss": 0.0689, + "theoretical_loss": 3.5306020352392053, + "tokens_seen": 1569456128 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0017290321411564946, + "objective/train/docs_used": 573246, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3300449848175049, + "objective/train/original_loss": 1.3300449848175049, + "objective/train/theoretical_loss": 3.5305764723170125, + "objective/train/tokens_used": 1590047200, + "objective/train/value_avg": -0.00569915771484375, + "objective/train/value_loss": 0.00010214610665570945, + "objective/train/value_max": -8.153915405273438e-05, + "objective/train/value_min": -0.9501953125, + "objective/train/value_reward_corr": 0.8309916736042006, + "objective/train/value_std": 0.01422119140625, + "objective/train/weight_avg": 1.0017763376235962, + "objective/train/weighted_lm_loss": 1.3328149318695068, + "objective/train/weights_max": 1.5367772579193115, + "objective/train/weights_min": 0.3692037761211395, + "theoretical_loss": 3.5305764723170125, + "tokens_seen": 1569587200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005296100144439095, + "loss": 0.07, + "theoretical_loss": 3.5305509121270777, + "tokens_seen": 1569718272 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005295297705023271, + "loss": 0.0663, + "theoretical_loss": 3.5304997999419028, + "tokens_seen": 1569980416 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005294495265607446, + "loss": 0.0694, + "theoretical_loss": 3.5304486986795203, + "tokens_seen": 1570242560 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005293692826191623, + "loss": 0.07, + "theoretical_loss": 3.5303976083357735, + "tokens_seen": 1570504704 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005292890386775799, + "loss": 0.0649, + "theoretical_loss": 3.5303465289065077, + "tokens_seen": 1570766848 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005292087947359975, + "loss": 0.0674, + "theoretical_loss": 3.5302954603875696, + "tokens_seen": 1571028992 + }, + { + "epoch": 0.48, + "learning_rate": 0.000529128550794415, + "loss": 0.0695, + "theoretical_loss": 3.5302444027748106, + "tokens_seen": 1571291136 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005290483068528327, + "loss": 0.0701, + "theoretical_loss": 3.530193356064081, + "tokens_seen": 1571553280 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005289680629112502, + "loss": 0.0689, + "theoretical_loss": 3.530142320251235, + "tokens_seen": 1571815424 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005288878189696677, + "loss": 0.0688, + "theoretical_loss": 3.5300912953321304, + "tokens_seen": 1572077568 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005288075750280854, + "loss": 0.0686, + "theoretical_loss": 3.530040281302625, + "tokens_seen": 1572339712 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005287273310865029, + "loss": 0.0736, + "theoretical_loss": 3.5299892781585793, + "tokens_seen": 1572601856 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0004105140978936106, + "objective/train/docs_used": 574423, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.474505066871643, + "objective/train/original_loss": 1.4745049476623535, + "objective/train/theoretical_loss": 3.529938285895858, + "objective/train/tokens_used": 1593324000, + "objective/train/value_avg": -0.00910186767578125, + "objective/train/value_loss": 0.00025253358762711287, + "objective/train/value_max": -6.35385513305664e-05, + "objective/train/value_min": -0.323486328125, + "objective/train/value_reward_corr": 0.7612959342784285, + "objective/train/value_std": 0.016571044921875, + "objective/train/weight_avg": 1.0005269050598145, + "objective/train/weighted_lm_loss": 1.4759199619293213, + "objective/train/weights_max": 1.180879831314087, + "objective/train/weights_min": 0.3688413202762604, + "theoretical_loss": 3.529938285895858, + "tokens_seen": 1572864000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005286470871449207, + "loss": 0.0722, + "theoretical_loss": 3.529938285895858, + "tokens_seen": 1572864000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005285668432033382, + "loss": 0.0672, + "theoretical_loss": 3.529887304510326, + "tokens_seen": 1573126144 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005284865992617558, + "loss": 0.0667, + "theoretical_loss": 3.5298363339978502, + "tokens_seen": 1573388288 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005284063553201734, + "loss": 0.0672, + "theoretical_loss": 3.529785374354302, + "tokens_seen": 1573650432 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005283261113785909, + "loss": 0.0688, + "theoretical_loss": 3.529734425575553, + "tokens_seen": 1573912576 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005282458674370085, + "loss": 0.0689, + "theoretical_loss": 3.5296834876574783, + "tokens_seen": 1574174720 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005281656234954261, + "loss": 0.0702, + "theoretical_loss": 3.529632560595954, + "tokens_seen": 1574436864 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005280853795538437, + "loss": 0.0676, + "theoretical_loss": 3.5295816443868593, + "tokens_seen": 1574699008 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005280051356122612, + "loss": 0.0643, + "theoretical_loss": 3.529530739026076, + "tokens_seen": 1574961152 + }, + { + "epoch": 0.48, + "learning_rate": 0.000527924891670679, + "loss": 0.0679, + "theoretical_loss": 3.5294798445094875, + "tokens_seen": 1575223296 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005278446477290965, + "loss": 0.0702, + "theoretical_loss": 3.529428960832979, + "tokens_seen": 1575485440 + }, + { + "epoch": 0.48, + "learning_rate": 0.000527764403787514, + "loss": 0.068, + "theoretical_loss": 3.5293780879924395, + "tokens_seen": 1575747584 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005276841598459317, + "loss": 0.0665, + "theoretical_loss": 3.529327225983759, + "tokens_seen": 1576009728 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.00027555739507079124, + "objective/train/docs_used": 575737, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2302134037017822, + "objective/train/original_loss": 1.2302134037017822, + "objective/train/theoretical_loss": 3.529301799040082, + "objective/train/tokens_used": 1596600800, + "objective/train/value_avg": -0.0084075927734375, + "objective/train/value_loss": 0.00025561146321706474, + "objective/train/value_max": -5.561113357543945e-05, + "objective/train/value_min": -0.5439453125, + "objective/train/value_reward_corr": 0.6844903909527469, + "objective/train/value_std": 0.0140838623046875, + "objective/train/weight_avg": 1.0003907680511475, + "objective/train/weighted_lm_loss": 1.2300145626068115, + "objective/train/weights_max": 1.5279780626296997, + "objective/train/weights_min": 0.3712006211280823, + "theoretical_loss": 3.529301799040082, + "tokens_seen": 1576140800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005276039159043492, + "loss": 0.0684, + "theoretical_loss": 3.5292763748028295, + "tokens_seen": 1576271872 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005275236719627668, + "loss": 0.0684, + "theoretical_loss": 3.5292255344455468, + "tokens_seen": 1576534016 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005274434280211844, + "loss": 0.0676, + "theoretical_loss": 3.529174704907807, + "tokens_seen": 1576796160 + }, + { + "epoch": 0.48, + "learning_rate": 0.000527363184079602, + "loss": 0.0691, + "theoretical_loss": 3.529123886185509, + "tokens_seen": 1577058304 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005272829401380196, + "loss": 0.067, + "theoretical_loss": 3.529073078274556, + "tokens_seen": 1577320448 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005272026961964371, + "loss": 0.068, + "theoretical_loss": 3.5290222811708505, + "tokens_seen": 1577582592 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005271224522548548, + "loss": 0.0702, + "theoretical_loss": 3.528971494870299, + "tokens_seen": 1577844736 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005270422083132724, + "loss": 0.0663, + "theoretical_loss": 3.5289207193688092, + "tokens_seen": 1578106880 + }, + { + "epoch": 0.48, + "learning_rate": 0.00052696196437169, + "loss": 0.0671, + "theoretical_loss": 3.5288699546622913, + "tokens_seen": 1578369024 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005268817204301075, + "loss": 0.07, + "theoretical_loss": 3.528819200746659, + "tokens_seen": 1578631168 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005268014764885252, + "loss": 0.071, + "theoretical_loss": 3.5287684576178258, + "tokens_seen": 1578893312 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005267212325469427, + "loss": 0.071, + "theoretical_loss": 3.5287177252717097, + "tokens_seen": 1579155456 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0005158972926437855, + "objective/train/docs_used": 576827, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.405625343322754, + "objective/train/original_loss": 1.405625581741333, + "objective/train/theoretical_loss": 3.5286670037042303, + "objective/train/tokens_used": 1599877600, + "objective/train/value_avg": -0.0048828125, + "objective/train/value_loss": 0.00019700209668371826, + "objective/train/value_max": -6.556510925292969e-05, + "objective/train/value_min": -0.65673828125, + "objective/train/value_reward_corr": 0.6370804838393163, + "objective/train/value_std": 0.0123443603515625, + "objective/train/weight_avg": 1.0006061792373657, + "objective/train/weighted_lm_loss": 1.4065194129943848, + "objective/train/weights_max": 1.8773905038833618, + "objective/train/weights_min": 0.37096279859542847, + "theoretical_loss": 3.5286670037042303, + "tokens_seen": 1579417600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005266409886053603, + "loss": 0.0701, + "theoretical_loss": 3.5286670037042303, + "tokens_seen": 1579417600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005265607446637779, + "loss": 0.0683, + "theoretical_loss": 3.528616292911309, + "tokens_seen": 1579679744 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005264805007221954, + "loss": 0.0699, + "theoretical_loss": 3.5285655928888686, + "tokens_seen": 1579941888 + }, + { + "epoch": 0.48, + "learning_rate": 0.000526400256780613, + "loss": 0.0688, + "theoretical_loss": 3.5285149036328356, + "tokens_seen": 1580204032 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005263200128390307, + "loss": 0.0695, + "theoretical_loss": 3.528464225139139, + "tokens_seen": 1580466176 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005262397688974483, + "loss": 0.073, + "theoretical_loss": 3.5284135574037085, + "tokens_seen": 1580728320 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005261595249558658, + "loss": 0.0698, + "theoretical_loss": 3.5283629004224766, + "tokens_seen": 1580990464 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005260792810142835, + "loss": 0.0709, + "theoretical_loss": 3.5283122541913787, + "tokens_seen": 1581252608 + }, + { + "epoch": 0.48, + "learning_rate": 0.000525999037072701, + "loss": 0.0687, + "theoretical_loss": 3.5282616187063516, + "tokens_seen": 1581514752 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005259187931311186, + "loss": 0.0687, + "theoretical_loss": 3.528210993963334, + "tokens_seen": 1581776896 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005258385491895362, + "loss": 0.0712, + "theoretical_loss": 3.528160379958268, + "tokens_seen": 1582039040 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005257583052479537, + "loss": 0.0707, + "theoretical_loss": 3.528109776687097, + "tokens_seen": 1582301184 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005256780613063715, + "loss": 0.0721, + "theoretical_loss": 3.528059184145767, + "tokens_seen": 1582563328 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.001599782146513462, + "objective/train/docs_used": 578056, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2233972549438477, + "objective/train/original_loss": 1.2233970165252686, + "objective/train/theoretical_loss": 3.5280338918975263, + "objective/train/tokens_used": 1603154400, + "objective/train/value_avg": -0.00786590576171875, + "objective/train/value_loss": 0.00015577516751363873, + "objective/train/value_max": -6.711483001708984e-05, + "objective/train/value_min": -0.26220703125, + "objective/train/value_reward_corr": 0.7210829404756578, + "objective/train/value_std": 0.01317596435546875, + "objective/train/weight_avg": 1.001671314239502, + "objective/train/weighted_lm_loss": 1.2260372638702393, + "objective/train/weights_max": 1.1339726448059082, + "objective/train/weights_min": 0.3693629503250122, + "theoretical_loss": 3.5280338918975263, + "tokens_seen": 1582694400 + }, + { + "epoch": 0.48, + "learning_rate": 0.000525597817364789, + "loss": 0.0704, + "theoretical_loss": 3.5280086023302264, + "tokens_seen": 1582825472 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005255175734232066, + "loss": 0.0687, + "theoretical_loss": 3.527958031236425, + "tokens_seen": 1583087616 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005254373294816242, + "loss": 0.0694, + "theoretical_loss": 3.527907470860315, + "tokens_seen": 1583349760 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005253570855400417, + "loss": 0.071, + "theoretical_loss": 3.527856921197852, + "tokens_seen": 1583611904 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005252768415984593, + "loss": 0.0709, + "theoretical_loss": 3.5278063822449925, + "tokens_seen": 1583874048 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005251965976568769, + "loss": 0.07, + "theoretical_loss": 3.5277558539976956, + "tokens_seen": 1584136192 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005251163537152945, + "loss": 0.0702, + "theoretical_loss": 3.5277053364519215, + "tokens_seen": 1584398336 + }, + { + "epoch": 0.48, + "learning_rate": 0.000525036109773712, + "loss": 0.0705, + "theoretical_loss": 3.5276548296036356, + "tokens_seen": 1584660480 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005249558658321298, + "loss": 0.0685, + "theoretical_loss": 3.527604333448802, + "tokens_seen": 1584922624 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005248756218905473, + "loss": 0.0687, + "theoretical_loss": 3.527553847983389, + "tokens_seen": 1585184768 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005247953779489648, + "loss": 0.0701, + "theoretical_loss": 3.5275033732033667, + "tokens_seen": 1585446912 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005247151340073825, + "loss": 0.0672, + "theoretical_loss": 3.5274529091047073, + "tokens_seen": 1585709056 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.00040885916678234935, + "objective/train/docs_used": 579278, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2280428409576416, + "objective/train/original_loss": 1.2280428409576416, + "objective/train/theoretical_loss": 3.527402455683385, + "objective/train/tokens_used": 1606431200, + "objective/train/value_avg": -0.00798797607421875, + "objective/train/value_loss": 0.00015769110177643597, + "objective/train/value_max": -3.820657730102539e-05, + "objective/train/value_min": -0.3349609375, + "objective/train/value_reward_corr": 0.7601393100857727, + "objective/train/value_std": 0.01552581787109375, + "objective/train/weight_avg": 1.0004829168319702, + "objective/train/weighted_lm_loss": 1.2284908294677734, + "objective/train/weights_max": 1.20417058467865, + "objective/train/weights_min": 0.36865562200546265, + "theoretical_loss": 3.527402455683385, + "tokens_seen": 1585971200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005246348900658, + "loss": 0.0678, + "theoretical_loss": 3.527402455683385, + "tokens_seen": 1585971200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005245546461242177, + "loss": 0.0697, + "theoretical_loss": 3.527352012935377, + "tokens_seen": 1586233344 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005244744021826352, + "loss": 0.0695, + "theoretical_loss": 3.527301580856661, + "tokens_seen": 1586495488 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005243941582410528, + "loss": 0.0702, + "theoretical_loss": 3.527251159443219, + "tokens_seen": 1586757632 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005243139142994704, + "loss": 0.0723, + "theoretical_loss": 3.5272007486910333, + "tokens_seen": 1587019776 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005242336703578879, + "loss": 0.0701, + "theoretical_loss": 3.5271503485960896, + "tokens_seen": 1587281920 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005241534264163056, + "loss": 0.0694, + "theoretical_loss": 3.5270999591543752, + "tokens_seen": 1587544064 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005240731824747232, + "loss": 0.0686, + "theoretical_loss": 3.5270495803618793, + "tokens_seen": 1587806208 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005239929385331408, + "loss": 0.0656, + "theoretical_loss": 3.5269992122145952, + "tokens_seen": 1588068352 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005239126945915583, + "loss": 0.068, + "theoretical_loss": 3.526948854708515, + "tokens_seen": 1588330496 + }, + { + "epoch": 0.48, + "learning_rate": 0.000523832450649976, + "loss": 0.0688, + "theoretical_loss": 3.526898507839636, + "tokens_seen": 1588592640 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005237522067083935, + "loss": 0.0674, + "theoretical_loss": 3.526848171603956, + "tokens_seen": 1588854784 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005236719627668111, + "loss": 0.0705, + "theoretical_loss": 3.526797845997476, + "tokens_seen": 1589116928 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": -0.00024388304154854268, + "objective/train/docs_used": 580493, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3449385166168213, + "objective/train/original_loss": 1.3449382781982422, + "objective/train/theoretical_loss": 3.5267726871789367, + "objective/train/tokens_used": 1609708000, + "objective/train/value_avg": -0.00757598876953125, + "objective/train/value_loss": 0.0001988656003959477, + "objective/train/value_max": -6.252527236938477e-05, + "objective/train/value_min": -0.48291015625, + "objective/train/value_reward_corr": 0.8828137850946742, + "objective/train/value_std": 0.02032470703125, + "objective/train/weight_avg": 0.9998489618301392, + "objective/train/weighted_lm_loss": 1.3448597192764282, + "objective/train/weights_max": 1.1375212669372559, + "objective/train/weights_min": 0.38701966404914856, + "theoretical_loss": 3.5267726871789367, + "tokens_seen": 1589248000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005235917188252287, + "loss": 0.0669, + "theoretical_loss": 3.5267475310161984, + "tokens_seen": 1589379072 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005235114748836462, + "loss": 0.0713, + "theoretical_loss": 3.5266972266561276, + "tokens_seen": 1589641216 + }, + { + "epoch": 0.48, + "learning_rate": 0.000523431230942064, + "loss": 0.0673, + "theoretical_loss": 3.526646932913271, + "tokens_seen": 1589903360 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005233509870004815, + "loss": 0.067, + "theoretical_loss": 3.5265966497836376, + "tokens_seen": 1590165504 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005232707430588991, + "loss": 0.0727, + "theoretical_loss": 3.5265463772632386, + "tokens_seen": 1590427648 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005231904991173167, + "loss": 0.0681, + "theoretical_loss": 3.5264961153480874, + "tokens_seen": 1590689792 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005231102551757343, + "loss": 0.0706, + "theoretical_loss": 3.5264458640342, + "tokens_seen": 1590951936 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005230300112341518, + "loss": 0.0703, + "theoretical_loss": 3.5263956233175935, + "tokens_seen": 1591214080 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005229497672925694, + "loss": 0.067, + "theoretical_loss": 3.5263453931942883, + "tokens_seen": 1591476224 + }, + { + "epoch": 0.48, + "learning_rate": 0.000522869523350987, + "loss": 0.0673, + "theoretical_loss": 3.5262951736603063, + "tokens_seen": 1591738368 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005227892794094045, + "loss": 0.0691, + "theoretical_loss": 3.5262449647116716, + "tokens_seen": 1592000512 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005227090354678223, + "loss": 0.0665, + "theoretical_loss": 3.5261947663444104, + "tokens_seen": 1592262656 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 3.907005884684622e-05, + "objective/train/docs_used": 581545, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5011039972305298, + "objective/train/original_loss": 1.5011038780212402, + "objective/train/theoretical_loss": 3.5261445785545513, + "objective/train/tokens_used": 1612984800, + "objective/train/value_avg": -0.006710052490234375, + "objective/train/value_loss": 0.00017771663260646164, + "objective/train/value_max": -6.502866744995117e-05, + "objective/train/value_min": -0.21435546875, + "objective/train/value_reward_corr": 0.706507157381232, + "objective/train/value_std": 0.01238250732421875, + "objective/train/weight_avg": 1.0001215934753418, + "objective/train/weighted_lm_loss": 1.5013684034347534, + "objective/train/weights_max": 1.153806447982788, + "objective/train/weights_min": 0.36828702688217163, + "theoretical_loss": 3.5261445785545513, + "tokens_seen": 1592524800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005226287915262398, + "loss": 0.0679, + "theoretical_loss": 3.5261445785545513, + "tokens_seen": 1592524800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005225485475846574, + "loss": 0.069, + "theoretical_loss": 3.526094401338125, + "tokens_seen": 1592786944 + }, + { + "epoch": 0.48, + "learning_rate": 0.000522468303643075, + "loss": 0.0696, + "theoretical_loss": 3.526044234691165, + "tokens_seen": 1593049088 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005223880597014925, + "loss": 0.0682, + "theoretical_loss": 3.525994078609705, + "tokens_seen": 1593311232 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005223078157599101, + "loss": 0.0696, + "theoretical_loss": 3.525943933089782, + "tokens_seen": 1593573376 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005222275718183277, + "loss": 0.0717, + "theoretical_loss": 3.5258937981274365, + "tokens_seen": 1593835520 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005221473278767453, + "loss": 0.0689, + "theoretical_loss": 3.5258436737187084, + "tokens_seen": 1594097664 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005220670839351629, + "loss": 0.0672, + "theoretical_loss": 3.5257935598596424, + "tokens_seen": 1594359808 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005219868399935806, + "loss": 0.0676, + "theoretical_loss": 3.5257434565462833, + "tokens_seen": 1594621952 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005219065960519981, + "loss": 0.0695, + "theoretical_loss": 3.5256933637746792, + "tokens_seen": 1594884096 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005218263521104157, + "loss": 0.0652, + "theoretical_loss": 3.5256432815408796, + "tokens_seen": 1595146240 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005217461081688333, + "loss": 0.0661, + "theoretical_loss": 3.5255932098409364, + "tokens_seen": 1595408384 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005216658642272508, + "loss": 0.0679, + "theoretical_loss": 3.5255431486709043, + "tokens_seen": 1595670528 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.0021843239665031433, + "objective/train/docs_used": 582715, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3551676273345947, + "objective/train/original_loss": 1.3551673889160156, + "objective/train/theoretical_loss": 3.5255181220333722, + "objective/train/tokens_used": 1616261600, + "objective/train/value_avg": -0.011962890625, + "objective/train/value_loss": 0.0005959582631476223, + "objective/train/value_max": -6.556510925292969e-05, + "objective/train/value_min": -0.79541015625, + "objective/train/value_reward_corr": 0.6982835942676264, + "objective/train/value_std": 0.0255889892578125, + "objective/train/weight_avg": 1.0024514198303223, + "objective/train/weighted_lm_loss": 1.3577196598052979, + "objective/train/weights_max": 1.8855996131896973, + "objective/train/weights_min": 0.37003564834594727, + "theoretical_loss": 3.5255181220333722, + "tokens_seen": 1595801600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005215856202856685, + "loss": 0.0651, + "theoretical_loss": 3.525493098026839, + "tokens_seen": 1595932672 + }, + { + "epoch": 0.48, + "learning_rate": 0.000521505376344086, + "loss": 0.0692, + "theoretical_loss": 3.5254430579047993, + "tokens_seen": 1596194816 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005214251324025036, + "loss": 0.0659, + "theoretical_loss": 3.5253930283008454, + "tokens_seen": 1596456960 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005213448884609212, + "loss": 0.069, + "theoretical_loss": 3.5253430092110403, + "tokens_seen": 1596719104 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005212646445193387, + "loss": 0.067, + "theoretical_loss": 3.5252930006314482, + "tokens_seen": 1596981248 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005211844005777564, + "loss": 0.0679, + "theoretical_loss": 3.5252430025581356, + "tokens_seen": 1597243392 + }, + { + "epoch": 0.48, + "learning_rate": 0.000521104156636174, + "loss": 0.0678, + "theoretical_loss": 3.5251930149871726, + "tokens_seen": 1597505536 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005210239126945916, + "loss": 0.0675, + "theoretical_loss": 3.5251430379146296, + "tokens_seen": 1597767680 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005209436687530092, + "loss": 0.0674, + "theoretical_loss": 3.5250930713365802, + "tokens_seen": 1598029824 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005208634248114268, + "loss": 0.0655, + "theoretical_loss": 3.525043115249099, + "tokens_seen": 1598291968 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005207831808698443, + "loss": 0.07, + "theoretical_loss": 3.5249931696482637, + "tokens_seen": 1598554112 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005207029369282619, + "loss": 0.0699, + "theoretical_loss": 3.524943234530154, + "tokens_seen": 1598816256 + }, + { + "epoch": 0.48, + "objective/train/advantage_avg": 0.00034000459709204733, + "objective/train/docs_used": 583998, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4760903120040894, + "objective/train/original_loss": 1.4760903120040894, + "objective/train/theoretical_loss": 3.5248933098908517, + "objective/train/tokens_used": 1619538400, + "objective/train/value_avg": -0.006893157958984375, + "objective/train/value_loss": 0.00016073322331067175, + "objective/train/value_max": -5.739927291870117e-05, + "objective/train/value_min": -0.345703125, + "objective/train/value_reward_corr": 0.6807590254059671, + "objective/train/value_std": 0.01171112060546875, + "objective/train/weight_avg": 1.0004159212112427, + "objective/train/weighted_lm_loss": 1.4770082235336304, + "objective/train/weights_max": 1.1568527221679688, + "objective/train/weights_min": 0.40742918848991394, + "theoretical_loss": 3.5248933098908517, + "tokens_seen": 1599078400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005206226929866795, + "loss": 0.0688, + "theoretical_loss": 3.5248933098908517, + "tokens_seen": 1599078400 + }, + { + "epoch": 0.48, + "learning_rate": 0.000520542449045097, + "loss": 0.0682, + "theoretical_loss": 3.5248433957264402, + "tokens_seen": 1599340544 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005204622051035148, + "loss": 0.0703, + "theoretical_loss": 3.5247934920330053, + "tokens_seen": 1599602688 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005203819611619323, + "loss": 0.0703, + "theoretical_loss": 3.5247435988066353, + "tokens_seen": 1599864832 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005203017172203499, + "loss": 0.0694, + "theoretical_loss": 3.52469371604342, + "tokens_seen": 1600126976 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005202214732787675, + "loss": 0.0692, + "theoretical_loss": 3.524643843739452, + "tokens_seen": 1600389120 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005201412293371851, + "loss": 0.0727, + "theoretical_loss": 3.524593981890825, + "tokens_seen": 1600651264 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005200609853956026, + "loss": 0.069, + "theoretical_loss": 3.524544130493635, + "tokens_seen": 1600913408 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005199807414540202, + "loss": 0.0686, + "theoretical_loss": 3.5244942895439815, + "tokens_seen": 1601175552 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005199004975124378, + "loss": 0.069, + "theoretical_loss": 3.524444459037965, + "tokens_seen": 1601437696 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005198202535708553, + "loss": 0.0686, + "theoretical_loss": 3.5243946389716867, + "tokens_seen": 1601699840 + }, + { + "epoch": 0.49, + "learning_rate": 0.000519740009629273, + "loss": 0.0705, + "theoretical_loss": 3.5243448293412527, + "tokens_seen": 1601961984 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005196597656876906, + "loss": 0.0706, + "theoretical_loss": 3.5242950301427696, + "tokens_seen": 1602224128 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0010038897162303329, + "objective/train/docs_used": 585126, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3920528888702393, + "objective/train/original_loss": 1.3920525312423706, + "objective/train/theoretical_loss": 3.524270134454293, + "objective/train/tokens_used": 1622815200, + "objective/train/value_avg": -0.006145477294921875, + "objective/train/value_loss": 0.00022318839910440147, + "objective/train/value_max": -3.737211227416992e-05, + "objective/train/value_min": -0.71484375, + "objective/train/value_reward_corr": 0.6453517865349159, + "objective/train/value_std": 0.01385498046875, + "objective/train/weight_avg": 1.0011008977890015, + "objective/train/weighted_lm_loss": 1.3941224813461304, + "objective/train/weights_max": 1.4599730968475342, + "objective/train/weights_min": 0.3827209174633026, + "theoretical_loss": 3.524270134454293, + "tokens_seen": 1602355200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005195795217461083, + "loss": 0.0675, + "theoretical_loss": 3.5242452413723457, + "tokens_seen": 1602486272 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005194992778045258, + "loss": 0.069, + "theoretical_loss": 3.5241954630260923, + "tokens_seen": 1602748416 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005194190338629433, + "loss": 0.0655, + "theoretical_loss": 3.524145695100123, + "tokens_seen": 1603010560 + }, + { + "epoch": 0.49, + "learning_rate": 0.000519338789921361, + "loss": 0.0686, + "theoretical_loss": 3.5240959375905527, + "tokens_seen": 1603272704 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005192585459797785, + "loss": 0.0689, + "theoretical_loss": 3.5240461904934977, + "tokens_seen": 1603534848 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005191783020381961, + "loss": 0.0696, + "theoretical_loss": 3.5239964538050788, + "tokens_seen": 1603796992 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005190980580966137, + "loss": 0.0692, + "theoretical_loss": 3.5239467275214165, + "tokens_seen": 1604059136 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005190178141550314, + "loss": 0.0707, + "theoretical_loss": 3.523897011638635, + "tokens_seen": 1604321280 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005189375702134489, + "loss": 0.0691, + "theoretical_loss": 3.5238473061528586, + "tokens_seen": 1604583424 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005188573262718665, + "loss": 0.0659, + "theoretical_loss": 3.5237976110602163, + "tokens_seen": 1604845568 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005187770823302841, + "loss": 0.0715, + "theoretical_loss": 3.523747926356837, + "tokens_seen": 1605107712 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005186968383887016, + "loss": 0.0686, + "theoretical_loss": 3.523698252038853, + "tokens_seen": 1605369856 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.00131322315428406, + "objective/train/docs_used": 586391, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3650020360946655, + "objective/train/original_loss": 1.365002155303955, + "objective/train/theoretical_loss": 3.5236485881023976, + "objective/train/tokens_used": 1626092000, + "objective/train/value_avg": -0.00806427001953125, + "objective/train/value_loss": 0.0001866552047431469, + "objective/train/value_max": -8.612871170043945e-05, + "objective/train/value_min": -0.99072265625, + "objective/train/value_reward_corr": 0.7294081039501898, + "objective/train/value_std": 0.01560211181640625, + "objective/train/weight_avg": 1.001397728919983, + "objective/train/weighted_lm_loss": 1.3666753768920898, + "objective/train/weights_max": 1.173264503479004, + "objective/train/weights_min": 0.36904817819595337, + "theoretical_loss": 3.5236485881023976, + "tokens_seen": 1605632000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005186165944471193, + "loss": 0.0676, + "theoretical_loss": 3.5236485881023976, + "tokens_seen": 1605632000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005185363505055368, + "loss": 0.0671, + "theoretical_loss": 3.5235989345436076, + "tokens_seen": 1605894144 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005184561065639545, + "loss": 0.0693, + "theoretical_loss": 3.52354929135862, + "tokens_seen": 1606156288 + }, + { + "epoch": 0.49, + "learning_rate": 0.000518375862622372, + "loss": 0.0665, + "theoretical_loss": 3.5234996585435754, + "tokens_seen": 1606418432 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005182956186807895, + "loss": 0.0687, + "theoretical_loss": 3.5234500360946157, + "tokens_seen": 1606680576 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005182153747392073, + "loss": 0.0663, + "theoretical_loss": 3.5234004240078853, + "tokens_seen": 1606942720 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005181351307976248, + "loss": 0.0691, + "theoretical_loss": 3.5233508222795304, + "tokens_seen": 1607204864 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005180548868560424, + "loss": 0.0673, + "theoretical_loss": 3.5233012309057, + "tokens_seen": 1607467008 + }, + { + "epoch": 0.49, + "learning_rate": 0.00051797464291446, + "loss": 0.0685, + "theoretical_loss": 3.5232516498825426, + "tokens_seen": 1607729152 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005178943989728776, + "loss": 0.0706, + "theoretical_loss": 3.5232020792062126, + "tokens_seen": 1607991296 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005178141550312951, + "loss": 0.067, + "theoretical_loss": 3.523152518872864, + "tokens_seen": 1608253440 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005177339110897127, + "loss": 0.0695, + "theoretical_loss": 3.5231029688786526, + "tokens_seen": 1608515584 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005176536671481303, + "loss": 0.0692, + "theoretical_loss": 3.523053429219738, + "tokens_seen": 1608777728 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": -0.00025140075013041496, + "objective/train/docs_used": 587617, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.6009109020233154, + "objective/train/original_loss": 1.6009111404418945, + "objective/train/theoretical_loss": 3.5230286632648165, + "objective/train/tokens_used": 1629368800, + "objective/train/value_avg": -0.00858306884765625, + "objective/train/value_loss": 0.0004015047161374241, + "objective/train/value_max": -7.033348083496094e-05, + "objective/train/value_min": -0.63232421875, + "objective/train/value_reward_corr": 0.6128993489929545, + "objective/train/value_std": 0.01490020751953125, + "objective/train/weight_avg": 0.9999117851257324, + "objective/train/weighted_lm_loss": 1.6008124351501465, + "objective/train/weights_max": 1.3433096408843994, + "objective/train/weights_min": 0.2304520606994629, + "theoretical_loss": 3.5230286632648165, + "tokens_seen": 1608908800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005175734232065478, + "loss": 0.0742, + "theoretical_loss": 3.52300389989228, + "tokens_seen": 1609039872 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005174931792649656, + "loss": 0.0716, + "theoretical_loss": 3.5229543808924415, + "tokens_seen": 1609302016 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005174129353233831, + "loss": 0.0684, + "theoretical_loss": 3.522904872216388, + "tokens_seen": 1609564160 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005173326913818007, + "loss": 0.069, + "theoretical_loss": 3.5228553738602857, + "tokens_seen": 1609826304 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005172524474402183, + "loss": 0.0706, + "theoretical_loss": 3.5228058858203033, + "tokens_seen": 1610088448 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005171722034986359, + "loss": 0.0704, + "theoretical_loss": 3.5227564080926115, + "tokens_seen": 1610350592 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005170919595570535, + "loss": 0.0696, + "theoretical_loss": 3.5227069406733844, + "tokens_seen": 1610612736 + }, + { + "epoch": 0.49, + "learning_rate": 0.000517011715615471, + "loss": 0.0709, + "theoretical_loss": 3.5226574835587963, + "tokens_seen": 1610874880 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005169314716738886, + "loss": 0.0676, + "theoretical_loss": 3.522608036745024, + "tokens_seen": 1611137024 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005168512277323062, + "loss": 0.0698, + "theoretical_loss": 3.5225586002282467, + "tokens_seen": 1611399168 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005167709837907239, + "loss": 0.0702, + "theoretical_loss": 3.5225091740046457, + "tokens_seen": 1611661312 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005166907398491414, + "loss": 0.072, + "theoretical_loss": 3.5224597580704033, + "tokens_seen": 1611923456 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": -0.00028426916105672717, + "objective/train/docs_used": 588885, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4351452589035034, + "objective/train/original_loss": 1.4351451396942139, + "objective/train/theoretical_loss": 3.5224103524217067, + "objective/train/tokens_used": 1632645600, + "objective/train/value_avg": -0.0109710693359375, + "objective/train/value_loss": 0.0004222493735142052, + "objective/train/value_max": -5.7816505432128906e-05, + "objective/train/value_min": -0.90771484375, + "objective/train/value_reward_corr": 0.8208013011530193, + "objective/train/value_std": 0.0279388427734375, + "objective/train/weight_avg": 0.9999049305915833, + "objective/train/weighted_lm_loss": 1.4349756240844727, + "objective/train/weights_max": 1.4129830598831177, + "objective/train/weights_min": 0.37088075280189514, + "theoretical_loss": 3.5224103524217067, + "tokens_seen": 1612185600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005166104959075591, + "loss": 0.0694, + "theoretical_loss": 3.5224103524217067, + "tokens_seen": 1612185600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005165302519659766, + "loss": 0.0706, + "theoretical_loss": 3.522360957054741, + "tokens_seen": 1612447744 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005164500080243941, + "loss": 0.0668, + "theoretical_loss": 3.5223115719656963, + "tokens_seen": 1612709888 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005163697640828118, + "loss": 0.0705, + "theoretical_loss": 3.5222621971507646, + "tokens_seen": 1612972032 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005162895201412293, + "loss": 0.0696, + "theoretical_loss": 3.522212832606138, + "tokens_seen": 1613234176 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005162092761996469, + "loss": 0.0714, + "theoretical_loss": 3.5221634783280122, + "tokens_seen": 1613496320 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005161290322580645, + "loss": 0.0691, + "theoretical_loss": 3.5221141343125852, + "tokens_seen": 1613758464 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005160487883164821, + "loss": 0.0713, + "theoretical_loss": 3.5220648005560555, + "tokens_seen": 1614020608 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005159685443748997, + "loss": 0.075, + "theoretical_loss": 3.522015477054625, + "tokens_seen": 1614282752 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005158883004333173, + "loss": 0.0729, + "theoretical_loss": 3.521966163804497, + "tokens_seen": 1614544896 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005158080564917349, + "loss": 0.0707, + "theoretical_loss": 3.521916860801877, + "tokens_seen": 1614807040 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005157278125501525, + "loss": 0.0705, + "theoretical_loss": 3.521867568042973, + "tokens_seen": 1615069184 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005156475686085701, + "loss": 0.0701, + "theoretical_loss": 3.5218182855239935, + "tokens_seen": 1615331328 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.001808401197195053, + "objective/train/docs_used": 590099, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3354768753051758, + "objective/train/original_loss": 1.3354768753051758, + "objective/train/theoretical_loss": 3.5217936481032917, + "objective/train/tokens_used": 1635922400, + "objective/train/value_avg": -0.01221466064453125, + "objective/train/value_loss": 0.0007001345511525869, + "objective/train/value_max": -5.346536636352539e-05, + "objective/train/value_min": -0.861328125, + "objective/train/value_reward_corr": 0.7123812003133756, + "objective/train/value_std": 0.0258026123046875, + "objective/train/weight_avg": 1.002112865447998, + "objective/train/weighted_lm_loss": 1.3375283479690552, + "objective/train/weights_max": 1.4017969369888306, + "objective/train/weights_min": 0.3730887472629547, + "theoretical_loss": 3.5217936481032917, + "tokens_seen": 1615462400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005155673246669876, + "loss": 0.0682, + "theoretical_loss": 3.5217690132411508, + "tokens_seen": 1615593472 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005154870807254053, + "loss": 0.0709, + "theoretical_loss": 3.521719751190658, + "tokens_seen": 1615855616 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005154068367838228, + "loss": 0.0698, + "theoretical_loss": 3.5216704993687307, + "tokens_seen": 1616117760 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005153265928422403, + "loss": 0.0694, + "theoretical_loss": 3.5216212577715873, + "tokens_seen": 1616379904 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005152463489006581, + "loss": 0.0711, + "theoretical_loss": 3.5215720263954458, + "tokens_seen": 1616642048 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005151661049590756, + "loss": 0.0693, + "theoretical_loss": 3.521522805236529, + "tokens_seen": 1616904192 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005150858610174932, + "loss": 0.0725, + "theoretical_loss": 3.52147359429106, + "tokens_seen": 1617166336 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005150056170759108, + "loss": 0.071, + "theoretical_loss": 3.5214243935552654, + "tokens_seen": 1617428480 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005149253731343284, + "loss": 0.0663, + "theoretical_loss": 3.5213752030253715, + "tokens_seen": 1617690624 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005148451291927459, + "loss": 0.0699, + "theoretical_loss": 3.5213260226976084, + "tokens_seen": 1617952768 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005147648852511635, + "loss": 0.0703, + "theoretical_loss": 3.5212768525682074, + "tokens_seen": 1618214912 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005146846413095811, + "loss": 0.0714, + "theoretical_loss": 3.5212276926334027, + "tokens_seen": 1618477056 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0004180370888207108, + "objective/train/docs_used": 591377, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.235845923423767, + "objective/train/original_loss": 1.235845923423767, + "objective/train/theoretical_loss": 3.5211785428894293, + "objective/train/tokens_used": 1639199200, + "objective/train/value_avg": -0.0059814453125, + "objective/train/value_loss": 0.00012016700202366337, + "objective/train/value_max": -7.426738739013672e-05, + "objective/train/value_min": -0.394775390625, + "objective/train/value_reward_corr": 0.6571299259985038, + "objective/train/value_std": 0.00959014892578125, + "objective/train/weight_avg": 1.000473976135254, + "objective/train/weighted_lm_loss": 1.2364455461502075, + "objective/train/weights_max": 1.16512930393219, + "objective/train/weights_min": 0.39590752124786377, + "theoretical_loss": 3.5211785428894293, + "tokens_seen": 1618739200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005146043973679986, + "loss": 0.0673, + "theoretical_loss": 3.5211785428894293, + "tokens_seen": 1618739200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005145241534264164, + "loss": 0.072, + "theoretical_loss": 3.5211294033325258, + "tokens_seen": 1619001344 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005144439094848339, + "loss": 0.0686, + "theoretical_loss": 3.5210802739589306, + "tokens_seen": 1619263488 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005143636655432516, + "loss": 0.0696, + "theoretical_loss": 3.521031154764886, + "tokens_seen": 1619525632 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005142834216016691, + "loss": 0.0708, + "theoretical_loss": 3.520982045746636, + "tokens_seen": 1619787776 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005142031776600867, + "loss": 0.0699, + "theoretical_loss": 3.5209329469004254, + "tokens_seen": 1620049920 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005141229337185043, + "loss": 0.0715, + "theoretical_loss": 3.5208838582225024, + "tokens_seen": 1620312064 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005140426897769218, + "loss": 0.0686, + "theoretical_loss": 3.5208347797091157, + "tokens_seen": 1620574208 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005139624458353394, + "loss": 0.0681, + "theoretical_loss": 3.5207857113565177, + "tokens_seen": 1620836352 + }, + { + "epoch": 0.49, + "learning_rate": 0.000513882201893757, + "loss": 0.0693, + "theoretical_loss": 3.520736653160962, + "tokens_seen": 1621098496 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005138019579521747, + "loss": 0.0694, + "theoretical_loss": 3.520687605118704, + "tokens_seen": 1621360640 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005137217140105922, + "loss": 0.0722, + "theoretical_loss": 3.520638567226001, + "tokens_seen": 1621622784 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005136414700690099, + "loss": 0.0697, + "theoretical_loss": 3.5205895394791127, + "tokens_seen": 1621884928 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.00047303320025093853, + "objective/train/docs_used": 592532, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3057827949523926, + "objective/train/original_loss": 1.3057827949523926, + "objective/train/theoretical_loss": 3.5205650294091804, + "objective/train/tokens_used": 1642476000, + "objective/train/value_avg": -0.0077362060546875, + "objective/train/value_loss": 0.0004286039329599589, + "objective/train/value_max": -2.9981136322021484e-05, + "objective/train/value_min": -0.9453125, + "objective/train/value_reward_corr": 0.7418295034563557, + "objective/train/value_std": 0.023773193359375, + "objective/train/weight_avg": 1.0006773471832275, + "objective/train/weighted_lm_loss": 1.3059204816818237, + "objective/train/weights_max": 2.067409038543701, + "objective/train/weights_min": 0.39820975065231323, + "theoretical_loss": 3.5205650294091804, + "tokens_seen": 1622016000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005135612261274274, + "loss": 0.0687, + "theoretical_loss": 3.5205405218743007, + "tokens_seen": 1622147072 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005134809821858449, + "loss": 0.0701, + "theoretical_loss": 3.520491514407828, + "tokens_seen": 1622409216 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005134007382442626, + "loss": 0.07, + "theoretical_loss": 3.520442517075961, + "tokens_seen": 1622671360 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005133204943026801, + "loss": 0.0681, + "theoretical_loss": 3.5203935298749656, + "tokens_seen": 1622933504 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005132402503610978, + "loss": 0.0688, + "theoretical_loss": 3.520344552801113, + "tokens_seen": 1623195648 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005131600064195153, + "loss": 0.0706, + "theoretical_loss": 3.5202955858506737, + "tokens_seen": 1623457792 + }, + { + "epoch": 0.49, + "learning_rate": 0.000513079762477933, + "loss": 0.0725, + "theoretical_loss": 3.520246629019921, + "tokens_seen": 1623719936 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005129995185363506, + "loss": 0.0694, + "theoretical_loss": 3.5201976823051306, + "tokens_seen": 1623982080 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005129192745947681, + "loss": 0.0709, + "theoretical_loss": 3.5201487457025795, + "tokens_seen": 1624244224 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005128390306531857, + "loss": 0.0695, + "theoretical_loss": 3.5200998192085473, + "tokens_seen": 1624506368 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005127587867116033, + "loss": 0.069, + "theoretical_loss": 3.5200509028193148, + "tokens_seen": 1624768512 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005126785427700209, + "loss": 0.0722, + "theoretical_loss": 3.5200019965311657, + "tokens_seen": 1625030656 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0004936844925396144, + "objective/train/docs_used": 593743, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.600699782371521, + "objective/train/original_loss": 1.6006999015808105, + "objective/train/theoretical_loss": 3.519953100340385, + "objective/train/tokens_used": 1645752800, + "objective/train/value_avg": -0.007678985595703125, + "objective/train/value_loss": 0.00025552220176905394, + "objective/train/value_max": -6.657838821411133e-05, + "objective/train/value_min": -0.8681640625, + "objective/train/value_reward_corr": 0.7401646371099073, + "objective/train/value_std": 0.01708984375, + "objective/train/weight_avg": 1.0006078481674194, + "objective/train/weighted_lm_loss": 1.602151870727539, + "objective/train/weights_max": 1.651138186454773, + "objective/train/weights_min": 0.36892783641815186, + "theoretical_loss": 3.519953100340385, + "tokens_seen": 1625292800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005125982988284384, + "loss": 0.0712, + "theoretical_loss": 3.519953100340385, + "tokens_seen": 1625292800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005125180548868561, + "loss": 0.0729, + "theoretical_loss": 3.51990421424326, + "tokens_seen": 1625554944 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005124378109452736, + "loss": 0.0703, + "theoretical_loss": 3.519855338236079, + "tokens_seen": 1625817088 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005123575670036911, + "loss": 0.0687, + "theoretical_loss": 3.5198064723151345, + "tokens_seen": 1626079232 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005122773230621089, + "loss": 0.0707, + "theoretical_loss": 3.519757616476719, + "tokens_seen": 1626341376 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005121970791205264, + "loss": 0.0692, + "theoretical_loss": 3.519708770717126, + "tokens_seen": 1626603520 + }, + { + "epoch": 0.49, + "learning_rate": 0.000512116835178944, + "loss": 0.0718, + "theoretical_loss": 3.519659935032655, + "tokens_seen": 1626865664 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005120365912373616, + "loss": 0.0709, + "theoretical_loss": 3.5196111094196034, + "tokens_seen": 1627127808 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005119563472957792, + "loss": 0.0684, + "theoretical_loss": 3.5195622938742726, + "tokens_seen": 1627389952 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005118761033541968, + "loss": 0.0712, + "theoretical_loss": 3.519513488392965, + "tokens_seen": 1627652096 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005117958594126143, + "loss": 0.0706, + "theoretical_loss": 3.5194646929719853, + "tokens_seen": 1627914240 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005117156154710319, + "loss": 0.0706, + "theoretical_loss": 3.519415907607641, + "tokens_seen": 1628176384 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005116353715294495, + "loss": 0.0684, + "theoretical_loss": 3.5193671322962397, + "tokens_seen": 1628438528 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.00031531660351902246, + "objective/train/docs_used": 595003, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.356420874595642, + "objective/train/original_loss": 1.3564209938049316, + "objective/train/theoretical_loss": 3.5193427484092403, + "objective/train/tokens_used": 1649029600, + "objective/train/value_avg": -0.00811004638671875, + "objective/train/value_loss": 0.0006286876741796732, + "objective/train/value_max": -0.00010639429092407227, + "objective/train/value_min": -0.9814453125, + "objective/train/value_reward_corr": 0.7337605809007626, + "objective/train/value_std": 0.029541015625, + "objective/train/weight_avg": 1.0006129741668701, + "objective/train/weighted_lm_loss": 1.3556339740753174, + "objective/train/weights_max": 2.1884546279907227, + "objective/train/weights_min": 0.3697238564491272, + "theoretical_loss": 3.5193427484092403, + "tokens_seen": 1628569600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005115551275878672, + "loss": 0.0703, + "theoretical_loss": 3.519318367034093, + "tokens_seen": 1628700672 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005114748836462847, + "loss": 0.0692, + "theoretical_loss": 3.519269611817513, + "tokens_seen": 1628962816 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005113946397047024, + "loss": 0.0712, + "theoretical_loss": 3.5192208666428146, + "tokens_seen": 1629224960 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005113143957631199, + "loss": 0.0702, + "theoretical_loss": 3.5191721315063136, + "tokens_seen": 1629487104 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005112341518215374, + "loss": 0.0683, + "theoretical_loss": 3.5191234064043293, + "tokens_seen": 1629749248 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005111539078799551, + "loss": 0.0678, + "theoretical_loss": 3.5190746913331816, + "tokens_seen": 1630011392 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005110736639383726, + "loss": 0.0706, + "theoretical_loss": 3.5190259862891926, + "tokens_seen": 1630273536 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005109934199967902, + "loss": 0.0684, + "theoretical_loss": 3.518977291268686, + "tokens_seen": 1630535680 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005109131760552078, + "loss": 0.071, + "theoretical_loss": 3.5189286062679894, + "tokens_seen": 1630797824 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005108329321136255, + "loss": 0.0697, + "theoretical_loss": 3.51887993128343, + "tokens_seen": 1631059968 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005107526881720431, + "loss": 0.0661, + "theoretical_loss": 3.518831266311339, + "tokens_seen": 1631322112 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005106724442304607, + "loss": 0.0704, + "theoretical_loss": 3.518782611348046, + "tokens_seen": 1631584256 + }, + { + "epoch": 0.49, + "objective/train/advantage_avg": 0.0003234239702578634, + "objective/train/docs_used": 596193, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4978060722351074, + "objective/train/original_loss": 1.4978063106536865, + "objective/train/theoretical_loss": 3.5187339663898873, + "objective/train/tokens_used": 1652306400, + "objective/train/value_avg": -0.005161285400390625, + "objective/train/value_loss": 9.56832809606567e-05, + "objective/train/value_max": -6.973743438720703e-05, + "objective/train/value_min": -0.171630859375, + "objective/train/value_reward_corr": 0.634942167538914, + "objective/train/value_std": 0.0083770751953125, + "objective/train/weight_avg": 1.0003665685653687, + "objective/train/weighted_lm_loss": 1.4985394477844238, + "objective/train/weights_max": 1.1682624816894531, + "objective/train/weights_min": 0.36887577176094055, + "theoretical_loss": 3.5187339663898873, + "tokens_seen": 1631846400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005105922002888782, + "loss": 0.0707, + "theoretical_loss": 3.5187339663898873, + "tokens_seen": 1631846400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005105119563472958, + "loss": 0.0709, + "theoretical_loss": 3.5186853314331974, + "tokens_seen": 1632108544 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005104317124057134, + "loss": 0.0703, + "theoretical_loss": 3.5186367064743145, + "tokens_seen": 1632370688 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005103514684641309, + "loss": 0.073, + "theoretical_loss": 3.518588091509578, + "tokens_seen": 1632632832 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005102712245225486, + "loss": 0.0695, + "theoretical_loss": 3.51853948653533, + "tokens_seen": 1632894976 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005101909805809661, + "loss": 0.0689, + "theoretical_loss": 3.5184908915479145, + "tokens_seen": 1633157120 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005101107366393837, + "loss": 0.0696, + "theoretical_loss": 3.5184423065436756, + "tokens_seen": 1633419264 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005100304926978014, + "loss": 0.0694, + "theoretical_loss": 3.5183937315189615, + "tokens_seen": 1633681408 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005099502487562189, + "loss": 0.0679, + "theoretical_loss": 3.5183451664701217, + "tokens_seen": 1633943552 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005098700048146365, + "loss": 0.0693, + "theoretical_loss": 3.5182966113935072, + "tokens_seen": 1634205696 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005097897608730541, + "loss": 0.0696, + "theoretical_loss": 3.518248066285471, + "tokens_seen": 1634467840 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005097095169314717, + "loss": 0.0676, + "theoretical_loss": 3.518199531142369, + "tokens_seen": 1634729984 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005096292729898892, + "loss": 0.0705, + "theoretical_loss": 3.518151005960557, + "tokens_seen": 1634992128 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0016112735029309988, + "objective/train/docs_used": 597373, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4085406064987183, + "objective/train/original_loss": 1.4085404872894287, + "objective/train/theoretical_loss": 3.518126747103997, + "objective/train/tokens_used": 1655583200, + "objective/train/value_avg": -0.00795745849609375, + "objective/train/value_loss": 0.0002556704275775701, + "objective/train/value_max": -5.692243576049805e-05, + "objective/train/value_min": -0.71630859375, + "objective/train/value_reward_corr": 0.7167812131379807, + "objective/train/value_std": 0.0161590576171875, + "objective/train/weight_avg": 1.0017259120941162, + "objective/train/weighted_lm_loss": 1.4109013080596924, + "objective/train/weights_max": 1.4989804029464722, + "objective/train/weights_min": 0.3708694577217102, + "theoretical_loss": 3.518126747103997, + "tokens_seen": 1635123200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005095490290483069, + "loss": 0.0698, + "theoretical_loss": 3.5181024907363945, + "tokens_seen": 1635254272 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005094687851067244, + "loss": 0.0692, + "theoretical_loss": 3.518053985466243, + "tokens_seen": 1635516416 + }, + { + "epoch": 0.5, + "learning_rate": 0.000509388541165142, + "loss": 0.0661, + "theoretical_loss": 3.518005490146464, + "tokens_seen": 1635778560 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005093082972235597, + "loss": 0.0658, + "theoretical_loss": 3.5179570047734225, + "tokens_seen": 1636040704 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005092280532819772, + "loss": 0.0732, + "theoretical_loss": 3.5179085293434857, + "tokens_seen": 1636302848 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005091478093403949, + "loss": 0.0652, + "theoretical_loss": 3.517860063853022, + "tokens_seen": 1636564992 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005090675653988124, + "loss": 0.0695, + "theoretical_loss": 3.517811608298401, + "tokens_seen": 1636827136 + }, + { + "epoch": 0.5, + "learning_rate": 0.00050898732145723, + "loss": 0.0707, + "theoretical_loss": 3.5177631626759958, + "tokens_seen": 1637089280 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005089070775156476, + "loss": 0.0664, + "theoretical_loss": 3.5177147269821805, + "tokens_seen": 1637351424 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005088268335740651, + "loss": 0.0676, + "theoretical_loss": 3.517666301213331, + "tokens_seen": 1637613568 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005087465896324827, + "loss": 0.069, + "theoretical_loss": 3.5176178853658246, + "tokens_seen": 1637875712 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005086663456909003, + "loss": 0.0714, + "theoretical_loss": 3.5175694794360424, + "tokens_seen": 1638137856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7474626180525646, + "debugging/entropy-1-grams": 5.599013278347353, + "debugging/length": 499.42857142857144, + "debugging/num_segments": 14, + "debugging/raw_token_scores_avg": 0.007199673913419247, + "debugging/raw_token_scores_std": 0.028890423476696014, + "debugging/score": 0.0110880360129877, + "debugging/score_std": 0.01573414919444883, + "epoch": 0.5, + "objective/train/advantage_avg": 0.0007880714256316423, + "objective/train/docs_used": 598618, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3821892738342285, + "objective/train/original_loss": 1.3821892738342285, + "objective/train/theoretical_loss": 3.5175210834203656, + "objective/train/tokens_used": 1658860000, + "objective/train/value_avg": -0.00799560546875, + "objective/train/value_loss": 0.00038576649967581034, + "objective/train/value_max": -6.502866744995117e-05, + "objective/train/value_min": -0.85009765625, + "objective/train/value_reward_corr": 0.7350670241397752, + "objective/train/value_std": 0.0201873779296875, + "objective/train/weight_avg": 1.0009615421295166, + "objective/train/weighted_lm_loss": 1.383286952972412, + "objective/train/weights_max": 1.962690830230713, + "objective/train/weights_min": 0.3717447817325592, + "theoretical_loss": 3.5175210834203656, + "tokens_seen": 1638400000 + }, + { + "epoch": 0.5, + "learning_rate": 0.000508586101749318, + "loss": 0.0705, + "theoretical_loss": 3.5175210834203656, + "tokens_seen": 1638400000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005085058578077355, + "loss": 0.0736, + "theoretical_loss": 3.5174726973151778, + "tokens_seen": 1638662144 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005084256138661532, + "loss": 0.068, + "theoretical_loss": 3.517424321116865, + "tokens_seen": 1638924288 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005083453699245707, + "loss": 0.0709, + "theoretical_loss": 3.517375954821815, + "tokens_seen": 1639186432 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005082651259829882, + "loss": 0.0691, + "theoretical_loss": 3.517327598426416, + "tokens_seen": 1639448576 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005081848820414059, + "loss": 0.0684, + "theoretical_loss": 3.5172792519270604, + "tokens_seen": 1639710720 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005081046380998234, + "loss": 0.0715, + "theoretical_loss": 3.517230915320141, + "tokens_seen": 1639972864 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005080243941582411, + "loss": 0.0699, + "theoretical_loss": 3.5171825886020525, + "tokens_seen": 1640235008 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005079441502166586, + "loss": 0.0673, + "theoretical_loss": 3.5171342717691925, + "tokens_seen": 1640497152 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005078639062750763, + "loss": 0.0675, + "theoretical_loss": 3.517085964817959, + "tokens_seen": 1640759296 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005077836623334939, + "loss": 0.0694, + "theoretical_loss": 3.517037667744754, + "tokens_seen": 1641021440 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005077034183919115, + "loss": 0.0688, + "theoretical_loss": 3.516989380545979, + "tokens_seen": 1641283584 + }, + { + "epoch": 0.5, + "learning_rate": 0.000507623174450329, + "loss": 0.0708, + "theoretical_loss": 3.5169411032180387, + "tokens_seen": 1641545728 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -0.000734575791284442, + "objective/train/docs_used": 599774, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3677737712860107, + "objective/train/original_loss": 1.3677736520767212, + "objective/train/theoretical_loss": 3.5169169682545087, + "objective/train/tokens_used": 1662136800, + "objective/train/value_avg": -0.0098114013671875, + "objective/train/value_loss": 0.0007994266925379634, + "objective/train/value_max": -0.00010228157043457031, + "objective/train/value_min": -0.90771484375, + "objective/train/value_reward_corr": 0.8485356054895296, + "objective/train/value_std": 0.0374755859375, + "objective/train/weight_avg": 0.9996302127838135, + "objective/train/weighted_lm_loss": 1.3674414157867432, + "objective/train/weights_max": 1.9520905017852783, + "objective/train/weights_min": 0.3693559169769287, + "theoretical_loss": 3.5169169682545087, + "tokens_seen": 1641676800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005075429305087466, + "loss": 0.0713, + "theoretical_loss": 3.51689283575734, + "tokens_seen": 1641807872 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005074626865671642, + "loss": 0.0723, + "theoretical_loss": 3.516844578160291, + "tokens_seen": 1642070016 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005073824426255817, + "loss": 0.0683, + "theoretical_loss": 3.5167963304233014, + "tokens_seen": 1642332160 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005073021986839994, + "loss": 0.0701, + "theoretical_loss": 3.516748092542784, + "tokens_seen": 1642594304 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005072219547424169, + "loss": 0.0718, + "theoretical_loss": 3.5166998645151515, + "tokens_seen": 1642856448 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005071417108008345, + "loss": 0.0722, + "theoretical_loss": 3.516651646336821, + "tokens_seen": 1643118592 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005070614668592522, + "loss": 0.0703, + "theoretical_loss": 3.5166034380042093, + "tokens_seen": 1643380736 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005069812229176697, + "loss": 0.0701, + "theoretical_loss": 3.5165552395137363, + "tokens_seen": 1643642880 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005069009789760874, + "loss": 0.068, + "theoretical_loss": 3.516507050861823, + "tokens_seen": 1643905024 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005068207350345049, + "loss": 0.0671, + "theoretical_loss": 3.5164588720448937, + "tokens_seen": 1644167168 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005067404910929225, + "loss": 0.07, + "theoretical_loss": 3.5164107030593725, + "tokens_seen": 1644429312 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005066602471513401, + "loss": 0.0706, + "theoretical_loss": 3.516362543901687, + "tokens_seen": 1644691456 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0013407982187345624, + "objective/train/docs_used": 601010, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.416961908340454, + "objective/train/original_loss": 1.416961908340454, + "objective/train/theoretical_loss": 3.5163143945682656, + "objective/train/tokens_used": 1665413600, + "objective/train/value_avg": -0.0066070556640625, + "objective/train/value_loss": 9.873360977508128e-05, + "objective/train/value_max": -0.00011146068572998047, + "objective/train/value_min": -0.261474609375, + "objective/train/value_reward_corr": 0.7415103919665526, + "objective/train/value_std": 0.0123443603515625, + "objective/train/weight_avg": 1.0013898611068726, + "objective/train/weighted_lm_loss": 1.4189093112945557, + "objective/train/weights_max": 1.2988438606262207, + "objective/train/weights_min": 0.7572005987167358, + "theoretical_loss": 3.5163143945682656, + "tokens_seen": 1644953600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005065800032097577, + "loss": 0.0723, + "theoretical_loss": 3.5163143945682656, + "tokens_seen": 1644953600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005064997592681752, + "loss": 0.0714, + "theoretical_loss": 3.5162662550555392, + "tokens_seen": 1645215744 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005064195153265928, + "loss": 0.0721, + "theoretical_loss": 3.5162181253599405, + "tokens_seen": 1645477888 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005063392713850105, + "loss": 0.0704, + "theoretical_loss": 3.516170005477904, + "tokens_seen": 1645740032 + }, + { + "epoch": 0.5, + "learning_rate": 0.000506259027443428, + "loss": 0.0699, + "theoretical_loss": 3.516121895405866, + "tokens_seen": 1646002176 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005061787835018457, + "loss": 0.0732, + "theoretical_loss": 3.516073795140265, + "tokens_seen": 1646264320 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005060985395602632, + "loss": 0.0692, + "theoretical_loss": 3.5160257046775407, + "tokens_seen": 1646526464 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005060182956186808, + "loss": 0.0697, + "theoretical_loss": 3.515977624014135, + "tokens_seen": 1646788608 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005059380516770984, + "loss": 0.0681, + "theoretical_loss": 3.515929553146492, + "tokens_seen": 1647050752 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005058578077355159, + "loss": 0.068, + "theoretical_loss": 3.515881492071057, + "tokens_seen": 1647312896 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005057775637939335, + "loss": 0.0705, + "theoretical_loss": 3.515833440784278, + "tokens_seen": 1647575040 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005056973198523511, + "loss": 0.0689, + "theoretical_loss": 3.5157853992826036, + "tokens_seen": 1647837184 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005056170759107688, + "loss": 0.0696, + "theoretical_loss": 3.5157373675624854, + "tokens_seen": 1648099328 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0007609121967107058, + "objective/train/docs_used": 602144, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.442240595817566, + "objective/train/original_loss": 1.4422404766082764, + "objective/train/theoretical_loss": 3.5157133553694013, + "objective/train/tokens_used": 1668690400, + "objective/train/value_avg": -0.0084991455078125, + "objective/train/value_loss": 0.0003012235974892974, + "objective/train/value_max": -0.00011771917343139648, + "objective/train/value_min": -0.34228515625, + "objective/train/value_reward_corr": 0.6924308598838854, + "objective/train/value_std": 0.0149688720703125, + "objective/train/weight_avg": 1.0008883476257324, + "objective/train/weighted_lm_loss": 1.4430891275405884, + "objective/train/weights_max": 1.3021063804626465, + "objective/train/weights_min": 0.22860285639762878, + "theoretical_loss": 3.5157133553694013, + "tokens_seen": 1648230400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005055368319691864, + "loss": 0.0712, + "theoretical_loss": 3.515689345620377, + "tokens_seen": 1648361472 + }, + { + "epoch": 0.5, + "learning_rate": 0.000505456588027604, + "loss": 0.0711, + "theoretical_loss": 3.5156413334527317, + "tokens_seen": 1648623616 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005053763440860215, + "loss": 0.0697, + "theoretical_loss": 3.515593331056008, + "tokens_seen": 1648885760 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005052961001444391, + "loss": 0.068, + "theoretical_loss": 3.5155453384266635, + "tokens_seen": 1649147904 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005052158562028567, + "loss": 0.0679, + "theoretical_loss": 3.515497355561159, + "tokens_seen": 1649410048 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005051356122612742, + "loss": 0.0712, + "theoretical_loss": 3.515449382455957, + "tokens_seen": 1649672192 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005050553683196919, + "loss": 0.07, + "theoretical_loss": 3.515401419107521, + "tokens_seen": 1649934336 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005049751243781094, + "loss": 0.0682, + "theoretical_loss": 3.515353465512317, + "tokens_seen": 1650196480 + }, + { + "epoch": 0.5, + "learning_rate": 0.000504894880436527, + "loss": 0.0721, + "theoretical_loss": 3.5153055216668134, + "tokens_seen": 1650458624 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005048146364949447, + "loss": 0.0703, + "theoretical_loss": 3.51525758756748, + "tokens_seen": 1650720768 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005047343925533623, + "loss": 0.0691, + "theoretical_loss": 3.5152096632107876, + "tokens_seen": 1650982912 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005046541486117798, + "loss": 0.0704, + "theoretical_loss": 3.5151617485932096, + "tokens_seen": 1651245056 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.00040685301064513624, + "objective/train/docs_used": 603335, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3096529245376587, + "objective/train/original_loss": 1.3096530437469482, + "objective/train/theoretical_loss": 3.5151138437112213, + "objective/train/tokens_used": 1671967200, + "objective/train/value_avg": -0.0088348388671875, + "objective/train/value_loss": 0.0004952080198563635, + "objective/train/value_max": -9.101629257202148e-05, + "objective/train/value_min": -0.8935546875, + "objective/train/value_reward_corr": 0.6091750570438847, + "objective/train/value_std": 0.01788330078125, + "objective/train/weight_avg": 1.0006325244903564, + "objective/train/weighted_lm_loss": 1.3091342449188232, + "objective/train/weights_max": 2.221797466278076, + "objective/train/weights_min": 0.3713989555835724, + "theoretical_loss": 3.5151138437112213, + "tokens_seen": 1651507200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005045739046701974, + "loss": 0.0665, + "theoretical_loss": 3.5151138437112213, + "tokens_seen": 1651507200 + }, + { + "epoch": 0.5, + "learning_rate": 0.000504493660728615, + "loss": 0.0714, + "theoretical_loss": 3.5150659485613005, + "tokens_seen": 1651769344 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005044134167870326, + "loss": 0.0692, + "theoretical_loss": 3.5150180631399244, + "tokens_seen": 1652031488 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005043331728454502, + "loss": 0.069, + "theoretical_loss": 3.5149701874435753, + "tokens_seen": 1652293632 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005042529289038677, + "loss": 0.074, + "theoretical_loss": 3.514922321468734, + "tokens_seen": 1652555776 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005041726849622855, + "loss": 0.0711, + "theoretical_loss": 3.5148744652118866, + "tokens_seen": 1652817920 + }, + { + "epoch": 0.5, + "learning_rate": 0.000504092441020703, + "loss": 0.0711, + "theoretical_loss": 3.5148266186695185, + "tokens_seen": 1653080064 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005040121970791205, + "loss": 0.069, + "theoretical_loss": 3.5147787818381175, + "tokens_seen": 1653342208 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005039319531375382, + "loss": 0.0696, + "theoretical_loss": 3.514730954714173, + "tokens_seen": 1653604352 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005038517091959557, + "loss": 0.0694, + "theoretical_loss": 3.5146831372941776, + "tokens_seen": 1653866496 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005037714652543733, + "loss": 0.0705, + "theoretical_loss": 3.5146353295746247, + "tokens_seen": 1654128640 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005036912213127909, + "loss": 0.0687, + "theoretical_loss": 3.514587531552009, + "tokens_seen": 1654390784 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005036109773712085, + "loss": 0.0681, + "theoretical_loss": 3.5145397432228274, + "tokens_seen": 1654652928 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0005927335005253553, + "objective/train/docs_used": 604403, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4796189069747925, + "objective/train/original_loss": 1.479619026184082, + "objective/train/theoretical_loss": 3.5145158526921807, + "objective/train/tokens_used": 1675244000, + "objective/train/value_avg": -0.009033203125, + "objective/train/value_loss": 0.00033400507527403533, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.697265625, + "objective/train/value_reward_corr": 0.7117968222736691, + "objective/train/value_std": 0.0211029052734375, + "objective/train/weight_avg": 1.0007562637329102, + "objective/train/weighted_lm_loss": 1.4803884029388428, + "objective/train/weights_max": 1.8430651426315308, + "objective/train/weights_min": 0.5630186200141907, + "theoretical_loss": 3.5145158526921807, + "tokens_seen": 1654784000 + }, + { + "epoch": 0.5, + "learning_rate": 0.000503530733429626, + "loss": 0.0722, + "theoretical_loss": 3.5144919645835797, + "tokens_seen": 1654915072 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005034504894880436, + "loss": 0.068, + "theoretical_loss": 3.514444195630766, + "tokens_seen": 1655177216 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005033702455464613, + "loss": 0.0682, + "theoretical_loss": 3.5143964363608893, + "tokens_seen": 1655439360 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005032900016048788, + "loss": 0.0719, + "theoretical_loss": 3.514348686770454, + "tokens_seen": 1655701504 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005032097576632965, + "loss": 0.0682, + "theoretical_loss": 3.5143009468559656, + "tokens_seen": 1655963648 + }, + { + "epoch": 0.5, + "learning_rate": 0.000503129513721714, + "loss": 0.0686, + "theoretical_loss": 3.514253216613932, + "tokens_seen": 1656225792 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005030492697801317, + "loss": 0.0712, + "theoretical_loss": 3.514205496040865, + "tokens_seen": 1656487936 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005029690258385492, + "loss": 0.0692, + "theoretical_loss": 3.5141577851332735, + "tokens_seen": 1656750080 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005028887818969667, + "loss": 0.0683, + "theoretical_loss": 3.5141100838876724, + "tokens_seen": 1657012224 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005028085379553844, + "loss": 0.0674, + "theoretical_loss": 3.5140623923005774, + "tokens_seen": 1657274368 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005027282940138019, + "loss": 0.0676, + "theoretical_loss": 3.514014710368505, + "tokens_seen": 1657536512 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005026480500722196, + "loss": 0.0675, + "theoretical_loss": 3.513967038087973, + "tokens_seen": 1657798656 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0010964076500386, + "objective/train/docs_used": 605625, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3653086423873901, + "objective/train/original_loss": 1.3653085231781006, + "objective/train/theoretical_loss": 3.5139193754555036, + "objective/train/tokens_used": 1678520800, + "objective/train/value_avg": -0.007221221923828125, + "objective/train/value_loss": 0.00029157879180274904, + "objective/train/value_max": -7.545948028564453e-05, + "objective/train/value_min": -0.355712890625, + "objective/train/value_reward_corr": 0.6027662181567327, + "objective/train/value_std": 0.012420654296875, + "objective/train/weight_avg": 1.001227617263794, + "objective/train/weighted_lm_loss": 1.367215633392334, + "objective/train/weights_max": 1.1224560737609863, + "objective/train/weights_min": 0.3705088496208191, + "theoretical_loss": 3.5139193754555036, + "tokens_seen": 1658060800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005025678061306372, + "loss": 0.069, + "theoretical_loss": 3.5139193754555036, + "tokens_seen": 1658060800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005024875621890548, + "loss": 0.0674, + "theoretical_loss": 3.513871722467619, + "tokens_seen": 1658322944 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005024073182474723, + "loss": 0.0701, + "theoretical_loss": 3.513824079120843, + "tokens_seen": 1658585088 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005023270743058899, + "loss": 0.0698, + "theoretical_loss": 3.513776445411702, + "tokens_seen": 1658847232 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005022468303643075, + "loss": 0.067, + "theoretical_loss": 3.5137288213367235, + "tokens_seen": 1659109376 + }, + { + "epoch": 0.5, + "learning_rate": 0.000502166586422725, + "loss": 0.0696, + "theoretical_loss": 3.5136812068924375, + "tokens_seen": 1659371520 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005020863424811427, + "loss": 0.0689, + "theoretical_loss": 3.513633602075376, + "tokens_seen": 1659633664 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005020060985395602, + "loss": 0.0685, + "theoretical_loss": 3.513586006882071, + "tokens_seen": 1659895808 + }, + { + "epoch": 0.5, + "learning_rate": 0.000501925854597978, + "loss": 0.0693, + "theoretical_loss": 3.513538421309059, + "tokens_seen": 1660157952 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005018456106563955, + "loss": 0.0698, + "theoretical_loss": 3.5134908453528757, + "tokens_seen": 1660420096 + }, + { + "epoch": 0.5, + "learning_rate": 0.000501765366714813, + "loss": 0.0683, + "theoretical_loss": 3.51344327901006, + "tokens_seen": 1660682240 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005016851227732307, + "loss": 0.0697, + "theoretical_loss": 3.513395722277153, + "tokens_seen": 1660944384 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005016048788316482, + "loss": 0.0706, + "theoretical_loss": 3.513348175150696, + "tokens_seen": 1661206528 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": -1.5146979421842843e-05, + "objective/train/docs_used": 606795, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4264380931854248, + "objective/train/original_loss": 1.4264380931854248, + "objective/train/theoretical_loss": 3.513324405188806, + "objective/train/tokens_used": 1681797600, + "objective/train/value_avg": -0.007694244384765625, + "objective/train/value_loss": 0.0006180181517265737, + "objective/train/value_max": -4.100799560546875e-05, + "objective/train/value_min": -0.75, + "objective/train/value_reward_corr": 0.7114442243862558, + "objective/train/value_std": 0.0225982666015625, + "objective/train/weight_avg": 1.0002539157867432, + "objective/train/weighted_lm_loss": 1.42630934715271, + "objective/train/weights_max": 1.878307580947876, + "objective/train/weights_min": 0.36932775378227234, + "theoretical_loss": 3.513324405188806, + "tokens_seen": 1661337600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005015246348900658, + "loss": 0.0659, + "theoretical_loss": 3.5133006376272338, + "tokens_seen": 1661468672 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005014443909484834, + "loss": 0.0684, + "theoretical_loss": 3.5132531097033115, + "tokens_seen": 1661730816 + }, + { + "epoch": 0.5, + "learning_rate": 0.000501364147006901, + "loss": 0.0655, + "theoretical_loss": 3.5132055913754776, + "tokens_seen": 1661992960 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005012839030653185, + "loss": 0.0694, + "theoretical_loss": 3.5131580826402806, + "tokens_seen": 1662255104 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005012036591237363, + "loss": 0.0704, + "theoretical_loss": 3.5131105834942726, + "tokens_seen": 1662517248 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005011234151821538, + "loss": 0.0699, + "theoretical_loss": 3.5130630939340053, + "tokens_seen": 1662779392 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005010431712405713, + "loss": 0.069, + "theoretical_loss": 3.5130156139560347, + "tokens_seen": 1663041536 + }, + { + "epoch": 0.5, + "learning_rate": 0.000500962927298989, + "loss": 0.0705, + "theoretical_loss": 3.512968143556917, + "tokens_seen": 1663303680 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005008826833574065, + "loss": 0.0691, + "theoretical_loss": 3.51292068273321, + "tokens_seen": 1663565824 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005008024394158241, + "loss": 0.0681, + "theoretical_loss": 3.512873231481474, + "tokens_seen": 1663827968 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005007221954742417, + "loss": 0.0682, + "theoretical_loss": 3.512825789798271, + "tokens_seen": 1664090112 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005006419515326593, + "loss": 0.0683, + "theoretical_loss": 3.5127783576801646, + "tokens_seen": 1664352256 + }, + { + "epoch": 0.5, + "objective/train/advantage_avg": 0.0006472986424341798, + "objective/train/docs_used": 608106, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2632955312728882, + "objective/train/original_loss": 1.2632955312728882, + "objective/train/theoretical_loss": 3.51273093512372, + "objective/train/tokens_used": 1685074400, + "objective/train/value_avg": -0.01052093505859375, + "objective/train/value_loss": 0.0002703302598092705, + "objective/train/value_max": -4.649162292480469e-05, + "objective/train/value_min": -0.70849609375, + "objective/train/value_reward_corr": 0.7499424196920753, + "objective/train/value_std": 0.0195159912109375, + "objective/train/weight_avg": 1.000773549079895, + "objective/train/weighted_lm_loss": 1.263318657875061, + "objective/train/weights_max": 1.3154696226119995, + "objective/train/weights_min": 0.4099985957145691, + "theoretical_loss": 3.51273093512372, + "tokens_seen": 1664614400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005005617075910769, + "loss": 0.0707, + "theoretical_loss": 3.51273093512372, + "tokens_seen": 1664614400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005004814636494944, + "loss": 0.0682, + "theoretical_loss": 3.512683522125505, + "tokens_seen": 1664876544 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005004012197079121, + "loss": 0.0682, + "theoretical_loss": 3.512636118682088, + "tokens_seen": 1665138688 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005003209757663297, + "loss": 0.0688, + "theoretical_loss": 3.5125887247900396, + "tokens_seen": 1665400832 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005002407318247473, + "loss": 0.071, + "theoretical_loss": 3.512541340445933, + "tokens_seen": 1665662976 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005001604878831648, + "loss": 0.0695, + "theoretical_loss": 3.5124939656463416, + "tokens_seen": 1665925120 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005000802439415825, + "loss": 0.0678, + "theoretical_loss": 3.5124466003878423, + "tokens_seen": 1666187264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005, + "loss": 0.0694, + "theoretical_loss": 3.512399244667012, + "tokens_seen": 1666449408 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004999197560584176, + "loss": 0.0711, + "theoretical_loss": 3.5123518984804303, + "tokens_seen": 1666711552 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004998395121168352, + "loss": 0.0727, + "theoretical_loss": 3.5123045618246795, + "tokens_seen": 1666973696 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004997592681752528, + "loss": 0.0685, + "theoretical_loss": 3.5122572346963423, + "tokens_seen": 1667235840 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004996790242336704, + "loss": 0.0682, + "theoretical_loss": 3.512209917092003, + "tokens_seen": 1667497984 + }, + { + "epoch": 0.51, + "learning_rate": 0.000499598780292088, + "loss": 0.0698, + "theoretical_loss": 3.5121626090082487, + "tokens_seen": 1667760128 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.0011781208449974656, + "objective/train/docs_used": 609331, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5010648965835571, + "objective/train/original_loss": 1.5010650157928467, + "objective/train/theoretical_loss": 3.512138958535525, + "objective/train/tokens_used": 1688351200, + "objective/train/value_avg": -0.004985809326171875, + "objective/train/value_loss": 0.000247854070039466, + "objective/train/value_max": -4.166364669799805e-05, + "objective/train/value_min": -0.64453125, + "objective/train/value_reward_corr": 0.6039150359454911, + "objective/train/value_std": 0.00983428955078125, + "objective/train/weight_avg": 1.0012835264205933, + "objective/train/weighted_lm_loss": 1.5030659437179565, + "objective/train/weights_max": 1.1962590217590332, + "objective/train/weights_min": 0.3713054358959198, + "theoretical_loss": 3.512138958535525, + "tokens_seen": 1667891200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004995185363505056, + "loss": 0.0695, + "theoretical_loss": 3.5121153104416676, + "tokens_seen": 1668022272 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004994382924089231, + "loss": 0.0654, + "theoretical_loss": 3.5120680213888504, + "tokens_seen": 1668284416 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004993580484673407, + "loss": 0.0689, + "theoretical_loss": 3.512020741846388, + "tokens_seen": 1668546560 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004992778045257583, + "loss": 0.0675, + "theoretical_loss": 3.511973471810875, + "tokens_seen": 1668808704 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004991975605841759, + "loss": 0.069, + "theoretical_loss": 3.5119262112789063, + "tokens_seen": 1669070848 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004991173166425935, + "loss": 0.0679, + "theoretical_loss": 3.5118789602470786, + "tokens_seen": 1669332992 + }, + { + "epoch": 0.51, + "learning_rate": 0.000499037072701011, + "loss": 0.0727, + "theoretical_loss": 3.5118317187119916, + "tokens_seen": 1669595136 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004989568287594286, + "loss": 0.07, + "theoretical_loss": 3.511784486670246, + "tokens_seen": 1669857280 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004988765848178463, + "loss": 0.0673, + "theoretical_loss": 3.5117372641184432, + "tokens_seen": 1670119424 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004987963408762639, + "loss": 0.0707, + "theoretical_loss": 3.5116900510531885, + "tokens_seen": 1670381568 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004987160969346815, + "loss": 0.069, + "theoretical_loss": 3.5116428474710872, + "tokens_seen": 1670643712 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004986358529930991, + "loss": 0.0708, + "theoretical_loss": 3.5115956533687473, + "tokens_seen": 1670905856 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00025810531224124134, + "objective/train/docs_used": 610761, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.323618769645691, + "objective/train/original_loss": 1.3236186504364014, + "objective/train/theoretical_loss": 3.5115484687427783, + "objective/train/tokens_used": 1691628000, + "objective/train/value_avg": -0.005733489990234375, + "objective/train/value_loss": 0.00011536670353962108, + "objective/train/value_max": -9.387731552124023e-05, + "objective/train/value_min": -0.31689453125, + "objective/train/value_reward_corr": 0.7299094363574254, + "objective/train/value_std": 0.0103912353515625, + "objective/train/weight_avg": 1.0003145933151245, + "objective/train/weighted_lm_loss": 1.323650598526001, + "objective/train/weights_max": 1.1653170585632324, + "objective/train/weights_min": 0.723071813583374, + "theoretical_loss": 3.5115484687427783, + "tokens_seen": 1671168000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004985556090515166, + "loss": 0.0732, + "theoretical_loss": 3.5115484687427783, + "tokens_seen": 1671168000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004984753651099342, + "loss": 0.0652, + "theoretical_loss": 3.5115012935897907, + "tokens_seen": 1671430144 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004983951211683518, + "loss": 0.0676, + "theoretical_loss": 3.5114541279063975, + "tokens_seen": 1671692288 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004983148772267693, + "loss": 0.0691, + "theoretical_loss": 3.511406971689214, + "tokens_seen": 1671954432 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004982346332851869, + "loss": 0.0672, + "theoretical_loss": 3.511359824934856, + "tokens_seen": 1672216576 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004981543893436046, + "loss": 0.0665, + "theoretical_loss": 3.5113126876399416, + "tokens_seen": 1672478720 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004980741454020222, + "loss": 0.0676, + "theoretical_loss": 3.511265559801091, + "tokens_seen": 1672740864 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004979939014604398, + "loss": 0.069, + "theoretical_loss": 3.5112184414149255, + "tokens_seen": 1673003008 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004979136575188573, + "loss": 0.0681, + "theoretical_loss": 3.5111713324780687, + "tokens_seen": 1673265152 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004978334135772749, + "loss": 0.0701, + "theoretical_loss": 3.5111242329871457, + "tokens_seen": 1673527296 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004977531696356925, + "loss": 0.0688, + "theoretical_loss": 3.5110771429387824, + "tokens_seen": 1673789440 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004976729256941101, + "loss": 0.0688, + "theoretical_loss": 3.511030062329608, + "tokens_seen": 1674051584 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004975926817525277, + "loss": 0.069, + "theoretical_loss": 3.5109829911562533, + "tokens_seen": 1674313728 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00035731392563320696, + "objective/train/docs_used": 611357, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3454358577728271, + "objective/train/original_loss": 1.3454358577728271, + "objective/train/theoretical_loss": 3.5109594591069553, + "objective/train/tokens_used": 1694904800, + "objective/train/value_avg": -0.005359649658203125, + "objective/train/value_loss": 0.00013092935841996223, + "objective/train/value_max": -5.519390106201172e-05, + "objective/train/value_min": -0.72705078125, + "objective/train/value_reward_corr": 0.6681693229191178, + "objective/train/value_std": 0.01116180419921875, + "objective/train/weight_avg": 1.0004169940948486, + "objective/train/weighted_lm_loss": 1.3458484411239624, + "objective/train/weights_max": 1.2602665424346924, + "objective/train/weights_min": 0.37362140417099, + "theoretical_loss": 3.5109594591069553, + "tokens_seen": 1674444800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004975124378109452, + "loss": 0.0662, + "theoretical_loss": 3.5109359294153495, + "tokens_seen": 1674575872 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004974321938693629, + "loss": 0.0677, + "theoretical_loss": 3.5108888771035307, + "tokens_seen": 1674838016 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004973519499277805, + "loss": 0.0682, + "theoretical_loss": 3.5108418342174317, + "tokens_seen": 1675100160 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004972717059861981, + "loss": 0.0676, + "theoretical_loss": 3.5107948007536907, + "tokens_seen": 1675362304 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004971914620446156, + "loss": 0.0673, + "theoretical_loss": 3.5107477767089454, + "tokens_seen": 1675624448 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004971112181030332, + "loss": 0.0662, + "theoretical_loss": 3.5107007620798374, + "tokens_seen": 1675886592 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004970309741614508, + "loss": 0.0681, + "theoretical_loss": 3.510653756863009, + "tokens_seen": 1676148736 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004969507302198684, + "loss": 0.0675, + "theoretical_loss": 3.5106067610551035, + "tokens_seen": 1676410880 + }, + { + "epoch": 0.51, + "learning_rate": 0.000496870486278286, + "loss": 0.0709, + "theoretical_loss": 3.5105597746527675, + "tokens_seen": 1676673024 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004967902423367036, + "loss": 0.0657, + "theoretical_loss": 3.5105127976526482, + "tokens_seen": 1676935168 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004967099983951212, + "loss": 0.0679, + "theoretical_loss": 3.5104658300513942, + "tokens_seen": 1677197312 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004966297544535388, + "loss": 0.0655, + "theoretical_loss": 3.5104188718456575, + "tokens_seen": 1677459456 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00032650245702825487, + "objective/train/docs_used": 612648, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2856462001800537, + "objective/train/original_loss": 1.2856462001800537, + "objective/train/theoretical_loss": 3.5103719230320904, + "objective/train/tokens_used": 1698181600, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.0005081266863271594, + "objective/train/value_max": -4.988908767700195e-05, + "objective/train/value_min": -0.74755859375, + "objective/train/value_reward_corr": 0.6733552587749285, + "objective/train/value_std": 0.016265869140625, + "objective/train/weight_avg": 1.0005440711975098, + "objective/train/weighted_lm_loss": 1.2856409549713135, + "objective/train/weights_max": 1.4916789531707764, + "objective/train/weights_min": 0.3753184378147125, + "theoretical_loss": 3.5103719230320904, + "tokens_seen": 1677721600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004965495105119564, + "loss": 0.0681, + "theoretical_loss": 3.5103719230320904, + "tokens_seen": 1677721600 + }, + { + "epoch": 0.51, + "learning_rate": 0.000496469266570374, + "loss": 0.0691, + "theoretical_loss": 3.510324983607347, + "tokens_seen": 1677983744 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004963890226287915, + "loss": 0.0683, + "theoretical_loss": 3.5102780535680838, + "tokens_seen": 1678245888 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004963087786872091, + "loss": 0.07, + "theoretical_loss": 3.510231132910958, + "tokens_seen": 1678508032 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004962285347456267, + "loss": 0.068, + "theoretical_loss": 3.5101842216326293, + "tokens_seen": 1678770176 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004961482908040443, + "loss": 0.0674, + "theoretical_loss": 3.5101373197297594, + "tokens_seen": 1679032320 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004960680468624618, + "loss": 0.0671, + "theoretical_loss": 3.510090427199011, + "tokens_seen": 1679294464 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004959878029208794, + "loss": 0.0689, + "theoretical_loss": 3.5100435440370483, + "tokens_seen": 1679556608 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004959075589792971, + "loss": 0.0684, + "theoretical_loss": 3.5099966702405383, + "tokens_seen": 1679818752 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004958273150377147, + "loss": 0.0713, + "theoretical_loss": 3.5099498058061487, + "tokens_seen": 1680080896 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004957470710961323, + "loss": 0.068, + "theoretical_loss": 3.5099029507305497, + "tokens_seen": 1680343040 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004956668271545499, + "loss": 0.0704, + "theoretical_loss": 3.509856105010412, + "tokens_seen": 1680605184 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004955865832129674, + "loss": 0.0729, + "theoretical_loss": 3.509809268642409, + "tokens_seen": 1680867328 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.0005266556981950998, + "objective/train/docs_used": 613922, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3320040702819824, + "objective/train/original_loss": 1.3320040702819824, + "objective/train/theoretical_loss": 3.509785853964419, + "objective/train/tokens_used": 1701458400, + "objective/train/value_avg": -0.01157379150390625, + "objective/train/value_loss": 0.00023857805354055017, + "objective/train/value_max": -3.4809112548828125e-05, + "objective/train/value_min": -0.982421875, + "objective/train/value_reward_corr": 0.9455989378013424, + "objective/train/value_std": 0.04180908203125, + "objective/train/weight_avg": 0.9995850920677185, + "objective/train/weighted_lm_loss": 1.331617832183838, + "objective/train/weights_max": 1.1823036670684814, + "objective/train/weights_min": 0.3787992298603058, + "theoretical_loss": 3.509785853964419, + "tokens_seen": 1680998400 + }, + { + "epoch": 0.51, + "learning_rate": 0.000495506339271385, + "loss": 0.071, + "theoretical_loss": 3.5097624416232156, + "tokens_seen": 1681129472 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004954260953298026, + "loss": 0.0702, + "theoretical_loss": 3.509715623949509, + "tokens_seen": 1681391616 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004953458513882202, + "loss": 0.0711, + "theoretical_loss": 3.509668815617967, + "tokens_seen": 1681653760 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004952656074466377, + "loss": 0.0692, + "theoretical_loss": 3.5096220166252694, + "tokens_seen": 1681915904 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004951853635050554, + "loss": 0.0683, + "theoretical_loss": 3.509575226968098, + "tokens_seen": 1682178048 + }, + { + "epoch": 0.51, + "learning_rate": 0.000495105119563473, + "loss": 0.0691, + "theoretical_loss": 3.5095284466431362, + "tokens_seen": 1682440192 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004950248756218906, + "loss": 0.0681, + "theoretical_loss": 3.509481675647069, + "tokens_seen": 1682702336 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004949446316803081, + "loss": 0.0684, + "theoretical_loss": 3.509434913976583, + "tokens_seen": 1682964480 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004948643877387257, + "loss": 0.0694, + "theoretical_loss": 3.509388161628367, + "tokens_seen": 1683226624 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004947841437971433, + "loss": 0.0699, + "theoretical_loss": 3.509341418599111, + "tokens_seen": 1683488768 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004947038998555609, + "loss": 0.0695, + "theoretical_loss": 3.509294684885506, + "tokens_seen": 1683750912 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004946236559139785, + "loss": 0.0712, + "theoretical_loss": 3.509247960484247, + "tokens_seen": 1684013056 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00041788024827837944, + "objective/train/docs_used": 615159, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2733129262924194, + "objective/train/original_loss": 1.273313045501709, + "objective/train/theoretical_loss": 3.5092012453920285, + "objective/train/tokens_used": 1704735200, + "objective/train/value_avg": -0.00682830810546875, + "objective/train/value_loss": 0.00011341172648826614, + "objective/train/value_max": -5.143880844116211e-05, + "objective/train/value_min": -0.26318359375, + "objective/train/value_reward_corr": 0.6867672303390299, + "objective/train/value_std": 0.0099029541015625, + "objective/train/weight_avg": 1.0004736185073853, + "objective/train/weighted_lm_loss": 1.2733094692230225, + "objective/train/weights_max": 1.2000617980957031, + "objective/train/weights_min": 0.7181033492088318, + "theoretical_loss": 3.5092012453920285, + "tokens_seen": 1684275200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004945434119723962, + "loss": 0.0685, + "theoretical_loss": 3.5092012453920285, + "tokens_seen": 1684275200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004944631680308137, + "loss": 0.0685, + "theoretical_loss": 3.5091545396055475, + "tokens_seen": 1684537344 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004943829240892313, + "loss": 0.0676, + "theoretical_loss": 3.5091078431215017, + "tokens_seen": 1684799488 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004943026801476489, + "loss": 0.0674, + "theoretical_loss": 3.5090611559365925, + "tokens_seen": 1685061632 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004942224362060665, + "loss": 0.0686, + "theoretical_loss": 3.509014478047522, + "tokens_seen": 1685323776 + }, + { + "epoch": 0.51, + "learning_rate": 0.000494142192264484, + "loss": 0.071, + "theoretical_loss": 3.5089678094509926, + "tokens_seen": 1685585920 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004940619483229016, + "loss": 0.0691, + "theoretical_loss": 3.5089211501437103, + "tokens_seen": 1685848064 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004939817043813192, + "loss": 0.0684, + "theoretical_loss": 3.5088745001223822, + "tokens_seen": 1686110208 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004939014604397368, + "loss": 0.0699, + "theoretical_loss": 3.508827859383717, + "tokens_seen": 1686372352 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004938212164981544, + "loss": 0.0675, + "theoretical_loss": 3.5087812279244246, + "tokens_seen": 1686634496 + }, + { + "epoch": 0.51, + "learning_rate": 0.000493740972556572, + "loss": 0.0691, + "theoretical_loss": 3.5087346057412176, + "tokens_seen": 1686896640 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004936607286149896, + "loss": 0.0734, + "theoretical_loss": 3.508687992830809, + "tokens_seen": 1687158784 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004935804846734072, + "loss": 0.0732, + "theoretical_loss": 3.5086413891899144, + "tokens_seen": 1687420928 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.00013062905054539442, + "objective/train/docs_used": 616363, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4347667694091797, + "objective/train/original_loss": 1.4347667694091797, + "objective/train/theoretical_loss": 3.508618090844509, + "objective/train/tokens_used": 1708012000, + "objective/train/value_avg": -0.007480621337890625, + "objective/train/value_loss": 0.00023279149900190532, + "objective/train/value_max": -5.435943603515625e-05, + "objective/train/value_min": -0.2467041015625, + "objective/train/value_reward_corr": 0.7050742886378683, + "objective/train/value_std": 0.0143280029296875, + "objective/train/weight_avg": 1.0002341270446777, + "objective/train/weighted_lm_loss": 1.4352725744247437, + "objective/train/weights_max": 1.1323878765106201, + "objective/train/weights_min": 0.37623587250709534, + "theoretical_loss": 3.508618090844509, + "tokens_seen": 1687552000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004935002407318248, + "loss": 0.0714, + "theoretical_loss": 3.5085947948152514, + "tokens_seen": 1687683072 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004934199967902424, + "loss": 0.0689, + "theoretical_loss": 3.5085482097035383, + "tokens_seen": 1687945216 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004933397528486599, + "loss": 0.0694, + "theoretical_loss": 3.508501633851495, + "tokens_seen": 1688207360 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004932595089070775, + "loss": 0.0672, + "theoretical_loss": 3.5084550672558446, + "tokens_seen": 1688469504 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004931792649654951, + "loss": 0.0694, + "theoretical_loss": 3.50840850991331, + "tokens_seen": 1688731648 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004930990210239126, + "loss": 0.0656, + "theoretical_loss": 3.5083619618206168, + "tokens_seen": 1688993792 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004930187770823302, + "loss": 0.0663, + "theoretical_loss": 3.5083154229744924, + "tokens_seen": 1689255936 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004929385331407479, + "loss": 0.0734, + "theoretical_loss": 3.5082688933716653, + "tokens_seen": 1689518080 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004928582891991655, + "loss": 0.0702, + "theoretical_loss": 3.5082223730088655, + "tokens_seen": 1689780224 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004927780452575831, + "loss": 0.0694, + "theoretical_loss": 3.5081758618828256, + "tokens_seen": 1690042368 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004926978013160007, + "loss": 0.0683, + "theoretical_loss": 3.5081293599902788, + "tokens_seen": 1690304512 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004926175573744183, + "loss": 0.0695, + "theoretical_loss": 3.5080828673279614, + "tokens_seen": 1690566656 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.0007474888698197901, + "objective/train/docs_used": 617433, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3971322774887085, + "objective/train/original_loss": 1.397132396697998, + "objective/train/theoretical_loss": 3.5080363838926094, + "objective/train/tokens_used": 1711288800, + "objective/train/value_avg": -0.007564544677734375, + "objective/train/value_loss": 0.0004391712718643248, + "objective/train/value_max": -5.692243576049805e-05, + "objective/train/value_min": -0.73828125, + "objective/train/value_reward_corr": 0.6730230280800624, + "objective/train/value_std": 0.021270751953125, + "objective/train/weight_avg": 1.0009394884109497, + "objective/train/weighted_lm_loss": 1.3970917463302612, + "objective/train/weights_max": 1.8592804670333862, + "objective/train/weights_min": 0.3687421381473541, + "theoretical_loss": 3.5080363838926094, + "tokens_seen": 1690828800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004925373134328358, + "loss": 0.0688, + "theoretical_loss": 3.5080363838926094, + "tokens_seen": 1690828800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004924570694912534, + "loss": 0.069, + "theoretical_loss": 3.5079899096809624, + "tokens_seen": 1691090944 + }, + { + "epoch": 0.51, + "learning_rate": 0.000492376825549671, + "loss": 0.0693, + "theoretical_loss": 3.5079434446897597, + "tokens_seen": 1691353088 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004922965816080887, + "loss": 0.0696, + "theoretical_loss": 3.5078969889157445, + "tokens_seen": 1691615232 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004922163376665062, + "loss": 0.0685, + "theoretical_loss": 3.5078505423556594, + "tokens_seen": 1691877376 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004921360937249238, + "loss": 0.0707, + "theoretical_loss": 3.50780410500625, + "tokens_seen": 1692139520 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004920558497833414, + "loss": 0.0707, + "theoretical_loss": 3.507757676864264, + "tokens_seen": 1692401664 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004919756058417589, + "loss": 0.067, + "theoretical_loss": 3.5077112579264496, + "tokens_seen": 1692663808 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004918953619001765, + "loss": 0.0693, + "theoretical_loss": 3.507664848189557, + "tokens_seen": 1692925952 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004918151179585941, + "loss": 0.0716, + "theoretical_loss": 3.507618447650337, + "tokens_seen": 1693188096 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004917348740170117, + "loss": 0.0696, + "theoretical_loss": 3.5075720563055457, + "tokens_seen": 1693450240 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004916546300754293, + "loss": 0.0668, + "theoretical_loss": 3.5075256741519363, + "tokens_seen": 1693712384 + }, + { + "epoch": 0.51, + "learning_rate": 0.000491574386133847, + "loss": 0.0667, + "theoretical_loss": 3.507479301186266, + "tokens_seen": 1693974528 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": -0.00035341508919373155, + "objective/train/docs_used": 618718, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4741839170455933, + "objective/train/original_loss": 1.4741837978363037, + "objective/train/theoretical_loss": 3.507456118147896, + "objective/train/tokens_used": 1714565600, + "objective/train/value_avg": -0.00838470458984375, + "objective/train/value_loss": 0.00018926823395304382, + "objective/train/value_max": -5.143880844116211e-05, + "objective/train/value_min": -0.35107421875, + "objective/train/value_reward_corr": 0.6988737523826818, + "objective/train/value_std": 0.01287078857421875, + "objective/train/weight_avg": 0.9997391104698181, + "objective/train/weighted_lm_loss": 1.4718536138534546, + "objective/train/weights_max": 1.1573328971862793, + "objective/train/weights_min": 0.6129269003868103, + "theoretical_loss": 3.507456118147896, + "tokens_seen": 1694105600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004914941421922646, + "loss": 0.0708, + "theoretical_loss": 3.5074329374052944, + "tokens_seen": 1694236672 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004914138982506821, + "loss": 0.0663, + "theoretical_loss": 3.507386582805781, + "tokens_seen": 1694498816 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004913336543090997, + "loss": 0.0682, + "theoretical_loss": 3.5073402373844864, + "tokens_seen": 1694760960 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004912534103675173, + "loss": 0.0694, + "theoretical_loss": 3.5072939011381763, + "tokens_seen": 1695023104 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004911731664259348, + "loss": 0.0672, + "theoretical_loss": 3.507247574063614, + "tokens_seen": 1695285248 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004910929224843524, + "loss": 0.0664, + "theoretical_loss": 3.5072012561575674, + "tokens_seen": 1695547392 + }, + { + "epoch": 0.51, + "learning_rate": 0.00049101267854277, + "loss": 0.0674, + "theoretical_loss": 3.5071549474168036, + "tokens_seen": 1695809536 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004909324346011876, + "loss": 0.0698, + "theoretical_loss": 3.507108647838094, + "tokens_seen": 1696071680 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004908521906596052, + "loss": 0.0671, + "theoretical_loss": 3.507062357418209, + "tokens_seen": 1696333824 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004907719467180228, + "loss": 0.0681, + "theoretical_loss": 3.5070160761539233, + "tokens_seen": 1696595968 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004906917027764404, + "loss": 0.0682, + "theoretical_loss": 3.506969804042011, + "tokens_seen": 1696858112 + }, + { + "epoch": 0.51, + "learning_rate": 0.000490611458834858, + "loss": 0.0707, + "theoretical_loss": 3.5069235410792485, + "tokens_seen": 1697120256 + }, + { + "epoch": 0.51, + "objective/train/advantage_avg": 0.0010268003679811954, + "objective/train/docs_used": 619912, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3225995302200317, + "objective/train/original_loss": 1.3225996494293213, + "objective/train/theoretical_loss": 3.506877287262414, + "objective/train/tokens_used": 1717842400, + "objective/train/value_avg": -0.00870513916015625, + "objective/train/value_loss": 0.0001602449337951839, + "objective/train/value_max": -4.756450653076172e-05, + "objective/train/value_min": -0.217529296875, + "objective/train/value_reward_corr": 0.7187718829305348, + "objective/train/value_std": 0.01302337646484375, + "objective/train/weight_avg": 1.0011017322540283, + "objective/train/weighted_lm_loss": 1.3236242532730103, + "objective/train/weights_max": 1.0969452857971191, + "objective/train/weights_min": 0.36948272585868835, + "theoretical_loss": 3.506877287262414, + "tokens_seen": 1697382400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004905312148932756, + "loss": 0.0691, + "theoretical_loss": 3.506877287262414, + "tokens_seen": 1697382400 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004904509709516932, + "loss": 0.0671, + "theoretical_loss": 3.5068310425882876, + "tokens_seen": 1697644544 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004903707270101108, + "loss": 0.0697, + "theoretical_loss": 3.506784807053651, + "tokens_seen": 1697906688 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004902904830685283, + "loss": 0.0703, + "theoretical_loss": 3.506738580655287, + "tokens_seen": 1698168832 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004902102391269459, + "loss": 0.0672, + "theoretical_loss": 3.5066923633899796, + "tokens_seen": 1698430976 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004901299951853635, + "loss": 0.0683, + "theoretical_loss": 3.506646155254516, + "tokens_seen": 1698693120 + }, + { + "epoch": 0.51, + "learning_rate": 0.000490049751243781, + "loss": 0.0687, + "theoretical_loss": 3.506599956245684, + "tokens_seen": 1698955264 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004899695073021987, + "loss": 0.0691, + "theoretical_loss": 3.5065537663602737, + "tokens_seen": 1699217408 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004898892633606163, + "loss": 0.0679, + "theoretical_loss": 3.506507585595075, + "tokens_seen": 1699479552 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004898090194190339, + "loss": 0.0682, + "theoretical_loss": 3.506461413946882, + "tokens_seen": 1699741696 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004897287754774515, + "loss": 0.0653, + "theoretical_loss": 3.5064152514124887, + "tokens_seen": 1700003840 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004896485315358691, + "loss": 0.0702, + "theoretical_loss": 3.506369097988691, + "tokens_seen": 1700265984 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004895682875942866, + "loss": 0.068, + "theoretical_loss": 3.5063229536722864, + "tokens_seen": 1700528128 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.001018267241306603, + "objective/train/docs_used": 621118, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4754199981689453, + "objective/train/original_loss": 1.4754199981689453, + "objective/train/theoretical_loss": 3.5062998849283566, + "objective/train/tokens_used": 1721119200, + "objective/train/value_avg": -0.006374359130859375, + "objective/train/value_loss": 0.0001637029490666464, + "objective/train/value_max": -5.519390106201172e-05, + "objective/train/value_min": -0.374755859375, + "objective/train/value_reward_corr": 0.7205456742670221, + "objective/train/value_std": 0.0139312744140625, + "objective/train/weight_avg": 1.0010955333709717, + "objective/train/weighted_lm_loss": 1.4766151905059814, + "objective/train/weights_max": 1.219853162765503, + "objective/train/weights_min": 0.388961523771286, + "theoretical_loss": 3.5062998849283566, + "tokens_seen": 1700659200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004894880436527042, + "loss": 0.0688, + "theoretical_loss": 3.5062768184600754, + "tokens_seen": 1700790272 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004894077997111218, + "loss": 0.0672, + "theoretical_loss": 3.5062306923488573, + "tokens_seen": 1701052416 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004893275557695395, + "loss": 0.0697, + "theoretical_loss": 3.5061845753354355, + "tokens_seen": 1701314560 + }, + { + "epoch": 0.52, + "learning_rate": 0.000489247311827957, + "loss": 0.0674, + "theoretical_loss": 3.5061384674166147, + "tokens_seen": 1701576704 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004891670678863746, + "loss": 0.0665, + "theoretical_loss": 3.5060923685892, + "tokens_seen": 1701838848 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004890868239447922, + "loss": 0.0714, + "theoretical_loss": 3.5060462788499986, + "tokens_seen": 1702100992 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004890065800032098, + "loss": 0.0685, + "theoretical_loss": 3.50600019819582, + "tokens_seen": 1702363136 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004889263360616273, + "loss": 0.0679, + "theoretical_loss": 3.5059541266234744, + "tokens_seen": 1702625280 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004888460921200449, + "loss": 0.0689, + "theoretical_loss": 3.505908064129775, + "tokens_seen": 1702887424 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004887658481784625, + "loss": 0.0654, + "theoretical_loss": 3.5058620107115344, + "tokens_seen": 1703149568 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004886856042368801, + "loss": 0.068, + "theoretical_loss": 3.505815966365568, + "tokens_seen": 1703411712 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004886053602952977, + "loss": 0.07, + "theoretical_loss": 3.5057699310886945, + "tokens_seen": 1703673856 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0014890459133312106, + "objective/train/docs_used": 622235, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3723922967910767, + "objective/train/original_loss": 1.372392177581787, + "objective/train/theoretical_loss": 3.505723904877731, + "objective/train/tokens_used": 1724396000, + "objective/train/value_avg": -0.00720977783203125, + "objective/train/value_loss": 0.00032538699451833963, + "objective/train/value_max": -9.763240814208984e-05, + "objective/train/value_min": -0.9609375, + "objective/train/value_reward_corr": 0.6740766317905353, + "objective/train/value_std": 0.0184173583984375, + "objective/train/weight_avg": 1.0016391277313232, + "objective/train/weighted_lm_loss": 1.374948501586914, + "objective/train/weights_max": 2.2131869792938232, + "objective/train/weights_min": 0.3746318221092224, + "theoretical_loss": 3.505723904877731, + "tokens_seen": 1703936000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004885251163537154, + "loss": 0.0688, + "theoretical_loss": 3.505723904877731, + "tokens_seen": 1703936000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004884448724121329, + "loss": 0.0669, + "theoretical_loss": 3.5056778877294983, + "tokens_seen": 1704198144 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004883646284705505, + "loss": 0.068, + "theoretical_loss": 3.5056318796408186, + "tokens_seen": 1704460288 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004882843845289681, + "loss": 0.0684, + "theoretical_loss": 3.505585880608515, + "tokens_seen": 1704722432 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048820414058738565, + "loss": 0.0682, + "theoretical_loss": 3.505539890629412, + "tokens_seen": 1704984576 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048812389664580326, + "loss": 0.0683, + "theoretical_loss": 3.5054939097003377, + "tokens_seen": 1705246720 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004880436527042209, + "loss": 0.0694, + "theoretical_loss": 3.505447937818119, + "tokens_seen": 1705508864 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048796340876263843, + "loss": 0.0685, + "theoretical_loss": 3.5054019749795864, + "tokens_seen": 1705771008 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048788316482105605, + "loss": 0.0711, + "theoretical_loss": 3.5053560211815715, + "tokens_seen": 1706033152 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004878029208794736, + "loss": 0.0683, + "theoretical_loss": 3.505310076420907, + "tokens_seen": 1706295296 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048772267693789116, + "loss": 0.0674, + "theoretical_loss": 3.505264140694428, + "tokens_seen": 1706557440 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004876424329963088, + "loss": 0.0696, + "theoretical_loss": 3.5052182139989707, + "tokens_seen": 1706819584 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004875621890547264, + "loss": 0.0697, + "theoretical_loss": 3.5051722963313723, + "tokens_seen": 1707081728 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0002717103052418679, + "objective/train/docs_used": 623332, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5116854906082153, + "objective/train/original_loss": 1.5116853713989258, + "objective/train/theoretical_loss": 3.5051493408820327, + "objective/train/tokens_used": 1727672800, + "objective/train/value_avg": -0.0070037841796875, + "objective/train/value_loss": 0.0002370732690906152, + "objective/train/value_max": -4.00543212890625e-05, + "objective/train/value_min": -0.56884765625, + "objective/train/value_reward_corr": 0.7653480297142256, + "objective/train/value_std": 0.017669677734375, + "objective/train/weight_avg": 1.0003830194473267, + "objective/train/weighted_lm_loss": 1.5120874643325806, + "objective/train/weights_max": 1.4940978288650513, + "objective/train/weights_min": 0.37812936305999756, + "theoretical_loss": 3.5051493408820327, + "tokens_seen": 1707212800 + }, + { + "epoch": 0.52, + "learning_rate": 0.000487481945113144, + "loss": 0.0671, + "theoretical_loss": 3.505126387688473, + "tokens_seen": 1707343872 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048740170117156156, + "loss": 0.0655, + "theoretical_loss": 3.5050804880671134, + "tokens_seen": 1707606016 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048732145722997917, + "loss": 0.0678, + "theoretical_loss": 3.505034597464137, + "tokens_seen": 1707868160 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048724121328839673, + "loss": 0.0691, + "theoretical_loss": 3.5049887158763866, + "tokens_seen": 1708130304 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004871609693468143, + "loss": 0.0659, + "theoretical_loss": 3.504942843300709, + "tokens_seen": 1708392448 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004870807254052319, + "loss": 0.0682, + "theoretical_loss": 3.5048969797339513, + "tokens_seen": 1708654592 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004870004814636495, + "loss": 0.0702, + "theoretical_loss": 3.5048511251729626, + "tokens_seen": 1708916736 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004869202375220671, + "loss": 0.068, + "theoretical_loss": 3.504805279614594, + "tokens_seen": 1709178880 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004868399935804847, + "loss": 0.0704, + "theoretical_loss": 3.504759443055696, + "tokens_seen": 1709441024 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004867597496389023, + "loss": 0.0678, + "theoretical_loss": 3.5047136154931238, + "tokens_seen": 1709703168 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048667950569731986, + "loss": 0.0695, + "theoretical_loss": 3.5046677969237328, + "tokens_seen": 1709965312 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004865992617557374, + "loss": 0.0678, + "theoretical_loss": 3.504621987344379, + "tokens_seen": 1710227456 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0009433527011424303, + "objective/train/docs_used": 624643, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4271879196166992, + "objective/train/original_loss": 1.4271876811981201, + "objective/train/theoretical_loss": 3.504576186751921, + "objective/train/tokens_used": 1730949600, + "objective/train/value_avg": -0.007293701171875, + "objective/train/value_loss": 0.00016305405006278306, + "objective/train/value_max": -3.7610530853271484e-05, + "objective/train/value_min": -0.338134765625, + "objective/train/value_reward_corr": 0.7030048069232187, + "objective/train/value_std": 0.01326751708984375, + "objective/train/weight_avg": 1.0010199546813965, + "objective/train/weighted_lm_loss": 1.428601861000061, + "objective/train/weights_max": 1.1191952228546143, + "objective/train/weights_min": 0.3719745874404907, + "theoretical_loss": 3.504576186751921, + "tokens_seen": 1710489600 + }, + { + "epoch": 0.52, + "learning_rate": 0.000486519017814155, + "loss": 0.0694, + "theoretical_loss": 3.504576186751921, + "tokens_seen": 1710489600 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048643877387257264, + "loss": 0.0699, + "theoretical_loss": 3.5045303951432194, + "tokens_seen": 1710751744 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004863585299309902, + "loss": 0.0681, + "theoretical_loss": 3.504484612515136, + "tokens_seen": 1711013888 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004862782859894078, + "loss": 0.0701, + "theoretical_loss": 3.5044388388645333, + "tokens_seen": 1711276032 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004861980420478254, + "loss": 0.0651, + "theoretical_loss": 3.5043930741882763, + "tokens_seen": 1711538176 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048611779810624304, + "loss": 0.0695, + "theoretical_loss": 3.504347318483232, + "tokens_seen": 1711800320 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048603755416466054, + "loss": 0.0709, + "theoretical_loss": 3.504301571746267, + "tokens_seen": 1712062464 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048595731022307815, + "loss": 0.0693, + "theoretical_loss": 3.504255833974252, + "tokens_seen": 1712324608 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048587706628149576, + "loss": 0.0683, + "theoretical_loss": 3.5042101051640575, + "tokens_seen": 1712586752 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004857968223399133, + "loss": 0.0707, + "theoretical_loss": 3.5041643853125564, + "tokens_seen": 1712848896 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048571657839833094, + "loss": 0.0697, + "theoretical_loss": 3.504118674416623, + "tokens_seen": 1713111040 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048563633445674855, + "loss": 0.0706, + "theoretical_loss": 3.504072972473133, + "tokens_seen": 1713373184 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048555609051516616, + "loss": 0.0671, + "theoretical_loss": 3.504027279478964, + "tokens_seen": 1713635328 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.0007473386940546334, + "objective/train/docs_used": 625897, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2894264459609985, + "objective/train/original_loss": 1.289426326751709, + "objective/train/theoretical_loss": 3.504004436336899, + "objective/train/tokens_used": 1734226400, + "objective/train/value_avg": -0.00562286376953125, + "objective/train/value_loss": 0.0001757982827257365, + "objective/train/value_max": -2.4497509002685547e-05, + "objective/train/value_min": -0.226318359375, + "objective/train/value_reward_corr": 0.6555531158316058, + "objective/train/value_std": 0.01038360595703125, + "objective/train/weight_avg": 0.9993323683738708, + "objective/train/weighted_lm_loss": 1.288981556892395, + "objective/train/weights_max": 1.1413358449935913, + "objective/train/weights_min": 0.3692460358142853, + "theoretical_loss": 3.504004436336899, + "tokens_seen": 1713766400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048547584657358366, + "loss": 0.069, + "theoretical_loss": 3.5039815954309943, + "tokens_seen": 1713897472 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004853956026320013, + "loss": 0.0679, + "theoretical_loss": 3.5039359203261045, + "tokens_seen": 1714159616 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004853153586904189, + "loss": 0.0678, + "theoretical_loss": 3.5038902541611776, + "tokens_seen": 1714421760 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048523511474883645, + "loss": 0.0686, + "theoretical_loss": 3.503844596933096, + "tokens_seen": 1714683904 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048515487080725406, + "loss": 0.0709, + "theoretical_loss": 3.503798948638746, + "tokens_seen": 1714946048 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004850746268656717, + "loss": 0.0714, + "theoretical_loss": 3.503753309275013, + "tokens_seen": 1715208192 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004849943829240893, + "loss": 0.0719, + "theoretical_loss": 3.5037076788387864, + "tokens_seen": 1715470336 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048491413898250684, + "loss": 0.0689, + "theoretical_loss": 3.503662057326956, + "tokens_seen": 1715732480 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004848338950409244, + "loss": 0.0689, + "theoretical_loss": 3.503616444736412, + "tokens_seen": 1715994624 + }, + { + "epoch": 0.52, + "learning_rate": 0.000484753651099342, + "loss": 0.0673, + "theoretical_loss": 3.5035708410640494, + "tokens_seen": 1716256768 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004846734071577596, + "loss": 0.0686, + "theoretical_loss": 3.5035252463067614, + "tokens_seen": 1716518912 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004845931632161772, + "loss": 0.0685, + "theoretical_loss": 3.503479660461444, + "tokens_seen": 1716781056 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0008675092831254005, + "objective/train/docs_used": 627098, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3586034774780273, + "objective/train/original_loss": 1.3586034774780273, + "objective/train/theoretical_loss": 3.5034340835249953, + "objective/train/tokens_used": 1737503200, + "objective/train/value_avg": -0.00962066650390625, + "objective/train/value_loss": 0.00028026950894854963, + "objective/train/value_max": -8.821487426757812e-05, + "objective/train/value_min": -0.76806640625, + "objective/train/value_reward_corr": 0.7356101623856774, + "objective/train/value_std": 0.0185699462890625, + "objective/train/weight_avg": 1.0009956359863281, + "objective/train/weighted_lm_loss": 1.3600226640701294, + "objective/train/weights_max": 1.2380048036575317, + "objective/train/weights_min": 0.38121721148490906, + "theoretical_loss": 3.5034340835249953, + "tokens_seen": 1717043200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004845129192745948, + "loss": 0.0663, + "theoretical_loss": 3.5034340835249953, + "tokens_seen": 1717043200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048443267533301236, + "loss": 0.0684, + "theoretical_loss": 3.503388515494315, + "tokens_seen": 1717305344 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048435243139142997, + "loss": 0.0682, + "theoretical_loss": 3.5033429563663026, + "tokens_seen": 1717567488 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048427218744984753, + "loss": 0.0709, + "theoretical_loss": 3.503297406137861, + "tokens_seen": 1717829632 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048419194350826514, + "loss": 0.0691, + "theoretical_loss": 3.5032518648058946, + "tokens_seen": 1718091776 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004841116995666827, + "loss": 0.0664, + "theoretical_loss": 3.503206332367308, + "tokens_seen": 1718353920 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004840314556251003, + "loss": 0.0682, + "theoretical_loss": 3.5031608088190085, + "tokens_seen": 1718616064 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004839512116835179, + "loss": 0.0674, + "theoretical_loss": 3.5031152941579045, + "tokens_seen": 1718878208 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004838709677419355, + "loss": 0.0674, + "theoretical_loss": 3.5030697883809063, + "tokens_seen": 1719140352 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004837907238003531, + "loss": 0.0697, + "theoretical_loss": 3.5030242914849254, + "tokens_seen": 1719402496 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048371047985877065, + "loss": 0.0709, + "theoretical_loss": 3.502978803466875, + "tokens_seen": 1719664640 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048363023591718827, + "loss": 0.0705, + "theoretical_loss": 3.502933324323669, + "tokens_seen": 1719926784 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004835499919756058, + "loss": 0.0707, + "theoretical_loss": 3.502887854052225, + "tokens_seen": 1720188928 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0003047508653253317, + "objective/train/docs_used": 628251, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5686558485031128, + "objective/train/original_loss": 1.5686558485031128, + "objective/train/theoretical_loss": 3.50286512224245, + "objective/train/tokens_used": 1740780000, + "objective/train/value_avg": -0.00920867919921875, + "objective/train/value_loss": 0.0004943870007991791, + "objective/train/value_max": -4.57763671875e-05, + "objective/train/value_min": -0.671875, + "objective/train/value_reward_corr": 0.6343885890994714, + "objective/train/value_std": 0.01812744140625, + "objective/train/weight_avg": 1.0005214214324951, + "objective/train/weighted_lm_loss": 1.569042682647705, + "objective/train/weights_max": 1.6559826135635376, + "objective/train/weights_min": 0.3744032382965088, + "theoretical_loss": 3.50286512224245, + "tokens_seen": 1720320000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048346974803402344, + "loss": 0.0689, + "theoretical_loss": 3.5028423926494594, + "tokens_seen": 1720451072 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048338950409244105, + "loss": 0.0686, + "theoretical_loss": 3.5027969401122925, + "tokens_seen": 1720713216 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004833092601508586, + "loss": 0.0688, + "theoretical_loss": 3.5027514964376447, + "tokens_seen": 1720975360 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004832290162092762, + "loss": 0.0703, + "theoretical_loss": 3.502706061622438, + "tokens_seen": 1721237504 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048314877226769383, + "loss": 0.0687, + "theoretical_loss": 3.502660635663597, + "tokens_seen": 1721499648 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004830685283261114, + "loss": 0.0699, + "theoretical_loss": 3.5026152185580472, + "tokens_seen": 1721761792 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048298828438452895, + "loss": 0.0686, + "theoretical_loss": 3.502569810302715, + "tokens_seen": 1722023936 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048290804044294656, + "loss": 0.0682, + "theoretical_loss": 3.5025244108945293, + "tokens_seen": 1722286080 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004828277965013642, + "loss": 0.0683, + "theoretical_loss": 3.50247902033042, + "tokens_seen": 1722548224 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048274755255978173, + "loss": 0.0704, + "theoretical_loss": 3.502433638607319, + "tokens_seen": 1722810368 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048266730861819935, + "loss": 0.0694, + "theoretical_loss": 3.502388265722159, + "tokens_seen": 1723072512 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048258706467661696, + "loss": 0.0698, + "theoretical_loss": 3.5023429016718755, + "tokens_seen": 1723334656 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.00030591912218369544, + "objective/train/docs_used": 629447, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4868828058242798, + "objective/train/original_loss": 1.4868826866149902, + "objective/train/theoretical_loss": 3.502297546453403, + "objective/train/tokens_used": 1744056800, + "objective/train/value_avg": -0.00592803955078125, + "objective/train/value_loss": 0.00018052756786346436, + "objective/train/value_max": -3.427267074584961e-05, + "objective/train/value_min": -0.2147216796875, + "objective/train/value_reward_corr": 0.647845852948392, + "objective/train/value_std": 0.01120758056640625, + "objective/train/weight_avg": 1.0003873109817505, + "objective/train/weighted_lm_loss": 1.4872801303863525, + "objective/train/weights_max": 1.1227530241012573, + "objective/train/weights_min": 0.3682018518447876, + "theoretical_loss": 3.502297546453403, + "tokens_seen": 1723596800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048250682073503446, + "loss": 0.0704, + "theoretical_loss": 3.502297546453403, + "tokens_seen": 1723596800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004824265767934521, + "loss": 0.0682, + "theoretical_loss": 3.5022522000636807, + "tokens_seen": 1723858944 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004823463328518697, + "loss": 0.0702, + "theoretical_loss": 3.502206862499647, + "tokens_seen": 1724121088 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004822660889102873, + "loss": 0.0684, + "theoretical_loss": 3.5021615337582435, + "tokens_seen": 1724383232 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048218584496870486, + "loss": 0.0686, + "theoretical_loss": 3.5021162138364117, + "tokens_seen": 1724645376 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004821056010271225, + "loss": 0.0692, + "theoretical_loss": 3.502070902731096, + "tokens_seen": 1724907520 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004820253570855401, + "loss": 0.0714, + "theoretical_loss": 3.502025600439241, + "tokens_seen": 1725169664 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004819451131439576, + "loss": 0.0717, + "theoretical_loss": 3.5019803069577944, + "tokens_seen": 1725431808 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004818648692023752, + "loss": 0.0691, + "theoretical_loss": 3.5019350222837042, + "tokens_seen": 1725693952 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004817846252607928, + "loss": 0.0702, + "theoretical_loss": 3.5018897464139203, + "tokens_seen": 1725956096 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048170438131921043, + "loss": 0.0719, + "theoretical_loss": 3.501844479345394, + "tokens_seen": 1726218240 + }, + { + "epoch": 0.52, + "learning_rate": 0.000481624137377628, + "loss": 0.069, + "theoretical_loss": 3.5017992210750783, + "tokens_seen": 1726480384 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004815438934360456, + "loss": 0.0716, + "theoretical_loss": 3.5017539715999275, + "tokens_seen": 1726742528 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": 0.0002910619368776679, + "objective/train/docs_used": 630619, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4500676393508911, + "objective/train/original_loss": 1.4500675201416016, + "objective/train/theoretical_loss": 3.501731350159588, + "objective/train/tokens_used": 1747333600, + "objective/train/value_avg": -0.01136016845703125, + "objective/train/value_loss": 0.0006346271256916225, + "objective/train/value_max": -3.0219554901123047e-05, + "objective/train/value_min": -0.8798828125, + "objective/train/value_reward_corr": 0.7903729671685932, + "objective/train/value_std": 0.0281829833984375, + "objective/train/weight_avg": 1.0005794763565063, + "objective/train/weighted_lm_loss": 1.4503711462020874, + "objective/train/weights_max": 2.2934980392456055, + "objective/train/weights_min": 0.3900670111179352, + "theoretical_loss": 3.501731350159588, + "tokens_seen": 1726873600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004814636494944632, + "loss": 0.0696, + "theoretical_loss": 3.5017087309168975, + "tokens_seen": 1727004672 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048138340555288077, + "loss": 0.0707, + "theoretical_loss": 3.501663499022947, + "tokens_seen": 1727266816 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048130316161129833, + "loss": 0.0681, + "theoretical_loss": 3.5016182759150336, + "tokens_seen": 1727528960 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048122291766971594, + "loss": 0.0688, + "theoretical_loss": 3.501573061590118, + "tokens_seen": 1727791104 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048114267372813355, + "loss": 0.0678, + "theoretical_loss": 3.5015278560451626, + "tokens_seen": 1728053248 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004810624297865511, + "loss": 0.069, + "theoretical_loss": 3.501482659277131, + "tokens_seen": 1728315392 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004809821858449687, + "loss": 0.0697, + "theoretical_loss": 3.501437471282988, + "tokens_seen": 1728577536 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048090194190338634, + "loss": 0.0669, + "theoretical_loss": 3.5013922920597, + "tokens_seen": 1728839680 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004808216979618039, + "loss": 0.0687, + "theoretical_loss": 3.5013471216042356, + "tokens_seen": 1729101824 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048074145402022145, + "loss": 0.0665, + "theoretical_loss": 3.5013019599135635, + "tokens_seen": 1729363968 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048066121007863907, + "loss": 0.0698, + "theoretical_loss": 3.501256806984656, + "tokens_seen": 1729626112 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004805809661370567, + "loss": 0.0697, + "theoretical_loss": 3.5012116628144847, + "tokens_seen": 1729888256 + }, + { + "epoch": 0.52, + "objective/train/advantage_avg": -0.0012186147505417466, + "objective/train/docs_used": 631803, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4452341794967651, + "objective/train/original_loss": 1.4452341794967651, + "objective/train/theoretical_loss": 3.5011665274000245, + "objective/train/tokens_used": 1750610400, + "objective/train/value_avg": -0.00994110107421875, + "objective/train/value_loss": 0.0008707749075256288, + "objective/train/value_max": -5.692243576049805e-05, + "objective/train/value_min": -0.7236328125, + "objective/train/value_reward_corr": 0.7037228321696455, + "objective/train/value_std": 0.0241546630859375, + "objective/train/weight_avg": 0.9991577863693237, + "objective/train/weighted_lm_loss": 1.4447067975997925, + "objective/train/weights_max": 1.752554178237915, + "objective/train/weights_min": 0.371359258890152, + "theoretical_loss": 3.5011665274000245, + "tokens_seen": 1730150400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048050072219547424, + "loss": 0.0666, + "theoretical_loss": 3.5011665274000245, + "tokens_seen": 1730150400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048042047825389185, + "loss": 0.072, + "theoretical_loss": 3.50112140073825, + "tokens_seen": 1730412544 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048034023431230946, + "loss": 0.0693, + "theoretical_loss": 3.501076282826139, + "tokens_seen": 1730674688 + }, + { + "epoch": 0.52, + "learning_rate": 0.000480259990370727, + "loss": 0.0694, + "theoretical_loss": 3.5010311736606696, + "tokens_seen": 1730936832 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048017974642914463, + "loss": 0.0711, + "theoretical_loss": 3.5009860732388223, + "tokens_seen": 1731198976 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004800995024875622, + "loss": 0.0705, + "theoretical_loss": 3.5009409815575787, + "tokens_seen": 1731461120 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048001925854597975, + "loss": 0.0678, + "theoretical_loss": 3.500895898613922, + "tokens_seen": 1731723264 + }, + { + "epoch": 0.52, + "learning_rate": 0.00047993901460439736, + "loss": 0.0709, + "theoretical_loss": 3.5008508244048366, + "tokens_seen": 1731985408 + }, + { + "epoch": 0.52, + "learning_rate": 0.000479858770662815, + "loss": 0.0688, + "theoretical_loss": 3.5008057589273083, + "tokens_seen": 1732247552 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004797785267212326, + "loss": 0.0674, + "theoretical_loss": 3.500760702178325, + "tokens_seen": 1732509696 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047969828277965015, + "loss": 0.0655, + "theoretical_loss": 3.5007156541548756, + "tokens_seen": 1732771840 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047961803883806776, + "loss": 0.0708, + "theoretical_loss": 3.5006706148539513, + "tokens_seen": 1733033984 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004795377948964853, + "loss": 0.0716, + "theoretical_loss": 3.500625584272543, + "tokens_seen": 1733296128 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0006020920118317008, + "objective/train/docs_used": 632954, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3608434200286865, + "objective/train/original_loss": 1.360843539237976, + "objective/train/theoretical_loss": 3.500603072250718, + "objective/train/tokens_used": 1753887200, + "objective/train/value_avg": -0.007480621337890625, + "objective/train/value_loss": 0.0004119122459087521, + "objective/train/value_max": -6.556510925292969e-05, + "objective/train/value_min": -0.68798828125, + "objective/train/value_reward_corr": 0.6353150839836534, + "objective/train/value_std": 0.0165557861328125, + "objective/train/weight_avg": 1.0007774829864502, + "objective/train/weighted_lm_loss": 1.3612209558486938, + "objective/train/weights_max": 1.4956903457641602, + "objective/train/weights_min": 0.36957859992980957, + "theoretical_loss": 3.500603072250718, + "tokens_seen": 1733427200 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004794575509549029, + "loss": 0.0681, + "theoretical_loss": 3.5005805624076456, + "tokens_seen": 1733558272 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004793773070133205, + "loss": 0.0688, + "theoretical_loss": 3.500535549256253, + "tokens_seen": 1733820416 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004792970630717381, + "loss": 0.0672, + "theoretical_loss": 3.5004905448153627, + "tokens_seen": 1734082560 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004792168191301557, + "loss": 0.0705, + "theoretical_loss": 3.500445549081972, + "tokens_seen": 1734344704 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047913657518857327, + "loss": 0.069, + "theoretical_loss": 3.500400562053081, + "tokens_seen": 1734606848 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004790563312469909, + "loss": 0.0698, + "theoretical_loss": 3.50035558372569, + "tokens_seen": 1734868992 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047897608730540844, + "loss": 0.0711, + "theoretical_loss": 3.5003106140968026, + "tokens_seen": 1735131136 + }, + { + "epoch": 0.53, + "learning_rate": 0.000478895843363826, + "loss": 0.0701, + "theoretical_loss": 3.5002656531634213, + "tokens_seen": 1735393280 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004788155994222436, + "loss": 0.0693, + "theoretical_loss": 3.5002207009225526, + "tokens_seen": 1735655424 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004787353554806612, + "loss": 0.0685, + "theoretical_loss": 3.5001757573712036, + "tokens_seen": 1735917568 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047865511153907884, + "loss": 0.0702, + "theoretical_loss": 3.5001308225063816, + "tokens_seen": 1736179712 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004785748675974964, + "loss": 0.0689, + "theoretical_loss": 3.500085896325098, + "tokens_seen": 1736441856 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": -0.00023456213239114732, + "objective/train/docs_used": 634251, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4015569686889648, + "objective/train/original_loss": 1.401557207107544, + "objective/train/theoretical_loss": 3.5000409788243623, + "objective/train/tokens_used": 1757164000, + "objective/train/value_avg": -0.006313323974609375, + "objective/train/value_loss": 0.00014978312538005412, + "objective/train/value_max": -6.812810897827148e-05, + "objective/train/value_min": -0.3037109375, + "objective/train/value_reward_corr": 0.7739671798839213, + "objective/train/value_std": 0.01079559326171875, + "objective/train/weight_avg": 0.9998375773429871, + "objective/train/weighted_lm_loss": 1.4020917415618896, + "objective/train/weights_max": 1.1075072288513184, + "objective/train/weights_min": 0.6098971962928772, + "theoretical_loss": 3.5000409788243623, + "tokens_seen": 1736704000 + }, + { + "epoch": 0.53, + "learning_rate": 0.000478494623655914, + "loss": 0.069, + "theoretical_loss": 3.5000409788243623, + "tokens_seen": 1736704000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004784143797143316, + "loss": 0.0706, + "theoretical_loss": 3.4999960700011896, + "tokens_seen": 1736966144 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047833413577274913, + "loss": 0.0697, + "theoretical_loss": 3.4999511698525927, + "tokens_seen": 1737228288 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047825389183116674, + "loss": 0.0691, + "theoretical_loss": 3.4999062783755877, + "tokens_seen": 1737490432 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047817364788958435, + "loss": 0.0675, + "theoretical_loss": 3.4998613955671924, + "tokens_seen": 1737752576 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004780934039480019, + "loss": 0.0682, + "theoretical_loss": 3.4998165214244246, + "tokens_seen": 1738014720 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004780131600064195, + "loss": 0.0695, + "theoretical_loss": 3.499771655944306, + "tokens_seen": 1738276864 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047793291606483714, + "loss": 0.0682, + "theoretical_loss": 3.4997267991238568, + "tokens_seen": 1738539008 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047785267212325475, + "loss": 0.0667, + "theoretical_loss": 3.4996819509601016, + "tokens_seen": 1738801152 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047777242818167225, + "loss": 0.0672, + "theoretical_loss": 3.499637111450064, + "tokens_seen": 1739063296 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047769218424008987, + "loss": 0.0708, + "theoretical_loss": 3.499592280590771, + "tokens_seen": 1739325440 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004776119402985075, + "loss": 0.0711, + "theoretical_loss": 3.499547458379249, + "tokens_seen": 1739587584 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047753169635692504, + "loss": 0.0692, + "theoretical_loss": 3.4995026448125284, + "tokens_seen": 1739849728 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.000589077128097415, + "objective/train/docs_used": 635535, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3270041942596436, + "objective/train/original_loss": 1.327004313468933, + "objective/train/theoretical_loss": 3.49948024127004, + "objective/train/tokens_used": 1760440800, + "objective/train/value_avg": -0.005573272705078125, + "objective/train/value_loss": 0.00010660909174475819, + "objective/train/value_max": -6.657838821411133e-05, + "objective/train/value_min": -0.2166748046875, + "objective/train/value_reward_corr": 0.6834865549309408, + "objective/train/value_std": 0.00957489013671875, + "objective/train/weight_avg": 1.000641107559204, + "objective/train/weighted_lm_loss": 1.3277722597122192, + "objective/train/weights_max": 1.1486068964004517, + "objective/train/weights_min": 0.7876083850860596, + "theoretical_loss": 3.49948024127004, + "tokens_seen": 1739980800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047745145241534265, + "loss": 0.067, + "theoretical_loss": 3.499457839887639, + "tokens_seen": 1740111872 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047737120847376026, + "loss": 0.0674, + "theoretical_loss": 3.4994130436016126, + "tokens_seen": 1740374016 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004772909645321779, + "loss": 0.0694, + "theoretical_loss": 3.4993682559514836, + "tokens_seen": 1740636160 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004772107205905954, + "loss": 0.0689, + "theoretical_loss": 3.4993234769342862, + "tokens_seen": 1740898304 + }, + { + "epoch": 0.53, + "learning_rate": 0.000477130476649013, + "loss": 0.0674, + "theoretical_loss": 3.4992787065470567, + "tokens_seen": 1741160448 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004770502327074306, + "loss": 0.0676, + "theoretical_loss": 3.499233944786834, + "tokens_seen": 1741422592 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047696998876584816, + "loss": 0.0679, + "theoretical_loss": 3.499189191650656, + "tokens_seen": 1741684736 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004768897448242658, + "loss": 0.0683, + "theoretical_loss": 3.499144447135565, + "tokens_seen": 1741946880 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004768095008826834, + "loss": 0.0715, + "theoretical_loss": 3.499099711238602, + "tokens_seen": 1742209024 + }, + { + "epoch": 0.53, + "learning_rate": 0.000476729256941101, + "loss": 0.0676, + "theoretical_loss": 3.4990549839568112, + "tokens_seen": 1742471168 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047664901299951856, + "loss": 0.0704, + "theoretical_loss": 3.4990102652872377, + "tokens_seen": 1742733312 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004765687690579361, + "loss": 0.0712, + "theoretical_loss": 3.4989655552269285, + "tokens_seen": 1742995456 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0008427195716649294, + "objective/train/docs_used": 636817, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3889329433441162, + "objective/train/original_loss": 1.3889329433441162, + "objective/train/theoretical_loss": 3.4989208537729315, + "objective/train/tokens_used": 1763717600, + "objective/train/value_avg": -0.004062652587890625, + "objective/train/value_loss": 0.00011055525101255625, + "objective/train/value_max": -4.5418739318847656e-05, + "objective/train/value_min": -0.203125, + "objective/train/value_reward_corr": 0.4945603011219797, + "objective/train/value_std": 0.006389617919921875, + "objective/train/weight_avg": 1.0008906126022339, + "objective/train/weighted_lm_loss": 1.3909051418304443, + "objective/train/weights_max": 1.1333805322647095, + "objective/train/weights_min": 0.3810659945011139, + "theoretical_loss": 3.4989208537729315, + "tokens_seen": 1743257600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047648852511635373, + "loss": 0.0711, + "theoretical_loss": 3.4989208537729315, + "tokens_seen": 1743257600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004764082811747713, + "loss": 0.0704, + "theoretical_loss": 3.498876160922296, + "tokens_seen": 1743519744 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004763280372331889, + "loss": 0.0698, + "theoretical_loss": 3.4988314766720734, + "tokens_seen": 1743781888 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004762477932916065, + "loss": 0.0705, + "theoretical_loss": 3.4987868010193157, + "tokens_seen": 1744044032 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047616754935002407, + "loss": 0.0646, + "theoretical_loss": 3.4987421339610774, + "tokens_seen": 1744306176 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004760873054084417, + "loss": 0.0683, + "theoretical_loss": 3.498697475494413, + "tokens_seen": 1744568320 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047600706146685924, + "loss": 0.0693, + "theoretical_loss": 3.49865282561638, + "tokens_seen": 1744830464 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047592681752527686, + "loss": 0.0702, + "theoretical_loss": 3.498608184324037, + "tokens_seen": 1745092608 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004758465735836944, + "loss": 0.0698, + "theoretical_loss": 3.498563551614443, + "tokens_seen": 1745354752 + }, + { + "epoch": 0.53, + "learning_rate": 0.000475766329642112, + "loss": 0.0685, + "theoretical_loss": 3.4985189274846586, + "tokens_seen": 1745616896 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047568608570052964, + "loss": 0.0712, + "theoretical_loss": 3.498474311931748, + "tokens_seen": 1745879040 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004756058417589472, + "loss": 0.0711, + "theoretical_loss": 3.4984297049527737, + "tokens_seen": 1746141184 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004755255978173648, + "loss": 0.0695, + "theoretical_loss": 3.498385106544802, + "tokens_seen": 1746403328 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0013079342897981405, + "objective/train/docs_used": 637932, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4305325746536255, + "objective/train/original_loss": 1.4305325746536255, + "objective/train/theoretical_loss": 3.498362810554026, + "objective/train/tokens_used": 1766994400, + "objective/train/value_avg": -0.00585174560546875, + "objective/train/value_loss": 0.0001362788025289774, + "objective/train/value_max": -3.349781036376953e-05, + "objective/train/value_min": -0.398681640625, + "objective/train/value_reward_corr": 0.6356918638700473, + "objective/train/value_std": 0.01058197021484375, + "objective/train/weight_avg": 1.0013703107833862, + "objective/train/weighted_lm_loss": 1.4319835901260376, + "objective/train/weights_max": 1.1282485723495483, + "objective/train/weights_min": 0.36817076802253723, + "theoretical_loss": 3.498362810554026, + "tokens_seen": 1746534400 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004754453538757824, + "loss": 0.0668, + "theoretical_loss": 3.4983405167049004, + "tokens_seen": 1746665472 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004753651099342, + "loss": 0.0692, + "theoretical_loss": 3.4982959354301366, + "tokens_seen": 1746927616 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047528486599261754, + "loss": 0.0651, + "theoretical_loss": 3.49825136271758, + "tokens_seen": 1747189760 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047520462205103515, + "loss": 0.0704, + "theoretical_loss": 3.498206798564303, + "tokens_seen": 1747451904 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047512437810945276, + "loss": 0.0687, + "theoretical_loss": 3.498162242967377, + "tokens_seen": 1747714048 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004750441341678703, + "loss": 0.0699, + "theoretical_loss": 3.4981176959238773, + "tokens_seen": 1747976192 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047496389022628794, + "loss": 0.0677, + "theoretical_loss": 3.4980731574308788, + "tokens_seen": 1748238336 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047488364628470555, + "loss": 0.0657, + "theoretical_loss": 3.498028627485459, + "tokens_seen": 1748500480 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004748034023431231, + "loss": 0.0686, + "theoretical_loss": 3.497984106084696, + "tokens_seen": 1748762624 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047472315840154066, + "loss": 0.0715, + "theoretical_loss": 3.4979395932256696, + "tokens_seen": 1749024768 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004746429144599583, + "loss": 0.0676, + "theoretical_loss": 3.4978950889054614, + "tokens_seen": 1749286912 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004745626705183759, + "loss": 0.0684, + "theoretical_loss": 3.4978505931211545, + "tokens_seen": 1749549056 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": -0.0006571173435077071, + "objective/train/docs_used": 639149, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3665317296981812, + "objective/train/original_loss": 1.3665317296981812, + "objective/train/theoretical_loss": 3.4978061058698326, + "objective/train/tokens_used": 1770271200, + "objective/train/value_avg": -0.007808685302734375, + "objective/train/value_loss": 0.00021528959041461349, + "objective/train/value_max": -5.7816505432128906e-05, + "objective/train/value_min": -0.264404296875, + "objective/train/value_reward_corr": 0.8101872704556123, + "objective/train/value_std": 0.016448974609375, + "objective/train/weight_avg": 0.999443769454956, + "objective/train/weighted_lm_loss": 1.3659167289733887, + "objective/train/weights_max": 1.2142077684402466, + "objective/train/weights_min": 0.3840899169445038, + "theoretical_loss": 3.4978061058698326, + "tokens_seen": 1749811200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047448242657679345, + "loss": 0.0693, + "theoretical_loss": 3.4978061058698326, + "tokens_seen": 1749811200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047440218263521106, + "loss": 0.0721, + "theoretical_loss": 3.4977616271485816, + "tokens_seen": 1750073344 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004743219386936287, + "loss": 0.0688, + "theoretical_loss": 3.4977171569544883, + "tokens_seen": 1750335488 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004742416947520462, + "loss": 0.0671, + "theoretical_loss": 3.497672695284641, + "tokens_seen": 1750597632 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004741614508104638, + "loss": 0.0683, + "theoretical_loss": 3.497628242136131, + "tokens_seen": 1750859776 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004740812068688814, + "loss": 0.0675, + "theoretical_loss": 3.497583797506048, + "tokens_seen": 1751121920 + }, + { + "epoch": 0.53, + "learning_rate": 0.000474000962927299, + "loss": 0.0701, + "theoretical_loss": 3.4975393613914854, + "tokens_seen": 1751384064 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004739207189857166, + "loss": 0.0691, + "theoretical_loss": 3.497494933789538, + "tokens_seen": 1751646208 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004738404750441342, + "loss": 0.071, + "theoretical_loss": 3.4974505146973005, + "tokens_seen": 1751908352 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004737602311025518, + "loss": 0.072, + "theoretical_loss": 3.497406104111871, + "tokens_seen": 1752170496 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047367998716096936, + "loss": 0.071, + "theoretical_loss": 3.4973617020303465, + "tokens_seen": 1752432640 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004735997432193869, + "loss": 0.0704, + "theoretical_loss": 3.497317308449828, + "tokens_seen": 1752694784 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047351949927780453, + "loss": 0.0658, + "theoretical_loss": 3.4972729233674174, + "tokens_seen": 1752956928 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0008459224482066929, + "objective/train/docs_used": 640338, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3503456115722656, + "objective/train/original_loss": 1.3503456115722656, + "objective/train/theoretical_loss": 3.4972507340120966, + "objective/train/tokens_used": 1773548000, + "objective/train/value_avg": -0.00661468505859375, + "objective/train/value_loss": 0.0001745822373777628, + "objective/train/value_max": -3.9458274841308594e-05, + "objective/train/value_min": -0.2017822265625, + "objective/train/value_reward_corr": 0.8029245560216447, + "objective/train/value_std": 0.01456451416015625, + "objective/train/weight_avg": 1.0009305477142334, + "objective/train/weighted_lm_loss": 1.351764440536499, + "objective/train/weights_max": 1.1526963710784912, + "objective/train/weights_min": 0.719582736492157, + "theoretical_loss": 3.4972507340120966, + "tokens_seen": 1753088000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047343925533622214, + "loss": 0.0681, + "theoretical_loss": 3.4972285467802164, + "tokens_seen": 1753219072 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004733590113946397, + "loss": 0.0708, + "theoretical_loss": 3.4971841786853295, + "tokens_seen": 1753481216 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004732787674530573, + "loss": 0.0707, + "theoretical_loss": 3.4971398190798615, + "tokens_seen": 1753743360 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004731985235114749, + "loss": 0.0687, + "theoretical_loss": 3.4970954679609214, + "tokens_seen": 1754005504 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004731182795698925, + "loss": 0.0725, + "theoretical_loss": 3.4970511253256156, + "tokens_seen": 1754267648 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047303803562831004, + "loss": 0.0719, + "theoretical_loss": 3.4970067911710556, + "tokens_seen": 1754529792 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047295779168672765, + "loss": 0.0727, + "theoretical_loss": 3.4969624654943514, + "tokens_seen": 1754791936 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047287754774514527, + "loss": 0.0695, + "theoretical_loss": 3.496918148292616, + "tokens_seen": 1755054080 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004727973038035628, + "loss": 0.07, + "theoretical_loss": 3.496873839562964, + "tokens_seen": 1755316224 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047271705986198044, + "loss": 0.0693, + "theoretical_loss": 3.49682953930251, + "tokens_seen": 1755578368 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047263681592039805, + "loss": 0.0685, + "theoretical_loss": 3.4967852475083725, + "tokens_seen": 1755840512 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004725565719788156, + "loss": 0.0701, + "theoretical_loss": 3.4967409641776683, + "tokens_seen": 1756102656 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0014478386146947742, + "objective/train/docs_used": 641471, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3184295892715454, + "objective/train/original_loss": 1.3184294700622559, + "objective/train/theoretical_loss": 3.4966966893075178, + "objective/train/tokens_used": 1776824800, + "objective/train/value_avg": -0.0074462890625, + "objective/train/value_loss": 0.0001307493366766721, + "objective/train/value_max": -3.594160079956055e-05, + "objective/train/value_min": -0.240234375, + "objective/train/value_reward_corr": 0.7224784018460088, + "objective/train/value_std": 0.01288604736328125, + "objective/train/weight_avg": 1.001512050628662, + "objective/train/weighted_lm_loss": 1.3203070163726807, + "objective/train/weights_max": 1.2715470790863037, + "objective/train/weights_min": 0.6124921441078186, + "theoretical_loss": 3.4966966893075178, + "tokens_seen": 1756364800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047247632803723317, + "loss": 0.0674, + "theoretical_loss": 3.4966966893075178, + "tokens_seen": 1756364800 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004723960840956508, + "loss": 0.0704, + "theoretical_loss": 3.496652422895042, + "tokens_seen": 1756626944 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004723158401540684, + "loss": 0.069, + "theoretical_loss": 3.496608164937364, + "tokens_seen": 1756889088 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047223559621248595, + "loss": 0.0705, + "theoretical_loss": 3.496563915431607, + "tokens_seen": 1757151232 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047215535227090356, + "loss": 0.0707, + "theoretical_loss": 3.4965196743748965, + "tokens_seen": 1757413376 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004720751083293212, + "loss": 0.0709, + "theoretical_loss": 3.49647544176436, + "tokens_seen": 1757675520 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047199486438773873, + "loss": 0.0729, + "theoretical_loss": 3.4964312175971246, + "tokens_seen": 1757937664 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047191462044615635, + "loss": 0.072, + "theoretical_loss": 3.496387001870321, + "tokens_seen": 1758199808 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004718343765045739, + "loss": 0.0706, + "theoretical_loss": 3.49634279458108, + "tokens_seen": 1758461952 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047175413256299146, + "loss": 0.0684, + "theoretical_loss": 3.4962985957265333, + "tokens_seen": 1758724096 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004716738886214091, + "loss": 0.0681, + "theoretical_loss": 3.496254405303815, + "tokens_seen": 1758986240 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004715936446798267, + "loss": 0.0705, + "theoretical_loss": 3.4962102233100607, + "tokens_seen": 1759248384 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004715134007382443, + "loss": 0.0728, + "theoretical_loss": 3.4961660497424063, + "tokens_seen": 1759510528 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.0015444692689925432, + "objective/train/docs_used": 642679, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3258466720581055, + "objective/train/original_loss": 1.3258464336395264, + "objective/train/theoretical_loss": 3.4961439661174727, + "objective/train/tokens_used": 1780101600, + "objective/train/value_avg": -0.007198333740234375, + "objective/train/value_loss": 0.0004468359693419188, + "objective/train/value_max": -6.604194641113281e-05, + "objective/train/value_min": -0.8720703125, + "objective/train/value_reward_corr": 0.6289400413877285, + "objective/train/value_std": 0.0161285400390625, + "objective/train/weight_avg": 1.0017292499542236, + "objective/train/weighted_lm_loss": 1.3280916213989258, + "objective/train/weights_max": 1.299414873123169, + "objective/train/weights_min": 0.36901721358299255, + "theoretical_loss": 3.4961439661174727, + "tokens_seen": 1759641600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047143315679666186, + "loss": 0.0701, + "theoretical_loss": 3.496121884597991, + "tokens_seen": 1759772672 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004713529128550795, + "loss": 0.0698, + "theoretical_loss": 3.4960777278739528, + "tokens_seen": 1760034816 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047127266891349703, + "loss": 0.0721, + "theoretical_loss": 3.4960335795674338, + "tokens_seen": 1760296960 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004711924249719146, + "loss": 0.0703, + "theoretical_loss": 3.495989439675575, + "tokens_seen": 1760559104 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004711121810303322, + "loss": 0.0699, + "theoretical_loss": 3.4959453081955205, + "tokens_seen": 1760821248 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004710319370887498, + "loss": 0.0688, + "theoretical_loss": 3.495901185124416, + "tokens_seen": 1761083392 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047095169314716743, + "loss": 0.0692, + "theoretical_loss": 3.4958570704594067, + "tokens_seen": 1761345536 + }, + { + "epoch": 0.53, + "learning_rate": 0.000470871449205585, + "loss": 0.0685, + "theoretical_loss": 3.495812964197641, + "tokens_seen": 1761607680 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004707912052640026, + "loss": 0.0702, + "theoretical_loss": 3.4957688663362685, + "tokens_seen": 1761869824 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004707109613224202, + "loss": 0.0716, + "theoretical_loss": 3.495724776872439, + "tokens_seen": 1762131968 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004706307173808377, + "loss": 0.0709, + "theoretical_loss": 3.4956806958033044, + "tokens_seen": 1762394112 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047055047343925533, + "loss": 0.0677, + "theoretical_loss": 3.4956366231260185, + "tokens_seen": 1762656256 + }, + { + "epoch": 0.53, + "objective/train/advantage_avg": 0.000993677182123065, + "objective/train/docs_used": 643830, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.203426718711853, + "objective/train/original_loss": 1.2034268379211426, + "objective/train/theoretical_loss": 3.4955925588377363, + "objective/train/tokens_used": 1783378400, + "objective/train/value_avg": -0.004913330078125, + "objective/train/value_loss": 0.00017356479656882584, + "objective/train/value_max": -5.1021575927734375e-05, + "objective/train/value_min": -0.369384765625, + "objective/train/value_reward_corr": 0.572270645143692, + "objective/train/value_std": 0.009613037109375, + "objective/train/weight_avg": 1.0010701417922974, + "objective/train/weighted_lm_loss": 1.20392644405365, + "objective/train/weights_max": 1.2256743907928467, + "objective/train/weights_min": 0.3710845410823822, + "theoretical_loss": 3.4955925588377363, + "tokens_seen": 1762918400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047047022949767294, + "loss": 0.0682, + "theoretical_loss": 3.4955925588377363, + "tokens_seen": 1762918400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047038998555609055, + "loss": 0.0729, + "theoretical_loss": 3.4955485029356135, + "tokens_seen": 1763180544 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004703097416145081, + "loss": 0.0693, + "theoretical_loss": 3.495504455416807, + "tokens_seen": 1763442688 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004702294976729257, + "loss": 0.0674, + "theoretical_loss": 3.495460416278477, + "tokens_seen": 1763704832 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047014925373134334, + "loss": 0.069, + "theoretical_loss": 3.4954163855177827, + "tokens_seen": 1763966976 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047006900978976084, + "loss": 0.0701, + "theoretical_loss": 3.495372363131886, + "tokens_seen": 1764229120 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046998876584817845, + "loss": 0.0665, + "theoretical_loss": 3.4953283491179503, + "tokens_seen": 1764491264 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046990852190659607, + "loss": 0.0694, + "theoretical_loss": 3.4952843434731395, + "tokens_seen": 1764753408 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004698282779650136, + "loss": 0.0698, + "theoretical_loss": 3.49524034619462, + "tokens_seen": 1765015552 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046974803402343124, + "loss": 0.0717, + "theoretical_loss": 3.495196357279559, + "tokens_seen": 1765277696 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046966779008184885, + "loss": 0.0715, + "theoretical_loss": 3.495152376725124, + "tokens_seen": 1765539840 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046958754614026646, + "loss": 0.0663, + "theoretical_loss": 3.4951084045284864, + "tokens_seen": 1765801984 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046950730219868397, + "loss": 0.0698, + "theoretical_loss": 3.495064440686816, + "tokens_seen": 1766064128 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.000716030306648463, + "objective/train/docs_used": 644977, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5920703411102295, + "objective/train/original_loss": 1.5920703411102295, + "objective/train/theoretical_loss": 3.495042461898211, + "objective/train/tokens_used": 1786655200, + "objective/train/value_avg": -0.00447845458984375, + "objective/train/value_loss": 0.00016367589705623686, + "objective/train/value_max": -5.3048133850097656e-05, + "objective/train/value_min": -0.255615234375, + "objective/train/value_reward_corr": 0.5714298891637101, + "objective/train/value_std": 0.00833892822265625, + "objective/train/weight_avg": 1.0007867813110352, + "objective/train/weighted_lm_loss": 1.5934929847717285, + "objective/train/weights_max": 1.171118140220642, + "objective/train/weights_min": 0.36817076802253723, + "theoretical_loss": 3.495042461898211, + "tokens_seen": 1766195200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004694270582571016, + "loss": 0.0699, + "theoretical_loss": 3.495020485197287, + "tokens_seen": 1766326272 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004693468143155192, + "loss": 0.0701, + "theoretical_loss": 3.494976538057073, + "tokens_seen": 1766588416 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046926657037393675, + "loss": 0.071, + "theoretical_loss": 3.4949325992633486, + "tokens_seen": 1766850560 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046918632643235436, + "loss": 0.0706, + "theoretical_loss": 3.4948886688132923, + "tokens_seen": 1767112704 + }, + { + "epoch": 0.54, + "learning_rate": 0.000469106082490772, + "loss": 0.0701, + "theoretical_loss": 3.4948447467040804, + "tokens_seen": 1767374848 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004690258385491896, + "loss": 0.0695, + "theoretical_loss": 3.494800832932894, + "tokens_seen": 1767636992 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046894559460760715, + "loss": 0.0663, + "theoretical_loss": 3.494756927496913, + "tokens_seen": 1767899136 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004688653506660247, + "loss": 0.0684, + "theoretical_loss": 3.49471303039332, + "tokens_seen": 1768161280 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004687851067244423, + "loss": 0.0707, + "theoretical_loss": 3.4946691416192985, + "tokens_seen": 1768423424 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004687048627828599, + "loss": 0.0692, + "theoretical_loss": 3.4946252611720348, + "tokens_seen": 1768685568 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004686246188412775, + "loss": 0.0706, + "theoretical_loss": 3.494581389048714, + "tokens_seen": 1768947712 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004685443748996951, + "loss": 0.0694, + "theoretical_loss": 3.494537525246524, + "tokens_seen": 1769209856 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.00046761715202592313, + "objective/train/docs_used": 646001, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.280922293663025, + "objective/train/original_loss": 1.2809221744537354, + "objective/train/theoretical_loss": 3.4944936697626545, + "objective/train/tokens_used": 1789932000, + "objective/train/value_avg": -0.00762939453125, + "objective/train/value_loss": 0.0009191989665850997, + "objective/train/value_max": -5.02467155456543e-05, + "objective/train/value_min": -0.57470703125, + "objective/train/value_reward_corr": 0.6030385380283607, + "objective/train/value_std": 0.0171356201171875, + "objective/train/weight_avg": 1.0007696151733398, + "objective/train/weighted_lm_loss": 1.2816685438156128, + "objective/train/weights_max": 1.776610016822815, + "objective/train/weights_min": 0.0282779261469841, + "theoretical_loss": 3.4944936697626545, + "tokens_seen": 1769472000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004684641309581127, + "loss": 0.0656, + "theoretical_loss": 3.4944936697626545, + "tokens_seen": 1769472000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046838388701653027, + "loss": 0.0679, + "theoretical_loss": 3.4944498225942953, + "tokens_seen": 1769734144 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046830364307494783, + "loss": 0.0699, + "theoretical_loss": 3.4944059837386394, + "tokens_seen": 1769996288 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046822339913336544, + "loss": 0.0712, + "theoretical_loss": 3.494362153192879, + "tokens_seen": 1770258432 + }, + { + "epoch": 0.54, + "learning_rate": 0.000468143155191783, + "loss": 0.0698, + "theoretical_loss": 3.494318330954209, + "tokens_seen": 1770520576 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004680629112502006, + "loss": 0.0691, + "theoretical_loss": 3.494274517019826, + "tokens_seen": 1770782720 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004679826673086182, + "loss": 0.0701, + "theoretical_loss": 3.494230711386926, + "tokens_seen": 1771044864 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004679024233670358, + "loss": 0.068, + "theoretical_loss": 3.4941869140527095, + "tokens_seen": 1771307008 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004678221794254534, + "loss": 0.0687, + "theoretical_loss": 3.494143125014375, + "tokens_seen": 1771569152 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046774193548387096, + "loss": 0.0716, + "theoretical_loss": 3.4940993442691246, + "tokens_seen": 1771831296 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046766169154228857, + "loss": 0.0701, + "theoretical_loss": 3.4940555718141613, + "tokens_seen": 1772093440 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046758144760070613, + "loss": 0.0691, + "theoretical_loss": 3.4940118076466886, + "tokens_seen": 1772355584 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046750120365912374, + "loss": 0.069, + "theoretical_loss": 3.493968051763912, + "tokens_seen": 1772617728 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": -0.0010038224281743169, + "objective/train/docs_used": 647211, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5441070795059204, + "objective/train/original_loss": 1.54410719871521, + "objective/train/theoretical_loss": 3.493946176928412, + "objective/train/tokens_used": 1793208800, + "objective/train/value_avg": -0.00521087646484375, + "objective/train/value_loss": 0.0002107528707711026, + "objective/train/value_max": -8.094310760498047e-05, + "objective/train/value_min": -0.254638671875, + "objective/train/value_reward_corr": 0.7200556121099299, + "objective/train/value_std": 0.01129150390625, + "objective/train/weight_avg": 0.9990896582603455, + "objective/train/weighted_lm_loss": 1.5431265830993652, + "objective/train/weights_max": 1.1554977893829346, + "objective/train/weights_min": 0.38069406151771545, + "theoretical_loss": 3.493946176928412, + "tokens_seen": 1772748800 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046742095971754135, + "loss": 0.0692, + "theoretical_loss": 3.4939243041630395, + "tokens_seen": 1772879872 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004673407157759589, + "loss": 0.0699, + "theoretical_loss": 3.4938805648412776, + "tokens_seen": 1773142016 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004672604718343765, + "loss": 0.07, + "theoretical_loss": 3.4938368337958368, + "tokens_seen": 1773404160 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046718022789279414, + "loss": 0.0677, + "theoretical_loss": 3.493793111023928, + "tokens_seen": 1773666304 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004670999839512117, + "loss": 0.0677, + "theoretical_loss": 3.4937493965227633, + "tokens_seen": 1773928448 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046701974000962925, + "loss": 0.0667, + "theoretical_loss": 3.4937056902895565, + "tokens_seen": 1774190592 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046693949606804687, + "loss": 0.0697, + "theoretical_loss": 3.4936619923215226, + "tokens_seen": 1774452736 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004668592521264645, + "loss": 0.0662, + "theoretical_loss": 3.493618302615878, + "tokens_seen": 1774714880 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046677900818488204, + "loss": 0.0686, + "theoretical_loss": 3.4935746211698393, + "tokens_seen": 1774977024 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046669876424329965, + "loss": 0.0682, + "theoretical_loss": 3.4935309479806262, + "tokens_seen": 1775239168 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046661852030171726, + "loss": 0.0717, + "theoretical_loss": 3.49348728304546, + "tokens_seen": 1775501312 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004665382763601348, + "loss": 0.0662, + "theoretical_loss": 3.493443626361561, + "tokens_seen": 1775763456 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.0011500284308567643, + "objective/train/docs_used": 648417, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3718308210372925, + "objective/train/original_loss": 1.371830701828003, + "objective/train/theoretical_loss": 3.4933999779261526, + "objective/train/tokens_used": 1796485600, + "objective/train/value_avg": -0.006534576416015625, + "objective/train/value_loss": 0.00019606102432589978, + "objective/train/value_max": -8.094310760498047e-05, + "objective/train/value_min": -0.62353515625, + "objective/train/value_reward_corr": 0.6778037876062109, + "objective/train/value_std": 0.0126953125, + "objective/train/weight_avg": 1.0012376308441162, + "objective/train/weighted_lm_loss": 1.37326979637146, + "objective/train/weights_max": 1.2040235996246338, + "objective/train/weights_min": 0.3703491687774658, + "theoretical_loss": 3.4933999779261526, + "tokens_seen": 1776025600 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004664580324185524, + "loss": 0.0694, + "theoretical_loss": 3.4933999779261526, + "tokens_seen": 1776025600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046637778847697, + "loss": 0.069, + "theoretical_loss": 3.4933563377364596, + "tokens_seen": 1776287744 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004662975445353876, + "loss": 0.0681, + "theoretical_loss": 3.493312705789708, + "tokens_seen": 1776549888 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046621730059380516, + "loss": 0.07, + "theoretical_loss": 3.493269082083123, + "tokens_seen": 1776812032 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004661370566522228, + "loss": 0.0708, + "theoretical_loss": 3.4932254666139357, + "tokens_seen": 1777074176 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004660568127106404, + "loss": 0.0649, + "theoretical_loss": 3.493181859379374, + "tokens_seen": 1777336320 + }, + { + "epoch": 0.54, + "learning_rate": 0.000465976568769058, + "loss": 0.0685, + "theoretical_loss": 3.4931382603766696, + "tokens_seen": 1777598464 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004658963248274755, + "loss": 0.0662, + "theoretical_loss": 3.493094669603055, + "tokens_seen": 1777860608 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004658160808858931, + "loss": 0.0685, + "theoretical_loss": 3.493051087055764, + "tokens_seen": 1778122752 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046573583694431073, + "loss": 0.071, + "theoretical_loss": 3.493007512732031, + "tokens_seen": 1778384896 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004656555930027283, + "loss": 0.0698, + "theoretical_loss": 3.4929639466290934, + "tokens_seen": 1778647040 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004655753490611459, + "loss": 0.0681, + "theoretical_loss": 3.492920388744188, + "tokens_seen": 1778909184 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004654951051195635, + "loss": 0.0695, + "theoretical_loss": 3.4928768390745555, + "tokens_seen": 1779171328 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.0007943502278067172, + "objective/train/docs_used": 649543, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4501819610595703, + "objective/train/original_loss": 1.4501819610595703, + "objective/train/theoretical_loss": 3.4928550673196033, + "objective/train/tokens_used": 1799762400, + "objective/train/value_avg": -0.00791168212890625, + "objective/train/value_loss": 0.0001940899674082175, + "objective/train/value_max": -5.1856040954589844e-05, + "objective/train/value_min": -0.533203125, + "objective/train/value_reward_corr": 0.7600946184960429, + "objective/train/value_std": 0.0164642333984375, + "objective/train/weight_avg": 1.0008825063705444, + "objective/train/weighted_lm_loss": 1.4511456489562988, + "objective/train/weights_max": 1.1041755676269531, + "objective/train/weights_min": 0.36851781606674194, + "theoretical_loss": 3.4928550673196033, + "tokens_seen": 1779302400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046541486117798107, + "loss": 0.0689, + "theoretical_loss": 3.4928332976174348, + "tokens_seen": 1779433472 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046533461723639863, + "loss": 0.0679, + "theoretical_loss": 3.492789764370068, + "tokens_seen": 1779695616 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046525437329481624, + "loss": 0.0694, + "theoretical_loss": 3.4927462393296986, + "tokens_seen": 1779957760 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046517412935323386, + "loss": 0.0721, + "theoretical_loss": 3.492702722493571, + "tokens_seen": 1780219904 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004650938854116514, + "loss": 0.0673, + "theoretical_loss": 3.4926592138589307, + "tokens_seen": 1780482048 + }, + { + "epoch": 0.54, + "learning_rate": 0.000465013641470069, + "loss": 0.0674, + "theoretical_loss": 3.4926157134230253, + "tokens_seen": 1780744192 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046493339752848664, + "loss": 0.07, + "theoretical_loss": 3.4925722211831025, + "tokens_seen": 1781006336 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004648531535869042, + "loss": 0.0687, + "theoretical_loss": 3.4925287371364124, + "tokens_seen": 1781268480 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046477290964532176, + "loss": 0.0661, + "theoretical_loss": 3.4924852612802066, + "tokens_seen": 1781530624 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046469266570373937, + "loss": 0.0691, + "theoretical_loss": 3.4924417936117376, + "tokens_seen": 1781792768 + }, + { + "epoch": 0.54, + "learning_rate": 0.000464612421762157, + "loss": 0.0705, + "theoretical_loss": 3.492398334128258, + "tokens_seen": 1782054912 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046453217782057454, + "loss": 0.0663, + "theoretical_loss": 3.492354882827023, + "tokens_seen": 1782317056 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.0007882177596911788, + "objective/train/docs_used": 650724, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5506643056869507, + "objective/train/original_loss": 1.5506644248962402, + "objective/train/theoretical_loss": 3.4923114397052903, + "objective/train/tokens_used": 1803039200, + "objective/train/value_avg": -0.00815582275390625, + "objective/train/value_loss": 0.00025313167134299874, + "objective/train/value_max": -0.0001323223114013672, + "objective/train/value_min": -0.2152099609375, + "objective/train/value_reward_corr": 0.6268371834840105, + "objective/train/value_std": 0.0126953125, + "objective/train/weight_avg": 1.000898003578186, + "objective/train/weighted_lm_loss": 1.551283597946167, + "objective/train/weights_max": 1.1620044708251953, + "objective/train/weights_min": 0.3706134557723999, + "theoretical_loss": 3.4923114397052903, + "tokens_seen": 1782579200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046445193387899215, + "loss": 0.0708, + "theoretical_loss": 3.4923114397052903, + "tokens_seen": 1782579200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046437168993740976, + "loss": 0.0692, + "theoretical_loss": 3.4922680047603167, + "tokens_seen": 1782841344 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004642914459958273, + "loss": 0.0717, + "theoretical_loss": 3.4922245779893615, + "tokens_seen": 1783103488 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046421120205424494, + "loss": 0.0726, + "theoretical_loss": 3.492181159389685, + "tokens_seen": 1783365632 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004641309581126625, + "loss": 0.0702, + "theoretical_loss": 3.4921377489585486, + "tokens_seen": 1783627776 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046405071417108005, + "loss": 0.0692, + "theoretical_loss": 3.4920943466932153, + "tokens_seen": 1783889920 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046397047022949767, + "loss": 0.0681, + "theoretical_loss": 3.49205095259095, + "tokens_seen": 1784152064 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004638902262879153, + "loss": 0.0685, + "theoretical_loss": 3.492007566649018, + "tokens_seen": 1784414208 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004638099823463329, + "loss": 0.0681, + "theoretical_loss": 3.491964188864686, + "tokens_seen": 1784676352 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046372973840475045, + "loss": 0.0728, + "theoretical_loss": 3.491920819235223, + "tokens_seen": 1784938496 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046364949446316806, + "loss": 0.0719, + "theoretical_loss": 3.491877457757898, + "tokens_seen": 1785200640 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004635692505215856, + "loss": 0.0682, + "theoretical_loss": 3.491834104429982, + "tokens_seen": 1785462784 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004634890065800032, + "loss": 0.0672, + "theoretical_loss": 3.491790759248747, + "tokens_seen": 1785724928 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.001205379725433886, + "objective/train/docs_used": 652000, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.570763111114502, + "objective/train/original_loss": 1.570763111114502, + "objective/train/theoretical_loss": 3.4917690897122826, + "objective/train/tokens_used": 1806316000, + "objective/train/value_avg": -0.00769805908203125, + "objective/train/value_loss": 0.0003391410573385656, + "objective/train/value_max": -4.988908767700195e-05, + "objective/train/value_min": -0.9736328125, + "objective/train/value_reward_corr": 0.8042611814725403, + "objective/train/value_std": 0.0225830078125, + "objective/train/weight_avg": 1.0013576745986938, + "objective/train/weighted_lm_loss": 1.5740770101547241, + "objective/train/weights_max": 1.6434879302978516, + "objective/train/weights_min": 0.38063594698905945, + "theoretical_loss": 3.4917690897122826, + "tokens_seen": 1785856000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004634087626384208, + "loss": 0.0706, + "theoretical_loss": 3.491747422211467, + "tokens_seen": 1785987072 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004633285186968384, + "loss": 0.0681, + "theoretical_loss": 3.491704093315416, + "tokens_seen": 1786249216 + }, + { + "epoch": 0.54, + "learning_rate": 0.000463248274755256, + "loss": 0.0693, + "theoretical_loss": 3.4916607725578714, + "tokens_seen": 1786511360 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004631680308136736, + "loss": 0.0688, + "theoretical_loss": 3.4916174599361103, + "tokens_seen": 1786773504 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004630877868720912, + "loss": 0.0723, + "theoretical_loss": 3.491574155447411, + "tokens_seen": 1787035648 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046300754293050875, + "loss": 0.0684, + "theoretical_loss": 3.491530859089054, + "tokens_seen": 1787297792 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004629272989889263, + "loss": 0.0669, + "theoretical_loss": 3.49148757085832, + "tokens_seen": 1787559936 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004628470550473439, + "loss": 0.0723, + "theoretical_loss": 3.4914442907524927, + "tokens_seen": 1787822080 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046276681110576153, + "loss": 0.0672, + "theoretical_loss": 3.4914010187688556, + "tokens_seen": 1788084224 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046268656716417914, + "loss": 0.0691, + "theoretical_loss": 3.4913577549046937, + "tokens_seen": 1788346368 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004626063232225967, + "loss": 0.0673, + "theoretical_loss": 3.491314499157294, + "tokens_seen": 1788608512 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004625260792810143, + "loss": 0.0682, + "theoretical_loss": 3.491271251523945, + "tokens_seen": 1788870656 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.0005922395503148437, + "objective/train/docs_used": 653179, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4670391082763672, + "objective/train/original_loss": 1.4670391082763672, + "objective/train/theoretical_loss": 3.491228012001935, + "objective/train/tokens_used": 1809592800, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.00044217510730959475, + "objective/train/value_max": -7.784366607666016e-05, + "objective/train/value_min": -0.64892578125, + "objective/train/value_reward_corr": 0.5852997173970007, + "objective/train/value_std": 0.0131988525390625, + "objective/train/weight_avg": 1.000777006149292, + "objective/train/weighted_lm_loss": 1.4677743911743164, + "objective/train/weights_max": 1.29947829246521, + "objective/train/weights_min": 0.36863452196121216, + "theoretical_loss": 3.491228012001935, + "tokens_seen": 1789132800 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004624458353394319, + "loss": 0.0687, + "theoretical_loss": 3.491228012001935, + "tokens_seen": 1789132800 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046236559139784943, + "loss": 0.0684, + "theoretical_loss": 3.4911847805885547, + "tokens_seen": 1789394944 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046228534745626704, + "loss": 0.0685, + "theoretical_loss": 3.491141557281096, + "tokens_seen": 1789657088 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046220510351468465, + "loss": 0.0728, + "theoretical_loss": 3.491098342076852, + "tokens_seen": 1789919232 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046212485957310227, + "loss": 0.0681, + "theoretical_loss": 3.4910551349731183, + "tokens_seen": 1790181376 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004620446156315198, + "loss": 0.0676, + "theoretical_loss": 3.4910119359671885, + "tokens_seen": 1790443520 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046196437168993744, + "loss": 0.0654, + "theoretical_loss": 3.490968745056361, + "tokens_seen": 1790705664 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046188412774835505, + "loss": 0.0685, + "theoretical_loss": 3.4909255622379343, + "tokens_seen": 1790967808 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046180388380677256, + "loss": 0.0687, + "theoretical_loss": 3.490882387509207, + "tokens_seen": 1791229952 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046172363986519017, + "loss": 0.07, + "theoretical_loss": 3.490839220867481, + "tokens_seen": 1791492096 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004616433959236078, + "loss": 0.0658, + "theoretical_loss": 3.490796062310058, + "tokens_seen": 1791754240 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046156315198202534, + "loss": 0.0681, + "theoretical_loss": 3.4907529118342415, + "tokens_seen": 1792016384 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046148290804044295, + "loss": 0.0677, + "theoretical_loss": 3.490709769437337, + "tokens_seen": 1792278528 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 0.0001855674636317417, + "objective/train/docs_used": 654198, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2086480855941772, + "objective/train/original_loss": 1.2086482048034668, + "objective/train/theoretical_loss": 3.4906882012676346, + "objective/train/tokens_used": 1812869600, + "objective/train/value_avg": -0.007686614990234375, + "objective/train/value_loss": 0.00019353711104486138, + "objective/train/value_max": -5.066394805908203e-05, + "objective/train/value_min": -0.783203125, + "objective/train/value_reward_corr": 0.6790004659897202, + "objective/train/value_std": 0.01259613037109375, + "objective/train/weight_avg": 1.0002752542495728, + "objective/train/weighted_lm_loss": 1.20932936668396, + "objective/train/weights_max": 1.4085056781768799, + "objective/train/weights_min": 0.4097484350204468, + "theoretical_loss": 3.4906882012676346, + "tokens_seen": 1792409600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046140266409886056, + "loss": 0.0655, + "theoretical_loss": 3.49066663511665, + "tokens_seen": 1792540672 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004613224201572782, + "loss": 0.0686, + "theoretical_loss": 3.4906235088694872, + "tokens_seen": 1792802816 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004612421762156957, + "loss": 0.0678, + "theoretical_loss": 3.4905803906931587, + "tokens_seen": 1793064960 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004611619322741133, + "loss": 0.0652, + "theoretical_loss": 3.4905372805849737, + "tokens_seen": 1793327104 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004610816883325309, + "loss": 0.0657, + "theoretical_loss": 3.490494178542243, + "tokens_seen": 1793589248 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046100144439094846, + "loss": 0.0661, + "theoretical_loss": 3.4904510845622805, + "tokens_seen": 1793851392 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004609212004493661, + "loss": 0.069, + "theoretical_loss": 3.490407998642399, + "tokens_seen": 1794113536 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004608409565077837, + "loss": 0.0692, + "theoretical_loss": 3.4903649207799137, + "tokens_seen": 1794375680 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004607607125662013, + "loss": 0.0691, + "theoretical_loss": 3.4903218509721414, + "tokens_seen": 1794637824 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046068046862461886, + "loss": 0.0674, + "theoretical_loss": 3.4902787892163993, + "tokens_seen": 1794899968 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004606002246830364, + "loss": 0.0684, + "theoretical_loss": 3.490235735510007, + "tokens_seen": 1795162112 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046051998074145403, + "loss": 0.0702, + "theoretical_loss": 3.490192689850284, + "tokens_seen": 1795424256 + }, + { + "epoch": 0.54, + "objective/train/advantage_avg": 6.26305063633481e-06, + "objective/train/docs_used": 655419, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3910480737686157, + "objective/train/original_loss": 1.3910481929779053, + "objective/train/theoretical_loss": 3.4901496522345528, + "objective/train/tokens_used": 1816146400, + "objective/train/value_avg": -0.00922393798828125, + "objective/train/value_loss": 0.00022351609368342906, + "objective/train/value_max": -4.756450653076172e-05, + "objective/train/value_min": -0.712890625, + "objective/train/value_reward_corr": 0.7772234877013644, + "objective/train/value_std": 0.01708984375, + "objective/train/weight_avg": 1.0001089572906494, + "objective/train/weighted_lm_loss": 1.390507698059082, + "objective/train/weights_max": 1.151555061340332, + "objective/train/weights_min": 0.38723233342170715, + "theoretical_loss": 3.4901496522345528, + "tokens_seen": 1795686400 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004604397367998716, + "loss": 0.0698, + "theoretical_loss": 3.4901496522345528, + "tokens_seen": 1795686400 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004603594928582892, + "loss": 0.0686, + "theoretical_loss": 3.490106622660136, + "tokens_seen": 1795948544 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004602792489167068, + "loss": 0.0693, + "theoretical_loss": 3.4900636011243567, + "tokens_seen": 1796210688 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046019900497512443, + "loss": 0.0705, + "theoretical_loss": 3.4900205876245414, + "tokens_seen": 1796472832 + }, + { + "epoch": 0.54, + "learning_rate": 0.000460118761033542, + "loss": 0.0705, + "theoretical_loss": 3.4899775821580166, + "tokens_seen": 1796734976 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046003851709195954, + "loss": 0.0689, + "theoretical_loss": 3.4899345847221097, + "tokens_seen": 1796997120 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045995827315037716, + "loss": 0.0665, + "theoretical_loss": 3.4898915953141505, + "tokens_seen": 1797259264 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004598780292087947, + "loss": 0.0678, + "theoretical_loss": 3.4898486139314695, + "tokens_seen": 1797521408 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045979778526721233, + "loss": 0.07, + "theoretical_loss": 3.489805640571398, + "tokens_seen": 1797783552 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045971754132562994, + "loss": 0.0664, + "theoretical_loss": 3.48976267523127, + "tokens_seen": 1798045696 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004596372973840475, + "loss": 0.068, + "theoretical_loss": 3.4897197179084185, + "tokens_seen": 1798307840 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004595570534424651, + "loss": 0.0701, + "theoretical_loss": 3.48967676860018, + "tokens_seen": 1798569984 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004594768095008827, + "loss": 0.0705, + "theoretical_loss": 3.4896338273038916, + "tokens_seen": 1798832128 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0007114228792488575, + "objective/train/docs_used": 656735, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3487783670425415, + "objective/train/original_loss": 1.348778247833252, + "objective/train/theoretical_loss": 3.4896123596593966, + "objective/train/tokens_used": 1819423200, + "objective/train/value_avg": -0.0080108642578125, + "objective/train/value_loss": 0.00022283625730779022, + "objective/train/value_max": -7.545948028564453e-05, + "objective/train/value_min": -0.61279296875, + "objective/train/value_reward_corr": 0.7115171592647844, + "objective/train/value_std": 0.014739990234375, + "objective/train/weight_avg": 1.0008171796798706, + "objective/train/weighted_lm_loss": 1.3492664098739624, + "objective/train/weights_max": 1.3917560577392578, + "objective/train/weights_min": 0.5280845165252686, + "theoretical_loss": 3.4896123596593966, + "tokens_seen": 1798963200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004593965655593003, + "loss": 0.0665, + "theoretical_loss": 3.4895908940168905, + "tokens_seen": 1799094272 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045931632161771784, + "loss": 0.0699, + "theoretical_loss": 3.489547968736517, + "tokens_seen": 1799356416 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045923607767613545, + "loss": 0.07, + "theoretical_loss": 3.4895050514601116, + "tokens_seen": 1799618560 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045915583373455307, + "loss": 0.0686, + "theoretical_loss": 3.4894621421850163, + "tokens_seen": 1799880704 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004590755897929706, + "loss": 0.0689, + "theoretical_loss": 3.489419240908574, + "tokens_seen": 1800142848 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045899534585138824, + "loss": 0.0706, + "theoretical_loss": 3.48937634762813, + "tokens_seen": 1800404992 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045891510190980585, + "loss": 0.0674, + "theoretical_loss": 3.489333462341029, + "tokens_seen": 1800667136 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004588348579682234, + "loss": 0.0685, + "theoretical_loss": 3.4892905850446185, + "tokens_seen": 1800929280 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045875461402664097, + "loss": 0.0675, + "theoretical_loss": 3.489247715736247, + "tokens_seen": 1801191424 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004586743700850586, + "loss": 0.0695, + "theoretical_loss": 3.489204854413264, + "tokens_seen": 1801453568 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004585941261434762, + "loss": 0.066, + "theoretical_loss": 3.48916200107302, + "tokens_seen": 1801715712 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045851388220189375, + "loss": 0.0666, + "theoretical_loss": 3.489119155712868, + "tokens_seen": 1801977856 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": -0.0004979989607818425, + "objective/train/docs_used": 657910, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4576196670532227, + "objective/train/original_loss": 1.4576196670532227, + "objective/train/theoretical_loss": 3.4890763183301607, + "objective/train/tokens_used": 1822700000, + "objective/train/value_avg": -0.01110076904296875, + "objective/train/value_loss": 0.000849848089274019, + "objective/train/value_max": -4.756450653076172e-05, + "objective/train/value_min": -0.74853515625, + "objective/train/value_reward_corr": 0.656073592097922, + "objective/train/value_std": 0.0252685546875, + "objective/train/weight_avg": 0.9998723268508911, + "objective/train/weighted_lm_loss": 1.4578678607940674, + "objective/train/weights_max": 2.0341408252716064, + "objective/train/weights_min": 0.37089207768440247, + "theoretical_loss": 3.4890763183301607, + "tokens_seen": 1802240000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045843363826031136, + "loss": 0.0681, + "theoretical_loss": 3.4890763183301607, + "tokens_seen": 1802240000 + }, + { + "epoch": 0.55, + "learning_rate": 0.000458353394318729, + "loss": 0.0693, + "theoretical_loss": 3.489033488922253, + "tokens_seen": 1802502144 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045827315037714653, + "loss": 0.0673, + "theoretical_loss": 3.4889906674865, + "tokens_seen": 1802764288 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004581929064355641, + "loss": 0.0719, + "theoretical_loss": 3.48894785402026, + "tokens_seen": 1803026432 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004581126624939817, + "loss": 0.0676, + "theoretical_loss": 3.4889050485208912, + "tokens_seen": 1803288576 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004580324185523993, + "loss": 0.0681, + "theoretical_loss": 3.4888622509857523, + "tokens_seen": 1803550720 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004579521746108169, + "loss": 0.0653, + "theoretical_loss": 3.488819461412205, + "tokens_seen": 1803812864 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004578719306692345, + "loss": 0.0706, + "theoretical_loss": 3.4887766797976116, + "tokens_seen": 1804075008 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004577916867276521, + "loss": 0.0704, + "theoretical_loss": 3.488733906139336, + "tokens_seen": 1804337152 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004577114427860697, + "loss": 0.0709, + "theoretical_loss": 3.4886911404347414, + "tokens_seen": 1804599296 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004576311988444872, + "loss": 0.0676, + "theoretical_loss": 3.4886483826811947, + "tokens_seen": 1804861440 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045755095490290483, + "loss": 0.0675, + "theoretical_loss": 3.4886056328760633, + "tokens_seen": 1805123584 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045747071096132244, + "loss": 0.0696, + "theoretical_loss": 3.4885628910167155, + "tokens_seen": 1805385728 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": -0.00019410904496908188, + "objective/train/docs_used": 658936, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.221300482749939, + "objective/train/original_loss": 1.2213003635406494, + "objective/train/theoretical_loss": 3.4885415230658885, + "objective/train/tokens_used": 1825976800, + "objective/train/value_avg": -0.008758544921875, + "objective/train/value_loss": 0.0004874709411524236, + "objective/train/value_max": -2.8431415557861328e-05, + "objective/train/value_min": -0.734375, + "objective/train/value_reward_corr": 0.7181355256849492, + "objective/train/value_std": 0.0221710205078125, + "objective/train/weight_avg": 1.0000203847885132, + "objective/train/weighted_lm_loss": 1.220677137374878, + "objective/train/weights_max": 1.7585545778274536, + "objective/train/weights_min": 0.37890908122062683, + "theoretical_loss": 3.4885415230658885, + "tokens_seen": 1805516800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045739046701974, + "loss": 0.0703, + "theoretical_loss": 3.488520157100521, + "tokens_seen": 1805647872 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004573102230781576, + "loss": 0.0703, + "theoretical_loss": 3.4884774311248505, + "tokens_seen": 1805910016 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045722997913657523, + "loss": 0.0676, + "theoretical_loss": 3.4884347130870768, + "tokens_seen": 1806172160 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004571497351949928, + "loss": 0.0662, + "theoretical_loss": 3.4883920029845727, + "tokens_seen": 1806434304 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045706949125341034, + "loss": 0.0689, + "theoretical_loss": 3.488349300814713, + "tokens_seen": 1806696448 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045698924731182796, + "loss": 0.0691, + "theoretical_loss": 3.4883066065748745, + "tokens_seen": 1806958592 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045690900337024557, + "loss": 0.0706, + "theoretical_loss": 3.488263920262434, + "tokens_seen": 1807220736 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045682875942866313, + "loss": 0.0686, + "theoretical_loss": 3.4882212418747693, + "tokens_seen": 1807482880 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045674851548708074, + "loss": 0.0685, + "theoretical_loss": 3.4881785714092617, + "tokens_seen": 1807745024 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045666827154549835, + "loss": 0.0703, + "theoretical_loss": 3.48813590886329, + "tokens_seen": 1808007168 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004565880276039159, + "loss": 0.0687, + "theoretical_loss": 3.488093254234238, + "tokens_seen": 1808269312 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045650778366233347, + "loss": 0.0683, + "theoretical_loss": 3.488050607519489, + "tokens_seen": 1808531456 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0005030391039326787, + "objective/train/docs_used": 660069, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3549247980117798, + "objective/train/original_loss": 1.3549246788024902, + "objective/train/theoretical_loss": 3.4880079687164267, + "objective/train/tokens_used": 1829253600, + "objective/train/value_avg": -0.0085906982421875, + "objective/train/value_loss": 0.0005426432471722364, + "objective/train/value_max": -3.88026237487793e-05, + "objective/train/value_min": -0.63037109375, + "objective/train/value_reward_corr": 0.6518784997929621, + "objective/train/value_std": 0.019317626953125, + "objective/train/weight_avg": 1.0007175207138062, + "objective/train/weighted_lm_loss": 1.355029821395874, + "objective/train/weights_max": 1.2896806001663208, + "objective/train/weights_min": 0.24797950685024261, + "theoretical_loss": 3.4880079687164267, + "tokens_seen": 1808793600 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004564275397207511, + "loss": 0.0699, + "theoretical_loss": 3.4880079687164267, + "tokens_seen": 1808793600 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004563472957791687, + "loss": 0.0677, + "theoretical_loss": 3.4879653378224384, + "tokens_seen": 1809055744 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045626705183758625, + "loss": 0.0695, + "theoretical_loss": 3.4879227148349106, + "tokens_seen": 1809317888 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045618680789600387, + "loss": 0.0664, + "theoretical_loss": 3.487880099751232, + "tokens_seen": 1809580032 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004561065639544215, + "loss": 0.0645, + "theoretical_loss": 3.487837492568792, + "tokens_seen": 1809842176 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045602632001283904, + "loss": 0.0697, + "theoretical_loss": 3.487794893284981, + "tokens_seen": 1810104320 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045594607607125665, + "loss": 0.0685, + "theoretical_loss": 3.487752301897192, + "tokens_seen": 1810366464 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004558658321296742, + "loss": 0.0701, + "theoretical_loss": 3.487709718402818, + "tokens_seen": 1810628608 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045578558818809177, + "loss": 0.0678, + "theoretical_loss": 3.4876671427992543, + "tokens_seen": 1810890752 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004557053442465094, + "loss": 0.0671, + "theoretical_loss": 3.4876245750838955, + "tokens_seen": 1811152896 + }, + { + "epoch": 0.55, + "learning_rate": 0.000455625100304927, + "loss": 0.0692, + "theoretical_loss": 3.48758201525414, + "tokens_seen": 1811415040 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004555448563633446, + "loss": 0.0668, + "theoretical_loss": 3.487539463307385, + "tokens_seen": 1811677184 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045546461242176216, + "loss": 0.068, + "theoretical_loss": 3.4874969192410306, + "tokens_seen": 1811939328 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0007302032900042832, + "objective/train/docs_used": 661289, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.294402837753296, + "objective/train/original_loss": 1.2944025993347168, + "objective/train/theoretical_loss": 3.4874756501621915, + "objective/train/tokens_used": 1832530400, + "objective/train/value_avg": -0.01059722900390625, + "objective/train/value_loss": 0.0002705962979234755, + "objective/train/value_max": -3.045797348022461e-05, + "objective/train/value_min": -0.693359375, + "objective/train/value_reward_corr": 0.7348594278962314, + "objective/train/value_std": 0.018096923828125, + "objective/train/weight_avg": 1.0008525848388672, + "objective/train/weighted_lm_loss": 1.2947546243667603, + "objective/train/weights_max": 1.1715208292007446, + "objective/train/weights_min": 0.3793371617794037, + "theoretical_loss": 3.4874756501621915, + "tokens_seen": 1812070400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004553843684801798, + "loss": 0.0674, + "theoretical_loss": 3.4874543830524782, + "tokens_seen": 1812201472 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045530412453859733, + "loss": 0.0693, + "theoretical_loss": 3.487411854739128, + "tokens_seen": 1812463616 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004552238805970149, + "loss": 0.0693, + "theoretical_loss": 3.487369334298386, + "tokens_seen": 1812725760 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004551436366554325, + "loss": 0.0674, + "theoretical_loss": 3.4873268217276543, + "tokens_seen": 1812987904 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004550633927138501, + "loss": 0.0697, + "theoretical_loss": 3.48728431702434, + "tokens_seen": 1813250048 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045498314877226773, + "loss": 0.0671, + "theoretical_loss": 3.4872418201858495, + "tokens_seen": 1813512192 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004549029048306853, + "loss": 0.069, + "theoretical_loss": 3.487199331209591, + "tokens_seen": 1813774336 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004548226608891029, + "loss": 0.0709, + "theoretical_loss": 3.487156850092974, + "tokens_seen": 1814036480 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004547424169475205, + "loss": 0.0717, + "theoretical_loss": 3.487114376833409, + "tokens_seen": 1814298624 + }, + { + "epoch": 0.55, + "learning_rate": 0.000454662173005938, + "loss": 0.0678, + "theoretical_loss": 3.487071911428308, + "tokens_seen": 1814560768 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045458192906435563, + "loss": 0.066, + "theoretical_loss": 3.487029453875085, + "tokens_seen": 1814822912 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045450168512277324, + "loss": 0.0706, + "theoretical_loss": 3.4869870041711524, + "tokens_seen": 1815085056 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0023117726668715477, + "objective/train/docs_used": 662349, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2869470119476318, + "objective/train/original_loss": 1.2869467735290527, + "objective/train/theoretical_loss": 3.4869445623139272, + "objective/train/tokens_used": 1835807200, + "objective/train/value_avg": -0.006900787353515625, + "objective/train/value_loss": 0.00016944836534094065, + "objective/train/value_max": -2.7120113372802734e-05, + "objective/train/value_min": -0.77392578125, + "objective/train/value_reward_corr": 0.7259664757718066, + "objective/train/value_std": 0.01531982421875, + "objective/train/weight_avg": 1.0023906230926514, + "objective/train/weighted_lm_loss": 1.2890067100524902, + "objective/train/weights_max": 1.332244634628296, + "objective/train/weights_min": 0.37565645575523376, + "theoretical_loss": 3.4869445623139272, + "tokens_seen": 1815347200 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045442144118119086, + "loss": 0.0685, + "theoretical_loss": 3.4869445623139272, + "tokens_seen": 1815347200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004543411972396084, + "loss": 0.0716, + "theoretical_loss": 3.4869021283008257, + "tokens_seen": 1815609344 + }, + { + "epoch": 0.55, + "learning_rate": 0.000454260953298026, + "loss": 0.0669, + "theoretical_loss": 3.4868597021292658, + "tokens_seen": 1815871488 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045418070935644364, + "loss": 0.0711, + "theoretical_loss": 3.4868172837966673, + "tokens_seen": 1816133632 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045410046541486114, + "loss": 0.0664, + "theoretical_loss": 3.48677487330045, + "tokens_seen": 1816395776 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045402022147327876, + "loss": 0.0682, + "theoretical_loss": 3.486732470638036, + "tokens_seen": 1816657920 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045393997753169637, + "loss": 0.0685, + "theoretical_loss": 3.4866900758068478, + "tokens_seen": 1816920064 + }, + { + "epoch": 0.55, + "learning_rate": 0.000453859733590114, + "loss": 0.0674, + "theoretical_loss": 3.4866476888043096, + "tokens_seen": 1817182208 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045377948964853154, + "loss": 0.0681, + "theoretical_loss": 3.486605309627847, + "tokens_seen": 1817444352 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045369924570694915, + "loss": 0.0693, + "theoretical_loss": 3.4865629382748864, + "tokens_seen": 1817706496 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045361900176536676, + "loss": 0.0699, + "theoretical_loss": 3.486520574742855, + "tokens_seen": 1817968640 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045353875782378427, + "loss": 0.0667, + "theoretical_loss": 3.486478219029183, + "tokens_seen": 1818230784 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004534585138822019, + "loss": 0.0698, + "theoretical_loss": 3.4864358711312997, + "tokens_seen": 1818492928 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0007954819593578577, + "objective/train/docs_used": 663450, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3562122583389282, + "objective/train/original_loss": 1.3562121391296387, + "objective/train/theoretical_loss": 3.486414700112476, + "objective/train/tokens_used": 1839084000, + "objective/train/value_avg": -0.00738525390625, + "objective/train/value_loss": 0.0002610041992738843, + "objective/train/value_max": -5.650520324707031e-05, + "objective/train/value_min": -0.3369140625, + "objective/train/value_reward_corr": 0.6166013932809289, + "objective/train/value_std": 0.0124969482421875, + "objective/train/weight_avg": 1.0009105205535889, + "objective/train/weighted_lm_loss": 1.3572161197662354, + "objective/train/weights_max": 1.1161935329437256, + "objective/train/weights_min": 0.36863452196121216, + "theoretical_loss": 3.486414700112476, + "tokens_seen": 1818624000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004533782699406195, + "loss": 0.0682, + "theoretical_loss": 3.4863935310466365, + "tokens_seen": 1818755072 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045329802599903705, + "loss": 0.069, + "theoretical_loss": 3.486351198772626, + "tokens_seen": 1819017216 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045321778205745467, + "loss": 0.068, + "theoretical_loss": 3.4863088743067023, + "tokens_seen": 1819279360 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004531375381158723, + "loss": 0.0707, + "theoretical_loss": 3.4862665576463003, + "tokens_seen": 1819541504 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004530572941742899, + "loss": 0.0677, + "theoretical_loss": 3.4862242487888566, + "tokens_seen": 1819803648 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045297705023270745, + "loss": 0.068, + "theoretical_loss": 3.486181947731808, + "tokens_seen": 1820065792 + }, + { + "epoch": 0.55, + "learning_rate": 0.000452896806291125, + "loss": 0.0688, + "theoretical_loss": 3.486139654472594, + "tokens_seen": 1820327936 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004528165623495426, + "loss": 0.0681, + "theoretical_loss": 3.486097369008654, + "tokens_seen": 1820590080 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004527363184079602, + "loss": 0.0702, + "theoretical_loss": 3.4860550913374286, + "tokens_seen": 1820852224 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004526560744663778, + "loss": 0.0708, + "theoretical_loss": 3.4860128214563613, + "tokens_seen": 1821114368 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004525758305247954, + "loss": 0.071, + "theoretical_loss": 3.4859705593628947, + "tokens_seen": 1821376512 + }, + { + "epoch": 0.55, + "learning_rate": 0.000452495586583213, + "loss": 0.0679, + "theoretical_loss": 3.4859283050544736, + "tokens_seen": 1821638656 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": -0.000316399586154148, + "objective/train/docs_used": 664649, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.22597074508667, + "objective/train/original_loss": 1.22597074508667, + "objective/train/theoretical_loss": 3.4858860585285445, + "objective/train/tokens_used": 1842360800, + "objective/train/value_avg": -0.00605010986328125, + "objective/train/value_loss": 0.00014420539082493633, + "objective/train/value_max": -3.916025161743164e-05, + "objective/train/value_min": -0.37353515625, + "objective/train/value_reward_corr": 0.7083982900799979, + "objective/train/value_std": 0.012542724609375, + "objective/train/weight_avg": 0.9997512102127075, + "objective/train/weighted_lm_loss": 1.224969506263733, + "objective/train/weights_max": 1.3325648307800293, + "objective/train/weights_min": 0.37342190742492676, + "theoretical_loss": 3.4858860585285445, + "tokens_seen": 1821900800 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004524153426416306, + "loss": 0.0679, + "theoretical_loss": 3.4858860585285445, + "tokens_seen": 1821900800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045233509870004813, + "loss": 0.0697, + "theoretical_loss": 3.485843819782554, + "tokens_seen": 1822162944 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045225485475846575, + "loss": 0.0719, + "theoretical_loss": 3.48580158881395, + "tokens_seen": 1822425088 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004521746108168833, + "loss": 0.0692, + "theoretical_loss": 3.4857593656201833, + "tokens_seen": 1822687232 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004520943668753009, + "loss": 0.0701, + "theoretical_loss": 3.4857171501987034, + "tokens_seen": 1822949376 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045201412293371853, + "loss": 0.067, + "theoretical_loss": 3.4856749425469635, + "tokens_seen": 1823211520 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045193387899213614, + "loss": 0.0739, + "theoretical_loss": 3.485632742662416, + "tokens_seen": 1823473664 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004518536350505537, + "loss": 0.0699, + "theoretical_loss": 3.4855905505425144, + "tokens_seen": 1823735808 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045177339110897126, + "loss": 0.0688, + "theoretical_loss": 3.485548366184716, + "tokens_seen": 1823997952 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045169314716738887, + "loss": 0.0711, + "theoretical_loss": 3.4855061895864763, + "tokens_seen": 1824260096 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045161290322580643, + "loss": 0.0692, + "theoretical_loss": 3.485464020745253, + "tokens_seen": 1824522240 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045153265928422404, + "loss": 0.0668, + "theoretical_loss": 3.4854218596585067, + "tokens_seen": 1824784384 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045145241534264165, + "loss": 0.0676, + "theoretical_loss": 3.485379706323697, + "tokens_seen": 1825046528 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0007323589525185525, + "objective/train/docs_used": 665640, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.218908667564392, + "objective/train/original_loss": 1.2189085483551025, + "objective/train/theoretical_loss": 3.4853586325624746, + "objective/train/tokens_used": 1845637600, + "objective/train/value_avg": -0.00598907470703125, + "objective/train/value_loss": 0.0001442520588170737, + "objective/train/value_max": -4.57763671875e-05, + "objective/train/value_min": -0.74755859375, + "objective/train/value_reward_corr": 0.608078860093181, + "objective/train/value_std": 0.01049041748046875, + "objective/train/weight_avg": 1.000801682472229, + "objective/train/weighted_lm_loss": 1.2201964855194092, + "objective/train/weights_max": 2.111837863922119, + "objective/train/weights_min": 0.38332873582839966, + "theoretical_loss": 3.4853586325624746, + "tokens_seen": 1825177600 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004513721714010592, + "loss": 0.0691, + "theoretical_loss": 3.4853375607382846, + "tokens_seen": 1825308672 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004512919274594768, + "loss": 0.0689, + "theoretical_loss": 3.4852954228997337, + "tokens_seen": 1825570816 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045121168351789444, + "loss": 0.0689, + "theoretical_loss": 3.485253292805507, + "tokens_seen": 1825832960 + }, + { + "epoch": 0.55, + "learning_rate": 0.000451131439576312, + "loss": 0.0703, + "theoretical_loss": 3.48521117045307, + "tokens_seen": 1826095104 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045105119563472956, + "loss": 0.0707, + "theoretical_loss": 3.4851690558398896, + "tokens_seen": 1826357248 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045097095169314717, + "loss": 0.0669, + "theoretical_loss": 3.4851269489634324, + "tokens_seen": 1826619392 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004508907077515648, + "loss": 0.0662, + "theoretical_loss": 3.4850848498211677, + "tokens_seen": 1826881536 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045081046380998234, + "loss": 0.0669, + "theoretical_loss": 3.4850427584105654, + "tokens_seen": 1827143680 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045073021986839995, + "loss": 0.0678, + "theoretical_loss": 3.4850006747290965, + "tokens_seen": 1827405824 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045064997592681756, + "loss": 0.0707, + "theoretical_loss": 3.4849585987742326, + "tokens_seen": 1827667968 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004505697319852351, + "loss": 0.0714, + "theoretical_loss": 3.4849165305434484, + "tokens_seen": 1827930112 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004504894880436527, + "loss": 0.0668, + "theoretical_loss": 3.4848744700342174, + "tokens_seen": 1828192256 + }, + { + "epoch": 0.55, + "objective/train/advantage_avg": 0.0006137097370810807, + "objective/train/docs_used": 666768, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2766127586364746, + "objective/train/original_loss": 1.276612639427185, + "objective/train/theoretical_loss": 3.484832417244016, + "objective/train/tokens_used": 1848914400, + "objective/train/value_avg": -0.0087432861328125, + "objective/train/value_loss": 0.0001864040532382205, + "objective/train/value_max": -8.887052536010742e-05, + "objective/train/value_min": -0.80712890625, + "objective/train/value_reward_corr": 0.7289446558793178, + "objective/train/value_std": 0.01708984375, + "objective/train/weight_avg": 1.0007046461105347, + "objective/train/weighted_lm_loss": 1.2769469022750854, + "objective/train/weights_max": 2.075507879257202, + "objective/train/weights_min": 0.3701034188270569, + "theoretical_loss": 3.484832417244016, + "tokens_seen": 1828454400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004504092441020703, + "loss": 0.0698, + "theoretical_loss": 3.484832417244016, + "tokens_seen": 1828454400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004503290001604879, + "loss": 0.0682, + "theoretical_loss": 3.484790372170321, + "tokens_seen": 1828716544 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045024875621890546, + "loss": 0.0669, + "theoretical_loss": 3.484748334810611, + "tokens_seen": 1828978688 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004501685122773231, + "loss": 0.0678, + "theoretical_loss": 3.484706305162365, + "tokens_seen": 1829240832 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004500882683357407, + "loss": 0.0672, + "theoretical_loss": 3.484664283223064, + "tokens_seen": 1829502976 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004500080243941583, + "loss": 0.0692, + "theoretical_loss": 3.484622268990189, + "tokens_seen": 1829765120 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004499277804525758, + "loss": 0.0692, + "theoretical_loss": 3.4845802624612237, + "tokens_seen": 1830027264 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004498475365109934, + "loss": 0.0677, + "theoretical_loss": 3.484538263633652, + "tokens_seen": 1830289408 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044976729256941103, + "loss": 0.0694, + "theoretical_loss": 3.484496272504959, + "tokens_seen": 1830551552 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004496870486278286, + "loss": 0.0677, + "theoretical_loss": 3.484454289072631, + "tokens_seen": 1830813696 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004496068046862462, + "loss": 0.0693, + "theoretical_loss": 3.4844123133341567, + "tokens_seen": 1831075840 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004495265607446638, + "loss": 0.0673, + "theoretical_loss": 3.484370345287024, + "tokens_seen": 1831337984 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044944631680308143, + "loss": 0.0666, + "theoretical_loss": 3.484328384928723, + "tokens_seen": 1831600128 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.0005676686996594071, + "objective/train/docs_used": 668070, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.306203007698059, + "objective/train/original_loss": 1.3062028884887695, + "objective/train/theoretical_loss": 3.484307407632101, + "objective/train/tokens_used": 1852191200, + "objective/train/value_avg": -0.00788116455078125, + "objective/train/value_loss": 0.00015821759006939828, + "objective/train/value_max": -5.8770179748535156e-05, + "objective/train/value_min": -0.2489013671875, + "objective/train/value_reward_corr": 0.7492681530981704, + "objective/train/value_std": 0.0144500732421875, + "objective/train/weight_avg": 1.0006389617919922, + "objective/train/weighted_lm_loss": 1.307263731956482, + "objective/train/weights_max": 1.1343940496444702, + "objective/train/weights_min": 0.3684152066707611, + "theoretical_loss": 3.484307407632101, + "tokens_seen": 1831731200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044936607286149893, + "loss": 0.0674, + "theoretical_loss": 3.484286432256745, + "tokens_seen": 1831862272 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044928582891991654, + "loss": 0.0686, + "theoretical_loss": 3.484244487268583, + "tokens_seen": 1832124416 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044920558497833416, + "loss": 0.0689, + "theoretical_loss": 3.4842025499617297, + "tokens_seen": 1832386560 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004491253410367517, + "loss": 0.0716, + "theoretical_loss": 3.4841606203336806, + "tokens_seen": 1832648704 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044904509709516933, + "loss": 0.0665, + "theoretical_loss": 3.4841186983819306, + "tokens_seen": 1832910848 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044896485315358694, + "loss": 0.0681, + "theoretical_loss": 3.4840767841039777, + "tokens_seen": 1833172992 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004488846092120045, + "loss": 0.0696, + "theoretical_loss": 3.48403487749732, + "tokens_seen": 1833435136 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044880436527042206, + "loss": 0.0707, + "theoretical_loss": 3.4839929785594563, + "tokens_seen": 1833697280 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044872412132883967, + "loss": 0.0678, + "theoretical_loss": 3.4839510872878883, + "tokens_seen": 1833959424 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004486438773872573, + "loss": 0.0674, + "theoretical_loss": 3.483909203680117, + "tokens_seen": 1834221568 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044856363344567484, + "loss": 0.0696, + "theoretical_loss": 3.483867327733645, + "tokens_seen": 1834483712 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044848338950409245, + "loss": 0.0675, + "theoretical_loss": 3.4838254594459777, + "tokens_seen": 1834745856 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": -0.00016138824867084622, + "objective/train/docs_used": 669245, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.327626347541809, + "objective/train/original_loss": 1.3276262283325195, + "objective/train/theoretical_loss": 3.483783598814619, + "objective/train/tokens_used": 1855468000, + "objective/train/value_avg": -0.00666046142578125, + "objective/train/value_loss": 0.0002919305698014796, + "objective/train/value_max": -4.166364669799805e-05, + "objective/train/value_min": -0.806640625, + "objective/train/value_reward_corr": 0.6681307867211738, + "objective/train/value_std": 0.016143798828125, + "objective/train/weight_avg": 0.9999749064445496, + "objective/train/weighted_lm_loss": 1.327237606048584, + "objective/train/weights_max": 2.2403690814971924, + "objective/train/weights_min": 0.3757309913635254, + "theoretical_loss": 3.483783598814619, + "tokens_seen": 1835008000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044840314556251007, + "loss": 0.0686, + "theoretical_loss": 3.483783598814619, + "tokens_seen": 1835008000 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004483229016209276, + "loss": 0.0689, + "theoretical_loss": 3.4837417458370767, + "tokens_seen": 1835270144 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044824265767934524, + "loss": 0.0671, + "theoretical_loss": 3.483699900510857, + "tokens_seen": 1835532288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004481624137377628, + "loss": 0.0692, + "theoretical_loss": 3.4836580628334697, + "tokens_seen": 1835794432 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004480821697961804, + "loss": 0.0719, + "theoretical_loss": 3.4836162328024245, + "tokens_seen": 1836056576 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044800192585459797, + "loss": 0.0684, + "theoretical_loss": 3.4835744104152324, + "tokens_seen": 1836318720 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004479216819130156, + "loss": 0.0697, + "theoretical_loss": 3.483532595669406, + "tokens_seen": 1836580864 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004478414379714332, + "loss": 0.0652, + "theoretical_loss": 3.4834907885624586, + "tokens_seen": 1836843008 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044776119402985075, + "loss": 0.0658, + "theoretical_loss": 3.4834489890919045, + "tokens_seen": 1837105152 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044768095008826836, + "loss": 0.0675, + "theoretical_loss": 3.48340719725526, + "tokens_seen": 1837367296 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004476007061466859, + "loss": 0.0696, + "theoretical_loss": 3.4833654130500413, + "tokens_seen": 1837629440 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004475204622051035, + "loss": 0.0665, + "theoretical_loss": 3.4833236364737674, + "tokens_seen": 1837891584 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004474402182635211, + "loss": 0.0678, + "theoretical_loss": 3.4832818675239574, + "tokens_seen": 1838153728 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.00036707372055388987, + "objective/train/docs_used": 670495, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.408747673034668, + "objective/train/original_loss": 1.408747673034668, + "objective/train/theoretical_loss": 3.483260985908201, + "objective/train/tokens_used": 1858744800, + "objective/train/value_avg": -0.00868988037109375, + "objective/train/value_loss": 0.0002044513530563563, + "objective/train/value_max": -6.109476089477539e-05, + "objective/train/value_min": -0.31201171875, + "objective/train/value_reward_corr": 0.7900703831940288, + "objective/train/value_std": 0.0180511474609375, + "objective/train/weight_avg": 1.0004642009735107, + "objective/train/weighted_lm_loss": 1.4089860916137695, + "objective/train/weights_max": 1.1311835050582886, + "objective/train/weights_min": 0.37661492824554443, + "theoretical_loss": 3.483260985908201, + "tokens_seen": 1838284800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004473599743219387, + "loss": 0.0687, + "theoretical_loss": 3.483240106198131, + "tokens_seen": 1838415872 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004472797303803563, + "loss": 0.0709, + "theoretical_loss": 3.483198352493811, + "tokens_seen": 1838678016 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004471994864387739, + "loss": 0.0718, + "theoretical_loss": 3.4831566064085187, + "tokens_seen": 1838940160 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004471192424971915, + "loss": 0.0679, + "theoretical_loss": 3.483114867939779, + "tokens_seen": 1839202304 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044703899855560905, + "loss": 0.0676, + "theoretical_loss": 3.4830731370851167, + "tokens_seen": 1839464448 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004469587546140266, + "loss": 0.0675, + "theoretical_loss": 3.483031413842058, + "tokens_seen": 1839726592 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004468785106724442, + "loss": 0.07, + "theoretical_loss": 3.4829896982081303, + "tokens_seen": 1839988736 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044679826673086183, + "loss": 0.07, + "theoretical_loss": 3.4829479901808624, + "tokens_seen": 1840250880 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044671802278927944, + "loss": 0.0669, + "theoretical_loss": 3.482906289757784, + "tokens_seen": 1840513024 + }, + { + "epoch": 0.56, + "learning_rate": 0.000446637778847697, + "loss": 0.0699, + "theoretical_loss": 3.482864596936425, + "tokens_seen": 1840775168 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004465575349061146, + "loss": 0.0684, + "theoretical_loss": 3.482822911714318, + "tokens_seen": 1841037312 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044647729096453223, + "loss": 0.0688, + "theoretical_loss": 3.4827812340889963, + "tokens_seen": 1841299456 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.00044453368172980845, + "objective/train/docs_used": 671743, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.554997444152832, + "objective/train/original_loss": 1.554997205734253, + "objective/train/theoretical_loss": 3.4827395640579946, + "objective/train/tokens_used": 1862021600, + "objective/train/value_avg": -0.0125579833984375, + "objective/train/value_loss": 0.0002647726214490831, + "objective/train/value_max": -6.604194641113281e-05, + "objective/train/value_min": -0.376953125, + "objective/train/value_reward_corr": 0.9078510782178396, + "objective/train/value_std": 0.03216552734375, + "objective/train/weight_avg": 1.00057053565979, + "objective/train/weighted_lm_loss": 1.5566171407699585, + "objective/train/weights_max": 1.1502026319503784, + "objective/train/weights_min": 0.38612306118011475, + "theoretical_loss": 3.4827395640579946, + "tokens_seen": 1841561600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044639704702294973, + "loss": 0.069, + "theoretical_loss": 3.4827395640579946, + "tokens_seen": 1841561600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044631680308136734, + "loss": 0.0655, + "theoretical_loss": 3.4826979016188475, + "tokens_seen": 1841823744 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044623655913978496, + "loss": 0.0691, + "theoretical_loss": 3.482656246769092, + "tokens_seen": 1842085888 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044615631519820257, + "loss": 0.0685, + "theoretical_loss": 3.482614599506266, + "tokens_seen": 1842348032 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044607607125662013, + "loss": 0.0697, + "theoretical_loss": 3.482572959827908, + "tokens_seen": 1842610176 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044599582731503774, + "loss": 0.07, + "theoretical_loss": 3.482531327731558, + "tokens_seen": 1842872320 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044591558337345535, + "loss": 0.0688, + "theoretical_loss": 3.4824897032147577, + "tokens_seen": 1843134464 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044583533943187286, + "loss": 0.0674, + "theoretical_loss": 3.482448086275049, + "tokens_seen": 1843396608 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044575509549029047, + "loss": 0.0658, + "theoretical_loss": 3.4824064769099756, + "tokens_seen": 1843658752 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004456748515487081, + "loss": 0.0666, + "theoretical_loss": 3.4823648751170824, + "tokens_seen": 1843920896 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004455946076071257, + "loss": 0.0665, + "theoretical_loss": 3.482323280893915, + "tokens_seen": 1844183040 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044551436366554325, + "loss": 0.0668, + "theoretical_loss": 3.4822816942380195, + "tokens_seen": 1844445184 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044543411972396087, + "loss": 0.067, + "theoretical_loss": 3.4822401151469453, + "tokens_seen": 1844707328 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.0007432449492625892, + "objective/train/docs_used": 672951, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3937199115753174, + "objective/train/original_loss": 1.393720030784607, + "objective/train/theoretical_loss": 3.4822193284374503, + "objective/train/tokens_used": 1865298400, + "objective/train/value_avg": -0.0093536376953125, + "objective/train/value_loss": 0.0005561860743910074, + "objective/train/value_max": -5.608797073364258e-05, + "objective/train/value_min": -0.68896484375, + "objective/train/value_reward_corr": 0.6907717640017331, + "objective/train/value_std": 0.0215911865234375, + "objective/train/weight_avg": 1.0009503364562988, + "objective/train/weighted_lm_loss": 1.393741488456726, + "objective/train/weights_max": 1.7102184295654297, + "objective/train/weights_min": 0.06439780443906784, + "theoretical_loss": 3.4822193284374503, + "tokens_seen": 1844838400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004453538757823785, + "loss": 0.0702, + "theoretical_loss": 3.4821985436182405, + "tokens_seen": 1844969472 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044527363184079604, + "loss": 0.0654, + "theoretical_loss": 3.4821569796494565, + "tokens_seen": 1845231616 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004451933878992136, + "loss": 0.066, + "theoretical_loss": 3.482115423238144, + "tokens_seen": 1845493760 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004451131439576312, + "loss": 0.0711, + "theoretical_loss": 3.4820738743818556, + "tokens_seen": 1845755904 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044503290001604877, + "loss": 0.0696, + "theoretical_loss": 3.4820323330781457, + "tokens_seen": 1846018048 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004449526560744664, + "loss": 0.0677, + "theoretical_loss": 3.481990799324568, + "tokens_seen": 1846280192 + }, + { + "epoch": 0.56, + "learning_rate": 0.000444872412132884, + "loss": 0.0687, + "theoretical_loss": 3.4819492731186807, + "tokens_seen": 1846542336 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004447921681913016, + "loss": 0.0668, + "theoretical_loss": 3.481907754458039, + "tokens_seen": 1846804480 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044471192424971916, + "loss": 0.0684, + "theoretical_loss": 3.4818662433402014, + "tokens_seen": 1847066624 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004446316803081367, + "loss": 0.0687, + "theoretical_loss": 3.4818247397627284, + "tokens_seen": 1847328768 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044455143636655433, + "loss": 0.0668, + "theoretical_loss": 3.48178324372318, + "tokens_seen": 1847590912 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004444711924249719, + "loss": 0.0648, + "theoretical_loss": 3.481741755219118, + "tokens_seen": 1847853056 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.00023911338939797133, + "objective/train/docs_used": 673981, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3846668004989624, + "objective/train/original_loss": 1.3846670389175415, + "objective/train/theoretical_loss": 3.481700274248105, + "objective/train/tokens_used": 1868575200, + "objective/train/value_avg": -0.007419586181640625, + "objective/train/value_loss": 0.00018692106823436916, + "objective/train/value_max": -3.5643577575683594e-05, + "objective/train/value_min": -0.2083740234375, + "objective/train/value_reward_corr": 0.6955404663018411, + "objective/train/value_std": 0.0128326416015625, + "objective/train/weight_avg": 1.0003236532211304, + "objective/train/weighted_lm_loss": 1.3853992223739624, + "objective/train/weights_max": 1.1434308290481567, + "objective/train/weights_min": 0.36988183856010437, + "theoretical_loss": 3.481700274248105, + "tokens_seen": 1848115200 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004443909484833895, + "loss": 0.0708, + "theoretical_loss": 3.481700274248105, + "tokens_seen": 1848115200 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004443107045418071, + "loss": 0.0708, + "theoretical_loss": 3.481658800807706, + "tokens_seen": 1848377344 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044423046060022473, + "loss": 0.0705, + "theoretical_loss": 3.4816173348954846, + "tokens_seen": 1848639488 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004441502166586423, + "loss": 0.0697, + "theoretical_loss": 3.481575876509008, + "tokens_seen": 1848901632 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044406997271705985, + "loss": 0.0726, + "theoretical_loss": 3.4815344256458434, + "tokens_seen": 1849163776 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044398972877547746, + "loss": 0.0702, + "theoretical_loss": 3.4814929823035596, + "tokens_seen": 1849425920 + }, + { + "epoch": 0.56, + "learning_rate": 0.000443909484833895, + "loss": 0.0709, + "theoretical_loss": 3.481451546479726, + "tokens_seen": 1849688064 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044382924089231263, + "loss": 0.0686, + "theoretical_loss": 3.4814101181719135, + "tokens_seen": 1849950208 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044374899695073024, + "loss": 0.0693, + "theoretical_loss": 3.4813686973776936, + "tokens_seen": 1850212352 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044366875300914786, + "loss": 0.0671, + "theoretical_loss": 3.48132728409464, + "tokens_seen": 1850474496 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004435885090675654, + "loss": 0.0713, + "theoretical_loss": 3.4812858783203264, + "tokens_seen": 1850736640 + }, + { + "epoch": 0.56, + "learning_rate": 0.000443508265125983, + "loss": 0.0717, + "theoretical_loss": 3.481244480052329, + "tokens_seen": 1850998784 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004434280211844006, + "loss": 0.0675, + "theoretical_loss": 3.4812030892882224, + "tokens_seen": 1851260928 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": -0.0012252043234184384, + "objective/train/docs_used": 675199, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.43165922164917, + "objective/train/original_loss": 1.4316593408584595, + "objective/train/theoretical_loss": 3.4811823967193716, + "objective/train/tokens_used": 1871852000, + "objective/train/value_avg": -0.00992584228515625, + "objective/train/value_loss": 0.00022598551004193723, + "objective/train/value_max": -2.7120113372802734e-05, + "objective/train/value_min": -0.332763671875, + "objective/train/value_reward_corr": 0.7498402724827826, + "objective/train/value_std": 0.0165863037109375, + "objective/train/weight_avg": 0.9988827109336853, + "objective/train/weighted_lm_loss": 1.4289666414260864, + "objective/train/weights_max": 1.112414002418518, + "objective/train/weights_min": 0.3692946434020996, + "theoretical_loss": 3.4811823967193716, + "tokens_seen": 1851392000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044334777724281814, + "loss": 0.0672, + "theoretical_loss": 3.4811617060255857, + "tokens_seen": 1851523072 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044326753330123576, + "loss": 0.069, + "theoretical_loss": 3.481120330261997, + "tokens_seen": 1851785216 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044318728935965337, + "loss": 0.0677, + "theoretical_loss": 3.4810789619950366, + "tokens_seen": 1852047360 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004431070454180709, + "loss": 0.0679, + "theoretical_loss": 3.481037601222285, + "tokens_seen": 1852309504 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044302680147648854, + "loss": 0.0691, + "theoretical_loss": 3.480996247941324, + "tokens_seen": 1852571648 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044294655753490615, + "loss": 0.0683, + "theoretical_loss": 3.4809549021497372, + "tokens_seen": 1852833792 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004428663135933237, + "loss": 0.069, + "theoretical_loss": 3.480913563845109, + "tokens_seen": 1853095936 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044278606965174127, + "loss": 0.0659, + "theoretical_loss": 3.480872233025024, + "tokens_seen": 1853358080 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004427058257101589, + "loss": 0.0693, + "theoretical_loss": 3.4808309096870698, + "tokens_seen": 1853620224 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004426255817685765, + "loss": 0.0666, + "theoretical_loss": 3.480789593828834, + "tokens_seen": 1853882368 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044254533782699405, + "loss": 0.0697, + "theoretical_loss": 3.4807482854479037, + "tokens_seen": 1854144512 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044246509388541167, + "loss": 0.0692, + "theoretical_loss": 3.4807069845418708, + "tokens_seen": 1854406656 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.0017240240704268217, + "objective/train/docs_used": 676483, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4873814582824707, + "objective/train/original_loss": 1.4873812198638916, + "objective/train/theoretical_loss": 3.4806656911083254, + "objective/train/tokens_used": 1875128800, + "objective/train/value_avg": -0.00719451904296875, + "objective/train/value_loss": 0.00026378282927908003, + "objective/train/value_max": -4.988908767700195e-05, + "objective/train/value_min": -0.61474609375, + "objective/train/value_reward_corr": 0.6747173744723667, + "objective/train/value_std": 0.01470184326171875, + "objective/train/weight_avg": 1.001841425895691, + "objective/train/weighted_lm_loss": 1.489801287651062, + "objective/train/weights_max": 1.463398814201355, + "objective/train/weights_min": 0.37753555178642273, + "theoretical_loss": 3.4806656911083254, + "tokens_seen": 1854668800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004423848499438293, + "loss": 0.0721, + "theoretical_loss": 3.4806656911083254, + "tokens_seen": 1854668800 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044230460600224684, + "loss": 0.069, + "theoretical_loss": 3.48062440514486, + "tokens_seen": 1854930944 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004422243620606644, + "loss": 0.0694, + "theoretical_loss": 3.4805831266490674, + "tokens_seen": 1855193088 + }, + { + "epoch": 0.56, + "learning_rate": 0.000442144118119082, + "loss": 0.0668, + "theoretical_loss": 3.480541855618542, + "tokens_seen": 1855455232 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004420638741774996, + "loss": 0.0668, + "theoretical_loss": 3.4805005920508796, + "tokens_seen": 1855717376 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004419836302359172, + "loss": 0.0689, + "theoretical_loss": 3.480459335943676, + "tokens_seen": 1855979520 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004419033862943348, + "loss": 0.0684, + "theoretical_loss": 3.4804180872945305, + "tokens_seen": 1856241664 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004418231423527524, + "loss": 0.0653, + "theoretical_loss": 3.4803768461010405, + "tokens_seen": 1856503808 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044174289841117, + "loss": 0.0709, + "theoretical_loss": 3.4803356123608062, + "tokens_seen": 1856765952 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004416626544695875, + "loss": 0.0674, + "theoretical_loss": 3.480294386071429, + "tokens_seen": 1857028096 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044158241052800513, + "loss": 0.068, + "theoretical_loss": 3.4802531672305106, + "tokens_seen": 1857290240 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044150216658642275, + "loss": 0.0702, + "theoretical_loss": 3.480211955835654, + "tokens_seen": 1857552384 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004414219226448403, + "loss": 0.0687, + "theoretical_loss": 3.4801707518844647, + "tokens_seen": 1857814528 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.00042786714038811624, + "objective/train/docs_used": 677697, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2605946063995361, + "objective/train/original_loss": 1.2605946063995361, + "objective/train/theoretical_loss": 3.4801501526994967, + "objective/train/tokens_used": 1878405600, + "objective/train/value_avg": -0.00734710693359375, + "objective/train/value_loss": 0.00021454686066135764, + "objective/train/value_max": -3.069639205932617e-05, + "objective/train/value_min": -0.1751708984375, + "objective/train/value_reward_corr": 0.6534736056748658, + "objective/train/value_std": 0.01160430908203125, + "objective/train/weight_avg": 1.0005258321762085, + "objective/train/weighted_lm_loss": 1.2597084045410156, + "objective/train/weights_max": 1.098419189453125, + "objective/train/weights_min": 0.3697379231452942, + "theoretical_loss": 3.4801501526994967, + "tokens_seen": 1857945600 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004413416787032579, + "loss": 0.0655, + "theoretical_loss": 3.480129555374547, + "tokens_seen": 1858076672 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044126143476167553, + "loss": 0.0692, + "theoretical_loss": 3.4800883663035083, + "tokens_seen": 1858338816 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004411811908200931, + "loss": 0.0654, + "theoretical_loss": 3.4800471846689556, + "tokens_seen": 1858600960 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044110094687851065, + "loss": 0.0707, + "theoretical_loss": 3.4800060104684984, + "tokens_seen": 1858863104 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044102070293692826, + "loss": 0.0642, + "theoretical_loss": 3.4799648436997463, + "tokens_seen": 1859125248 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044094045899534587, + "loss": 0.067, + "theoretical_loss": 3.47992368436031, + "tokens_seen": 1859387392 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044086021505376343, + "loss": 0.0662, + "theoretical_loss": 3.4798825324478018, + "tokens_seen": 1859649536 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044077997111218104, + "loss": 0.0699, + "theoretical_loss": 3.4798413879598353, + "tokens_seen": 1859911680 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044069972717059865, + "loss": 0.0658, + "theoretical_loss": 3.4798002508940242, + "tokens_seen": 1860173824 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004406194832290162, + "loss": 0.0697, + "theoretical_loss": 3.479759121247984, + "tokens_seen": 1860435968 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004405392392874338, + "loss": 0.0692, + "theoretical_loss": 3.479717999019332, + "tokens_seen": 1860698112 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004404589953458514, + "loss": 0.0669, + "theoretical_loss": 3.4796768842056847, + "tokens_seen": 1860960256 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": -0.0001284509344259277, + "objective/train/docs_used": 678968, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.393393874168396, + "objective/train/original_loss": 1.3933937549591064, + "objective/train/theoretical_loss": 3.4796357768046615, + "objective/train/tokens_used": 1881682400, + "objective/train/value_avg": -0.00817108154296875, + "objective/train/value_loss": 0.0005822849925607443, + "objective/train/value_max": -5.227327346801758e-05, + "objective/train/value_min": -0.6240234375, + "objective/train/value_reward_corr": 0.5789475016654009, + "objective/train/value_std": 0.01580810546875, + "objective/train/weight_avg": 1.0001150369644165, + "objective/train/weighted_lm_loss": 1.3927175998687744, + "objective/train/weights_max": 1.866422414779663, + "objective/train/weights_min": 0.2532411217689514, + "theoretical_loss": 3.4796357768046615, + "tokens_seen": 1861222400 + }, + { + "epoch": 0.56, + "learning_rate": 0.000440378751404269, + "loss": 0.0701, + "theoretical_loss": 3.4796357768046615, + "tokens_seen": 1861222400 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044029850746268656, + "loss": 0.0674, + "theoretical_loss": 3.4795946768138823, + "tokens_seen": 1861484544 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044021826352110417, + "loss": 0.0687, + "theoretical_loss": 3.4795535842309677, + "tokens_seen": 1861746688 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004401380195795218, + "loss": 0.0715, + "theoretical_loss": 3.4795124990535395, + "tokens_seen": 1862008832 + }, + { + "epoch": 0.56, + "learning_rate": 0.00044005777563793934, + "loss": 0.0658, + "theoretical_loss": 3.479471421279222, + "tokens_seen": 1862270976 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043997753169635695, + "loss": 0.0693, + "theoretical_loss": 3.4794303509056377, + "tokens_seen": 1862533120 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004398972877547745, + "loss": 0.068, + "theoretical_loss": 3.479389287930413, + "tokens_seen": 1862795264 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004398170438131921, + "loss": 0.0683, + "theoretical_loss": 3.4793482323511746, + "tokens_seen": 1863057408 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004397367998716097, + "loss": 0.071, + "theoretical_loss": 3.479307184165549, + "tokens_seen": 1863319552 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004396565559300273, + "loss": 0.0677, + "theoretical_loss": 3.4792661433711656, + "tokens_seen": 1863581696 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004395763119884449, + "loss": 0.0688, + "theoretical_loss": 3.479225109965653, + "tokens_seen": 1863843840 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043949606804686246, + "loss": 0.0707, + "theoretical_loss": 3.4791840839466435, + "tokens_seen": 1864105984 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004394158241052801, + "loss": 0.069, + "theoretical_loss": 3.479143065311768, + "tokens_seen": 1864368128 + }, + { + "epoch": 0.56, + "objective/train/advantage_avg": 0.0003821462451014668, + "objective/train/docs_used": 680280, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1618165969848633, + "objective/train/original_loss": 1.1618165969848633, + "objective/train/theoretical_loss": 3.479122558762641, + "objective/train/tokens_used": 1884959200, + "objective/train/value_avg": -0.005954742431640625, + "objective/train/value_loss": 0.0002789311984088272, + "objective/train/value_max": -6.014108657836914e-05, + "objective/train/value_min": -0.3486328125, + "objective/train/value_reward_corr": 0.5803413402854933, + "objective/train/value_std": 0.0120849609375, + "objective/train/weight_avg": 1.000502347946167, + "objective/train/weighted_lm_loss": 1.162131667137146, + "objective/train/weights_max": 1.3961803913116455, + "objective/train/weights_min": 0.39436399936676025, + "theoretical_loss": 3.479122558762641, + "tokens_seen": 1864499200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043933558016369764, + "loss": 0.0667, + "theoretical_loss": 3.47910205405866, + "tokens_seen": 1864630272 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004392553362221152, + "loss": 0.0649, + "theoretical_loss": 3.479061050184953, + "tokens_seen": 1864892416 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004391750922805328, + "loss": 0.0689, + "theoretical_loss": 3.4790200536882825, + "tokens_seen": 1865154560 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004390948483389504, + "loss": 0.0687, + "theoretical_loss": 3.4789790645662846, + "tokens_seen": 1865416704 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043901460439736803, + "loss": 0.07, + "theoretical_loss": 3.478938082816597, + "tokens_seen": 1865678848 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004389343604557856, + "loss": 0.0681, + "theoretical_loss": 3.4788971084368576, + "tokens_seen": 1865940992 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004388541165142032, + "loss": 0.0706, + "theoretical_loss": 3.478856141424706, + "tokens_seen": 1866203136 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004387738725726208, + "loss": 0.0698, + "theoretical_loss": 3.478815181777783, + "tokens_seen": 1866465280 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004386936286310383, + "loss": 0.0666, + "theoretical_loss": 3.4787742294937303, + "tokens_seen": 1866727424 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043861338468945593, + "loss": 0.0689, + "theoretical_loss": 3.4787332845701906, + "tokens_seen": 1866989568 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043853314074787354, + "loss": 0.0672, + "theoretical_loss": 3.4786923470048077, + "tokens_seen": 1867251712 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043845289680629116, + "loss": 0.0699, + "theoretical_loss": 3.478651416795227, + "tokens_seen": 1867513856 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.0006913833203725517, + "objective/train/docs_used": 681429, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.311995267868042, + "objective/train/original_loss": 1.311995267868042, + "objective/train/theoretical_loss": 3.478610493939094, + "objective/train/tokens_used": 1888236000, + "objective/train/value_avg": -0.00714111328125, + "objective/train/value_loss": 0.00026110518956556916, + "objective/train/value_max": -3.88026237487793e-05, + "objective/train/value_min": -0.63818359375, + "objective/train/value_reward_corr": 0.824775167023786, + "objective/train/value_std": 0.02099609375, + "objective/train/weight_avg": 1.0008121728897095, + "objective/train/weighted_lm_loss": 1.3127737045288086, + "objective/train/weights_max": 1.6890244483947754, + "objective/train/weights_min": 0.3925867974758148, + "theoretical_loss": 3.478610493939094, + "tokens_seen": 1867776000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004383726528647087, + "loss": 0.0672, + "theoretical_loss": 3.478610493939094, + "tokens_seen": 1867776000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043829240892312633, + "loss": 0.0685, + "theoretical_loss": 3.4785695784340556, + "tokens_seen": 1868038144 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043821216498154394, + "loss": 0.0667, + "theoretical_loss": 3.4785286702777602, + "tokens_seen": 1868300288 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043813192103996145, + "loss": 0.0662, + "theoretical_loss": 3.4784877694678573, + "tokens_seen": 1868562432 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043805167709837906, + "loss": 0.0695, + "theoretical_loss": 3.478446876001997, + "tokens_seen": 1868824576 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043797143315679667, + "loss": 0.0705, + "theoretical_loss": 3.4784059898778312, + "tokens_seen": 1869086720 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004378911892152143, + "loss": 0.0695, + "theoretical_loss": 3.4783651110930123, + "tokens_seen": 1869348864 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043781094527363184, + "loss": 0.0698, + "theoretical_loss": 3.478324239645193, + "tokens_seen": 1869611008 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043773070133204945, + "loss": 0.0715, + "theoretical_loss": 3.478283375532029, + "tokens_seen": 1869873152 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043765045739046707, + "loss": 0.0702, + "theoretical_loss": 3.4782425187511756, + "tokens_seen": 1870135296 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043757021344888457, + "loss": 0.0688, + "theoretical_loss": 3.47820166930029, + "tokens_seen": 1870397440 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004374899695073022, + "loss": 0.0675, + "theoretical_loss": 3.47816082717703, + "tokens_seen": 1870659584 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004374097255657198, + "loss": 0.0664, + "theoretical_loss": 3.478119992379054, + "tokens_seen": 1870921728 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.000833529862575233, + "objective/train/docs_used": 682627, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3730967044830322, + "objective/train/original_loss": 1.3730967044830322, + "objective/train/theoretical_loss": 3.4780995777263164, + "objective/train/tokens_used": 1891512800, + "objective/train/value_avg": -0.00873565673828125, + "objective/train/value_loss": 0.0007085340330377221, + "objective/train/value_max": -7.426738739013672e-05, + "objective/train/value_min": -0.892578125, + "objective/train/value_reward_corr": 0.7890690165374458, + "objective/train/value_std": 0.02630615234375, + "objective/train/weight_avg": 1.0011346340179443, + "objective/train/weighted_lm_loss": 1.3734325170516968, + "objective/train/weights_max": 1.620538592338562, + "objective/train/weights_min": 0.3686436712741852, + "theoretical_loss": 3.4780995777263164, + "tokens_seen": 1871052800 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004373294816241374, + "loss": 0.0721, + "theoretical_loss": 3.478079164904022, + "tokens_seen": 1871183872 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043724923768255497, + "loss": 0.0711, + "theoretical_loss": 3.4780383447495966, + "tokens_seen": 1871446016 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004371689937409726, + "loss": 0.0669, + "theoretical_loss": 3.477997531913439, + "tokens_seen": 1871708160 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004370887497993902, + "loss": 0.0716, + "theoretical_loss": 3.477956726393212, + "tokens_seen": 1871970304 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043700850585780775, + "loss": 0.0702, + "theoretical_loss": 3.477915928186581, + "tokens_seen": 1872232448 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004369282619162253, + "loss": 0.0695, + "theoretical_loss": 3.4778751372912105, + "tokens_seen": 1872494592 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004368480179746429, + "loss": 0.0682, + "theoretical_loss": 3.4778343537047673, + "tokens_seen": 1872756736 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004367677740330605, + "loss": 0.0687, + "theoretical_loss": 3.4777935774249196, + "tokens_seen": 1873018880 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004366875300914781, + "loss": 0.0705, + "theoretical_loss": 3.4777528084493348, + "tokens_seen": 1873281024 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004366072861498957, + "loss": 0.0683, + "theoretical_loss": 3.477712046775684, + "tokens_seen": 1873543168 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004365270422083133, + "loss": 0.0694, + "theoretical_loss": 3.477671292401637, + "tokens_seen": 1873805312 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004364467982667309, + "loss": 0.068, + "theoretical_loss": 3.477630545324866, + "tokens_seen": 1874067456 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": -0.0009515096899122, + "objective/train/docs_used": 683764, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4821826219558716, + "objective/train/original_loss": 1.4821826219558716, + "objective/train/theoretical_loss": 3.477589805543044, + "objective/train/tokens_used": 1894789600, + "objective/train/value_avg": -0.0078277587890625, + "objective/train/value_loss": 0.00039342811214737594, + "objective/train/value_max": -5.269050598144531e-05, + "objective/train/value_min": -0.91845703125, + "objective/train/value_reward_corr": 0.6983727521608852, + "objective/train/value_std": 0.0157318115234375, + "objective/train/weight_avg": 0.9992176294326782, + "objective/train/weighted_lm_loss": 1.4811550378799438, + "objective/train/weights_max": 1.3486071825027466, + "objective/train/weights_min": 0.3824990689754486, + "theoretical_loss": 3.477589805543044, + "tokens_seen": 1874329600 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043636655432514843, + "loss": 0.0708, + "theoretical_loss": 3.477589805543044, + "tokens_seen": 1874329600 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043628631038356605, + "loss": 0.0719, + "theoretical_loss": 3.477549073053845, + "tokens_seen": 1874591744 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004362060664419836, + "loss": 0.0701, + "theoretical_loss": 3.477508347854944, + "tokens_seen": 1874853888 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004361258225004012, + "loss": 0.0688, + "theoretical_loss": 3.477467629944017, + "tokens_seen": 1875116032 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043604557855881883, + "loss": 0.0667, + "theoretical_loss": 3.4774269193187406, + "tokens_seen": 1875378176 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043596533461723644, + "loss": 0.0692, + "theoretical_loss": 3.4773862159767943, + "tokens_seen": 1875640320 + }, + { + "epoch": 0.57, + "learning_rate": 0.000435885090675654, + "loss": 0.0695, + "theoretical_loss": 3.4773455199158567, + "tokens_seen": 1875902464 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004358048467340716, + "loss": 0.0716, + "theoretical_loss": 3.4773048311336083, + "tokens_seen": 1876164608 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004357246027924892, + "loss": 0.0681, + "theoretical_loss": 3.4772641496277306, + "tokens_seen": 1876426752 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043564435885090673, + "loss": 0.0674, + "theoretical_loss": 3.4772234753959057, + "tokens_seen": 1876688896 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043556411490932434, + "loss": 0.0689, + "theoretical_loss": 3.477182808435818, + "tokens_seen": 1876951040 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043548387096774196, + "loss": 0.0675, + "theoretical_loss": 3.477142148745151, + "tokens_seen": 1877213184 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043540362702615957, + "loss": 0.0698, + "theoretical_loss": 3.477101496321591, + "tokens_seen": 1877475328 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.00011132625513710082, + "objective/train/docs_used": 684931, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.389501690864563, + "objective/train/original_loss": 1.3895015716552734, + "objective/train/theoretical_loss": 3.4770811728342537, + "objective/train/tokens_used": 1898066400, + "objective/train/value_avg": -0.0081787109375, + "objective/train/value_loss": 0.00024089377257041633, + "objective/train/value_max": -4.267692565917969e-05, + "objective/train/value_min": -0.45849609375, + "objective/train/value_reward_corr": 0.6890634759535107, + "objective/train/value_std": 0.01363372802734375, + "objective/train/weight_avg": 1.0002244710922241, + "objective/train/weighted_lm_loss": 1.3898746967315674, + "objective/train/weights_max": 1.43800687789917, + "objective/train/weights_min": 0.4132646918296814, + "theoretical_loss": 3.4770811728342537, + "tokens_seen": 1877606400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043532338308457713, + "loss": 0.0703, + "theoretical_loss": 3.4770608511628254, + "tokens_seen": 1877737472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043524313914299474, + "loss": 0.0727, + "theoretical_loss": 3.477020213266541, + "tokens_seen": 1877999616 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004351628952014123, + "loss": 0.0717, + "theoretical_loss": 3.476979582630427, + "tokens_seen": 1878261760 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043508265125982986, + "loss": 0.0704, + "theoretical_loss": 3.4769389592521733, + "tokens_seen": 1878523904 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043500240731824747, + "loss": 0.0729, + "theoretical_loss": 3.476898343129471, + "tokens_seen": 1878786048 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004349221633766651, + "loss": 0.0666, + "theoretical_loss": 3.476857734260012, + "tokens_seen": 1879048192 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043484191943508264, + "loss": 0.0696, + "theoretical_loss": 3.4768171326414894, + "tokens_seen": 1879310336 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043476167549350025, + "loss": 0.0682, + "theoretical_loss": 3.4767765382715976, + "tokens_seen": 1879572480 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043468143155191787, + "loss": 0.0701, + "theoretical_loss": 3.4767359511480316, + "tokens_seen": 1879834624 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004346011876103354, + "loss": 0.0697, + "theoretical_loss": 3.4766953712684874, + "tokens_seen": 1880096768 + }, + { + "epoch": 0.57, + "learning_rate": 0.000434520943668753, + "loss": 0.068, + "theoretical_loss": 3.476654798630663, + "tokens_seen": 1880358912 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004344406997271706, + "loss": 0.0687, + "theoretical_loss": 3.476614233232256, + "tokens_seen": 1880621056 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.0005197719438001513, + "objective/train/docs_used": 686045, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3149397373199463, + "objective/train/original_loss": 1.3149397373199463, + "objective/train/theoretical_loss": 3.476573675070966, + "objective/train/tokens_used": 1901343200, + "objective/train/value_avg": -0.00659942626953125, + "objective/train/value_loss": 0.00010198719974141568, + "objective/train/value_max": -4.166364669799805e-05, + "objective/train/value_min": -0.342041015625, + "objective/train/value_reward_corr": 0.7580199579982837, + "objective/train/value_std": 0.011993408203125, + "objective/train/weight_avg": 1.0005700588226318, + "objective/train/weighted_lm_loss": 1.3154451847076416, + "objective/train/weights_max": 1.1764196157455444, + "objective/train/weights_min": 0.7539494037628174, + "theoretical_loss": 3.476573675070966, + "tokens_seen": 1880883200 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004343604557855882, + "loss": 0.0678, + "theoretical_loss": 3.476573675070966, + "tokens_seen": 1880883200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043428021184400577, + "loss": 0.0708, + "theoretical_loss": 3.4765331241444937, + "tokens_seen": 1881145344 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004341999679024234, + "loss": 0.0669, + "theoretical_loss": 3.4764925804505404, + "tokens_seen": 1881407488 + }, + { + "epoch": 0.57, + "learning_rate": 0.000434119723960841, + "loss": 0.0671, + "theoretical_loss": 3.476452043986809, + "tokens_seen": 1881669632 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004340394800192586, + "loss": 0.0672, + "theoretical_loss": 3.4764115147510033, + "tokens_seen": 1881931776 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004339592360776761, + "loss": 0.0708, + "theoretical_loss": 3.4763709927408266, + "tokens_seen": 1882193920 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004338789921360937, + "loss": 0.0681, + "theoretical_loss": 3.4763304779539865, + "tokens_seen": 1882456064 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043379874819451133, + "loss": 0.0716, + "theoretical_loss": 3.476289970388188, + "tokens_seen": 1882718208 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004337185042529289, + "loss": 0.0697, + "theoretical_loss": 3.4762494700411404, + "tokens_seen": 1882980352 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004336382603113465, + "loss": 0.0682, + "theoretical_loss": 3.476208976910552, + "tokens_seen": 1883242496 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004335580163697641, + "loss": 0.0692, + "theoretical_loss": 3.476168490994132, + "tokens_seen": 1883504640 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043347777242818173, + "loss": 0.0694, + "theoretical_loss": 3.4761280122895926, + "tokens_seen": 1883766784 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043339752848659923, + "loss": 0.0709, + "theoretical_loss": 3.476087540794645, + "tokens_seen": 1884028928 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": -0.00022904000070411712, + "objective/train/docs_used": 687181, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.41051185131073, + "objective/train/original_loss": 1.4105117321014404, + "objective/train/theoretical_loss": 3.4760673077500535, + "objective/train/tokens_used": 1904620000, + "objective/train/value_avg": -0.005123138427734375, + "objective/train/value_loss": 0.00014444446424022317, + "objective/train/value_max": -3.6776065826416016e-05, + "objective/train/value_min": -0.370849609375, + "objective/train/value_reward_corr": 0.6669141158683306, + "objective/train/value_std": 0.01131439208984375, + "objective/train/weight_avg": 0.9998379349708557, + "objective/train/weighted_lm_loss": 1.410766363143921, + "objective/train/weights_max": 1.1633652448654175, + "objective/train/weights_min": 0.3695588707923889, + "theoretical_loss": 3.4760673077500535, + "tokens_seen": 1884160000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043331728454501685, + "loss": 0.071, + "theoretical_loss": 3.476047076507002, + "tokens_seen": 1884291072 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043323704060343446, + "loss": 0.069, + "theoretical_loss": 3.4760066194243784, + "tokens_seen": 1884553216 + }, + { + "epoch": 0.57, + "learning_rate": 0.000433156796661852, + "loss": 0.073, + "theoretical_loss": 3.4759661695444892, + "tokens_seen": 1884815360 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043307655272026963, + "loss": 0.0714, + "theoretical_loss": 3.47592572686505, + "tokens_seen": 1885077504 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043299630877868724, + "loss": 0.0712, + "theoretical_loss": 3.4758852913837783, + "tokens_seen": 1885339648 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004329160648371048, + "loss": 0.0673, + "theoretical_loss": 3.475844863098393, + "tokens_seen": 1885601792 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043283582089552236, + "loss": 0.0685, + "theoretical_loss": 3.475804442006612, + "tokens_seen": 1885863936 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043275557695393997, + "loss": 0.072, + "theoretical_loss": 3.4757640281061573, + "tokens_seen": 1886126080 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004326753330123576, + "loss": 0.0695, + "theoretical_loss": 3.4757236213947484, + "tokens_seen": 1886388224 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043259508907077514, + "loss": 0.069, + "theoretical_loss": 3.4756832218701095, + "tokens_seen": 1886650368 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043251484512919276, + "loss": 0.0686, + "theoretical_loss": 3.475642829529963, + "tokens_seen": 1886912512 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043243460118761037, + "loss": 0.0688, + "theoretical_loss": 3.4756024443720337, + "tokens_seen": 1887174656 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": -6.266022683121264e-05, + "objective/train/docs_used": 688313, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5270166397094727, + "objective/train/original_loss": 1.5270166397094727, + "objective/train/theoretical_loss": 3.4755620663940467, + "objective/train/tokens_used": 1907896800, + "objective/train/value_avg": -0.005428314208984375, + "objective/train/value_loss": 0.00046849262434989214, + "objective/train/value_max": -3.147125244140625e-05, + "objective/train/value_min": -0.85888671875, + "objective/train/value_reward_corr": 0.546287861758024, + "objective/train/value_std": 0.0126953125, + "objective/train/weight_avg": 1.000136375427246, + "objective/train/weighted_lm_loss": 1.5277141332626343, + "objective/train/weights_max": 1.4028693437576294, + "objective/train/weights_min": 0.39049577713012695, + "theoretical_loss": 3.4755620663940467, + "tokens_seen": 1887436800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043235435724602793, + "loss": 0.0693, + "theoretical_loss": 3.4755620663940467, + "tokens_seen": 1887436800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043227411330444554, + "loss": 0.0713, + "theoretical_loss": 3.4755216955937294, + "tokens_seen": 1887698944 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004321938693628631, + "loss": 0.0674, + "theoretical_loss": 3.4754813319688087, + "tokens_seen": 1887961088 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004321136254212807, + "loss": 0.0683, + "theoretical_loss": 3.4754409755170133, + "tokens_seen": 1888223232 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043203338147969827, + "loss": 0.0686, + "theoretical_loss": 3.4754006262360733, + "tokens_seen": 1888485376 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004319531375381159, + "loss": 0.0639, + "theoretical_loss": 3.4753602841237186, + "tokens_seen": 1888747520 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004318728935965335, + "loss": 0.0673, + "theoretical_loss": 3.4753199491776816, + "tokens_seen": 1889009664 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043179264965495105, + "loss": 0.0675, + "theoretical_loss": 3.475279621395695, + "tokens_seen": 1889271808 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043171240571336867, + "loss": 0.0682, + "theoretical_loss": 3.4752393007754923, + "tokens_seen": 1889533952 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004316321617717862, + "loss": 0.0695, + "theoretical_loss": 3.4751989873148084, + "tokens_seen": 1889796096 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043155191783020384, + "loss": 0.0718, + "theoretical_loss": 3.4751586810113793, + "tokens_seen": 1890058240 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004314716738886214, + "loss": 0.0669, + "theoretical_loss": 3.4751183818629414, + "tokens_seen": 1890320384 + }, + { + "epoch": 0.57, + "learning_rate": 0.000431391429947039, + "loss": 0.0691, + "theoretical_loss": 3.4750780898672335, + "tokens_seen": 1890582528 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.0010180543176829815, + "objective/train/docs_used": 689504, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3283250331878662, + "objective/train/original_loss": 1.3283251523971558, + "objective/train/theoretical_loss": 3.4750579465509466, + "objective/train/tokens_used": 1911173600, + "objective/train/value_avg": -0.007610321044921875, + "objective/train/value_loss": 0.0002268047392135486, + "objective/train/value_max": -3.618001937866211e-05, + "objective/train/value_min": -0.36572265625, + "objective/train/value_reward_corr": 0.656805733107698, + "objective/train/value_std": 0.01404571533203125, + "objective/train/weight_avg": 1.0011183023452759, + "objective/train/weighted_lm_loss": 1.3299323320388794, + "objective/train/weights_max": 1.2525979280471802, + "objective/train/weights_min": 0.37432897090911865, + "theoretical_loss": 3.4750579465509466, + "tokens_seen": 1890713600 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004313111860054566, + "loss": 0.0654, + "theoretical_loss": 3.475037805021994, + "tokens_seen": 1890844672 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004312309420638742, + "loss": 0.0673, + "theoretical_loss": 3.4749975273249625, + "tokens_seen": 1891106816 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004311506981222918, + "loss": 0.0696, + "theoretical_loss": 3.4749572567738807, + "tokens_seen": 1891368960 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004310704541807094, + "loss": 0.0694, + "theoretical_loss": 3.4749169933664903, + "tokens_seen": 1891631104 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004309902102391269, + "loss": 0.0672, + "theoretical_loss": 3.4748767371005345, + "tokens_seen": 1891893248 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004309099662975445, + "loss": 0.0686, + "theoretical_loss": 3.474836487973757, + "tokens_seen": 1892155392 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043082972235596213, + "loss": 0.0699, + "theoretical_loss": 3.474796245983903, + "tokens_seen": 1892417536 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043074947841437975, + "loss": 0.0704, + "theoretical_loss": 3.4747560111287195, + "tokens_seen": 1892679680 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004306692344727973, + "loss": 0.0674, + "theoretical_loss": 3.4747157834059523, + "tokens_seen": 1892941824 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004305889905312149, + "loss": 0.068, + "theoretical_loss": 3.47467556281335, + "tokens_seen": 1893203968 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043050874658963253, + "loss": 0.07, + "theoretical_loss": 3.474635349348662, + "tokens_seen": 1893466112 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043042850264805003, + "loss": 0.0684, + "theoretical_loss": 3.4745951430096387, + "tokens_seen": 1893728256 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.0006099278689362109, + "objective/train/docs_used": 690600, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4099087715148926, + "objective/train/original_loss": 1.4099085330963135, + "objective/train/theoretical_loss": 3.474554943794031, + "objective/train/tokens_used": 1914450400, + "objective/train/value_avg": -0.0048828125, + "objective/train/value_loss": 9.77657618932426e-05, + "objective/train/value_max": -5.346536636352539e-05, + "objective/train/value_min": -0.19580078125, + "objective/train/value_reward_corr": 0.629735219499377, + "objective/train/value_std": 0.00786590576171875, + "objective/train/weight_avg": 1.000654697418213, + "objective/train/weighted_lm_loss": 1.4121841192245483, + "objective/train/weights_max": 1.0596908330917358, + "objective/train/weights_min": 0.3795166611671448, + "theoretical_loss": 3.474554943794031, + "tokens_seen": 1893990400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043034825870646765, + "loss": 0.0689, + "theoretical_loss": 3.474554943794031, + "tokens_seen": 1893990400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043026801476488526, + "loss": 0.0698, + "theoretical_loss": 3.4745147516995916, + "tokens_seen": 1894252544 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043018777082330287, + "loss": 0.0689, + "theoretical_loss": 3.474474566724073, + "tokens_seen": 1894514688 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043010752688172043, + "loss": 0.0692, + "theoretical_loss": 3.4744343888652303, + "tokens_seen": 1894776832 + }, + { + "epoch": 0.57, + "learning_rate": 0.00043002728294013804, + "loss": 0.0665, + "theoretical_loss": 3.474394218120818, + "tokens_seen": 1895038976 + }, + { + "epoch": 0.57, + "learning_rate": 0.00042994703899855565, + "loss": 0.0691, + "theoretical_loss": 3.474354054488593, + "tokens_seen": 1895301120 + }, + { + "epoch": 0.57, + "learning_rate": 0.00042986679505697316, + "loss": 0.0678, + "theoretical_loss": 3.4743138979663133, + "tokens_seen": 1895563264 + }, + { + "epoch": 0.57, + "learning_rate": 0.00042978655111539077, + "loss": 0.0721, + "theoretical_loss": 3.4742737485517354, + "tokens_seen": 1895825408 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004297063071738084, + "loss": 0.0684, + "theoretical_loss": 3.4742336062426205, + "tokens_seen": 1896087552 + }, + { + "epoch": 0.57, + "learning_rate": 0.000429626063232226, + "loss": 0.0683, + "theoretical_loss": 3.4741934710367284, + "tokens_seen": 1896349696 + }, + { + "epoch": 0.57, + "learning_rate": 0.00042954581929064356, + "loss": 0.0695, + "theoretical_loss": 3.47415334293182, + "tokens_seen": 1896611840 + }, + { + "epoch": 0.57, + "learning_rate": 0.00042946557534906117, + "loss": 0.0684, + "theoretical_loss": 3.4741132219256583, + "tokens_seen": 1896873984 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004293853314074788, + "loss": 0.0688, + "theoretical_loss": 3.4740731080160066, + "tokens_seen": 1897136128 + }, + { + "epoch": 0.57, + "objective/train/advantage_avg": 0.0012116730213165283, + "objective/train/docs_used": 691800, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4639194011688232, + "objective/train/original_loss": 1.4639195203781128, + "objective/train/theoretical_loss": 3.474053053721673, + "objective/train/tokens_used": 1917727200, + "objective/train/value_avg": -0.00799560546875, + "objective/train/value_loss": 0.0002491466875653714, + "objective/train/value_max": -0.0001233816146850586, + "objective/train/value_min": -0.67626953125, + "objective/train/value_reward_corr": 0.6715546928250509, + "objective/train/value_std": 0.01512908935546875, + "objective/train/weight_avg": 1.001329779624939, + "objective/train/weighted_lm_loss": 1.4651811122894287, + "objective/train/weights_max": 1.9665279388427734, + "objective/train/weights_min": 0.37346750497817993, + "theoretical_loss": 3.474053053721673, + "tokens_seen": 1897267200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00042930508746589634, + "loss": 0.0719, + "theoretical_loss": 3.4740330012006293, + "tokens_seen": 1897398272 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004292248435243139, + "loss": 0.0682, + "theoretical_loss": 3.473992901477292, + "tokens_seen": 1897660416 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004291445995827315, + "loss": 0.0672, + "theoretical_loss": 3.4739528088437606, + "tokens_seen": 1897922560 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004290643556411491, + "loss": 0.0672, + "theoretical_loss": 3.4739127232978033, + "tokens_seen": 1898184704 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004289841116995667, + "loss": 0.0656, + "theoretical_loss": 3.473872644837189, + "tokens_seen": 1898446848 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004289038677579843, + "loss": 0.0691, + "theoretical_loss": 3.4738325734596858, + "tokens_seen": 1898708992 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004288236238164019, + "loss": 0.0652, + "theoretical_loss": 3.473792509163066, + "tokens_seen": 1898971136 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042874337987481946, + "loss": 0.0677, + "theoretical_loss": 3.4737524519450993, + "tokens_seen": 1899233280 + }, + { + "epoch": 0.58, + "learning_rate": 0.000428663135933237, + "loss": 0.0679, + "theoretical_loss": 3.4737124018035597, + "tokens_seen": 1899495424 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042858289199165464, + "loss": 0.0673, + "theoretical_loss": 3.47367235873622, + "tokens_seen": 1899757568 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004285026480500722, + "loss": 0.0674, + "theoretical_loss": 3.4736323227408548, + "tokens_seen": 1900019712 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004284224041084898, + "loss": 0.0684, + "theoretical_loss": 3.4735922938152397, + "tokens_seen": 1900281856 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": -0.00030587660148739815, + "objective/train/docs_used": 693007, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.30275297164917, + "objective/train/original_loss": 1.302753210067749, + "objective/train/theoretical_loss": 3.4735522719571517, + "objective/train/tokens_used": 1921004000, + "objective/train/value_avg": -0.006526947021484375, + "objective/train/value_loss": 0.00030584644991904497, + "objective/train/value_max": -2.8192996978759766e-05, + "objective/train/value_min": -0.94970703125, + "objective/train/value_reward_corr": 0.6970315136260118, + "objective/train/value_std": 0.01509857177734375, + "objective/train/weight_avg": 0.9998296499252319, + "objective/train/weighted_lm_loss": 1.3028963804244995, + "objective/train/weights_max": 1.5824660062789917, + "objective/train/weights_min": 0.3694700598716736, + "theoretical_loss": 3.4735522719571517, + "tokens_seen": 1900544000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004283421601669074, + "loss": 0.0676, + "theoretical_loss": 3.4735522719571517, + "tokens_seen": 1900544000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042826191622532503, + "loss": 0.0669, + "theoretical_loss": 3.473512257164368, + "tokens_seen": 1900806144 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004281816722837426, + "loss": 0.0678, + "theoretical_loss": 3.4734722494346673, + "tokens_seen": 1901068288 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042810142834216015, + "loss": 0.0683, + "theoretical_loss": 3.473432248765829, + "tokens_seen": 1901330432 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042802118440057776, + "loss": 0.0692, + "theoretical_loss": 3.473392255155634, + "tokens_seen": 1901592576 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004279409404589953, + "loss": 0.0691, + "theoretical_loss": 3.4733522686018636, + "tokens_seen": 1901854720 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042786069651741293, + "loss": 0.0694, + "theoretical_loss": 3.4733122891023007, + "tokens_seen": 1902116864 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042778045257583054, + "loss": 0.0679, + "theoretical_loss": 3.473272316654729, + "tokens_seen": 1902379008 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042770020863424816, + "loss": 0.0709, + "theoretical_loss": 3.4732323512569323, + "tokens_seen": 1902641152 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004276199646926657, + "loss": 0.0677, + "theoretical_loss": 3.473192392906697, + "tokens_seen": 1902903296 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042753972075108333, + "loss": 0.0688, + "theoretical_loss": 3.473152441601809, + "tokens_seen": 1903165440 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004274594768095009, + "loss": 0.0721, + "theoretical_loss": 3.473112497340057, + "tokens_seen": 1903427584 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042737923286791845, + "loss": 0.0684, + "theoretical_loss": 3.473072560119229, + "tokens_seen": 1903689728 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.002401332836598158, + "objective/train/docs_used": 694210, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3564651012420654, + "objective/train/original_loss": 1.356465220451355, + "objective/train/theoretical_loss": 3.4730525941484705, + "objective/train/tokens_used": 1924280800, + "objective/train/value_avg": -0.00836181640625, + "objective/train/value_loss": 0.0003155863960273564, + "objective/train/value_max": -3.5643577575683594e-05, + "objective/train/value_min": -0.505859375, + "objective/train/value_reward_corr": 0.528134268748609, + "objective/train/value_std": 0.01629638671875, + "objective/train/weight_avg": 1.002550482749939, + "objective/train/weighted_lm_loss": 1.359156608581543, + "objective/train/weights_max": 1.6584101915359497, + "objective/train/weights_min": 0.3761554956436157, + "theoretical_loss": 3.4730525941484705, + "tokens_seen": 1903820800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042729898892633606, + "loss": 0.0699, + "theoretical_loss": 3.473032629937114, + "tokens_seen": 1903951872 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042721874498475367, + "loss": 0.0679, + "theoretical_loss": 3.472992706791504, + "tokens_seen": 1904214016 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004271385010431713, + "loss": 0.0686, + "theoretical_loss": 3.472952790680189, + "tokens_seen": 1904476160 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042705825710158884, + "loss": 0.0688, + "theoretical_loss": 3.472912881600963, + "tokens_seen": 1904738304 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042697801316000645, + "loss": 0.0686, + "theoretical_loss": 3.4728729795516182, + "tokens_seen": 1905000448 + }, + { + "epoch": 0.58, + "learning_rate": 0.000426897769218424, + "loss": 0.0696, + "theoretical_loss": 3.4728330845299507, + "tokens_seen": 1905262592 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042681752527684157, + "loss": 0.0692, + "theoretical_loss": 3.472793196533755, + "tokens_seen": 1905524736 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004267372813352592, + "loss": 0.0705, + "theoretical_loss": 3.472753315560828, + "tokens_seen": 1905786880 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004266570373936768, + "loss": 0.0672, + "theoretical_loss": 3.4727134416089678, + "tokens_seen": 1906049024 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042657679345209435, + "loss": 0.0712, + "theoretical_loss": 3.472673574675972, + "tokens_seen": 1906311168 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042649654951051197, + "loss": 0.0667, + "theoretical_loss": 3.472633714759641, + "tokens_seen": 1906573312 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004264163055689296, + "loss": 0.0694, + "theoretical_loss": 3.4725938618577743, + "tokens_seen": 1906835456 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.0019277179380878806, + "objective/train/docs_used": 695522, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3308924436569214, + "objective/train/original_loss": 1.3308923244476318, + "objective/train/theoretical_loss": 3.4725540159681745, + "objective/train/tokens_used": 1927557600, + "objective/train/value_avg": -0.006687164306640625, + "objective/train/value_loss": 0.00017574307275936007, + "objective/train/value_max": -3.510713577270508e-05, + "objective/train/value_min": -0.62060546875, + "objective/train/value_reward_corr": 0.7532226507887959, + "objective/train/value_std": 0.016326904296875, + "objective/train/weight_avg": 1.0020089149475098, + "objective/train/weighted_lm_loss": 1.3338085412979126, + "objective/train/weights_max": 1.386031150817871, + "objective/train/weights_min": 0.36823785305023193, + "theoretical_loss": 3.4725540159681745, + "tokens_seen": 1907097600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004263360616273472, + "loss": 0.0689, + "theoretical_loss": 3.4725540159681745, + "tokens_seen": 1907097600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004262558176857647, + "loss": 0.0669, + "theoretical_loss": 3.4725141770886436, + "tokens_seen": 1907359744 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004261755737441823, + "loss": 0.0665, + "theoretical_loss": 3.4724743452169857, + "tokens_seen": 1907621888 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004260953298025999, + "loss": 0.0678, + "theoretical_loss": 3.4724345203510047, + "tokens_seen": 1907884032 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004260150858610175, + "loss": 0.0679, + "theoretical_loss": 3.472394702488506, + "tokens_seen": 1908146176 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004259348419194351, + "loss": 0.0672, + "theoretical_loss": 3.4723548916272966, + "tokens_seen": 1908408320 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004258545979778527, + "loss": 0.0692, + "theoretical_loss": 3.4723150877651836, + "tokens_seen": 1908670464 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004257743540362703, + "loss": 0.068, + "theoretical_loss": 3.472275290899976, + "tokens_seen": 1908932608 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004256941100946878, + "loss": 0.069, + "theoretical_loss": 3.472235501029483, + "tokens_seen": 1909194752 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042561386615310543, + "loss": 0.0698, + "theoretical_loss": 3.4721957181515144, + "tokens_seen": 1909456896 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042553362221152305, + "loss": 0.0695, + "theoretical_loss": 3.472155942263883, + "tokens_seen": 1909719040 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004254533782699406, + "loss": 0.0699, + "theoretical_loss": 3.4721161733643995, + "tokens_seen": 1909981184 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004253731343283582, + "loss": 0.0674, + "theoretical_loss": 3.472076411450878, + "tokens_seen": 1910243328 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.0001537144707981497, + "objective/train/docs_used": 696705, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2774269580841064, + "objective/train/original_loss": 1.2774269580841064, + "objective/train/theoretical_loss": 3.4720565331131708, + "objective/train/tokens_used": 1930834400, + "objective/train/value_avg": -0.008636474609375, + "objective/train/value_loss": 0.00040704250568524003, + "objective/train/value_max": -3.534555435180664e-05, + "objective/train/value_min": -0.73388671875, + "objective/train/value_reward_corr": 0.7005505863181416, + "objective/train/value_std": 0.01953125, + "objective/train/weight_avg": 1.0003374814987183, + "objective/train/weighted_lm_loss": 1.2776480913162231, + "objective/train/weights_max": 1.9289265871047974, + "objective/train/weights_min": 0.3710477352142334, + "theoretical_loss": 3.4720565331131708, + "tokens_seen": 1910374400 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042529289038677583, + "loss": 0.0678, + "theoretical_loss": 3.472036656521134, + "tokens_seen": 1910505472 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042521264644519344, + "loss": 0.0661, + "theoretical_loss": 3.4719969085729816, + "tokens_seen": 1910767616 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042513240250361095, + "loss": 0.0715, + "theoretical_loss": 3.4719571676042373, + "tokens_seen": 1911029760 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042505215856202856, + "loss": 0.0708, + "theoretical_loss": 3.4719174336127185, + "tokens_seen": 1911291904 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004249719146204462, + "loss": 0.0691, + "theoretical_loss": 3.471877706596244, + "tokens_seen": 1911554048 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042489167067886373, + "loss": 0.0668, + "theoretical_loss": 3.4718379865526323, + "tokens_seen": 1911816192 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042481142673728134, + "loss": 0.0704, + "theoretical_loss": 3.4717982734797044, + "tokens_seen": 1912078336 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042473118279569896, + "loss": 0.0694, + "theoretical_loss": 3.4717585673752813, + "tokens_seen": 1912340480 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004246509388541165, + "loss": 0.0691, + "theoretical_loss": 3.4717188682371853, + "tokens_seen": 1912602624 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042457069491253413, + "loss": 0.0678, + "theoretical_loss": 3.471679176063239, + "tokens_seen": 1912864768 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004244904509709517, + "loss": 0.0682, + "theoretical_loss": 3.4716394908512678, + "tokens_seen": 1913126912 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004244102070293693, + "loss": 0.0659, + "theoretical_loss": 3.471599812599095, + "tokens_seen": 1913389056 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": -5.6982433306984603e-05, + "objective/train/docs_used": 697951, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4868850708007812, + "objective/train/original_loss": 1.4868848323822021, + "objective/train/theoretical_loss": 3.4715601413045487, + "objective/train/tokens_used": 1934111200, + "objective/train/value_avg": -0.00873565673828125, + "objective/train/value_loss": 0.00032236077822744846, + "objective/train/value_max": -2.586841583251953e-05, + "objective/train/value_min": -0.73828125, + "objective/train/value_reward_corr": 0.5984469149683921, + "objective/train/value_std": 0.0132904052734375, + "objective/train/weight_avg": 1.0000770092010498, + "objective/train/weighted_lm_loss": 1.4859241247177124, + "objective/train/weights_max": 1.268136978149414, + "objective/train/weights_min": 0.22390565276145935, + "theoretical_loss": 3.4715601413045487, + "tokens_seen": 1913651200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042432996308778686, + "loss": 0.0713, + "theoretical_loss": 3.4715601413045487, + "tokens_seen": 1913651200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042424971914620447, + "loss": 0.0685, + "theoretical_loss": 3.4715204769654555, + "tokens_seen": 1913913344 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004241694752046221, + "loss": 0.0693, + "theoretical_loss": 3.471480819579643, + "tokens_seen": 1914175488 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042408923126303964, + "loss": 0.0685, + "theoretical_loss": 3.4714411691449403, + "tokens_seen": 1914437632 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042400898732145725, + "loss": 0.0675, + "theoretical_loss": 3.4714015256591777, + "tokens_seen": 1914699776 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004239287433798748, + "loss": 0.0715, + "theoretical_loss": 3.4713618891201863, + "tokens_seen": 1914961920 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004238484994382924, + "loss": 0.0693, + "theoretical_loss": 3.471322259525798, + "tokens_seen": 1915224064 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042376825549671, + "loss": 0.0695, + "theoretical_loss": 3.4712826368738456, + "tokens_seen": 1915486208 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004236880115551276, + "loss": 0.0684, + "theoretical_loss": 3.471243021162163, + "tokens_seen": 1915748352 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004236077676135452, + "loss": 0.0673, + "theoretical_loss": 3.4712034123885855, + "tokens_seen": 1916010496 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042352752367196277, + "loss": 0.066, + "theoretical_loss": 3.471163810550949, + "tokens_seen": 1916272640 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004234472797303804, + "loss": 0.0695, + "theoretical_loss": 3.47112421564709, + "tokens_seen": 1916534784 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042336703578879794, + "loss": 0.0673, + "theoretical_loss": 3.4710846276748466, + "tokens_seen": 1916796928 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.0002990372304338962, + "objective/train/docs_used": 699035, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3075224161148071, + "objective/train/original_loss": 1.3075222969055176, + "objective/train/theoretical_loss": 3.471064836287405, + "objective/train/tokens_used": 1937388000, + "objective/train/value_avg": -0.006961822509765625, + "objective/train/value_loss": 0.000213931969483383, + "objective/train/value_max": -4.100799560546875e-05, + "objective/train/value_min": -0.340087890625, + "objective/train/value_reward_corr": 0.5923001788406644, + "objective/train/value_std": 0.01080322265625, + "objective/train/weight_avg": 1.0003918409347534, + "objective/train/weighted_lm_loss": 1.3085472583770752, + "objective/train/weights_max": 1.317122220993042, + "objective/train/weights_min": 0.38067081570625305, + "theoretical_loss": 3.471064836287405, + "tokens_seen": 1916928000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042328679184721555, + "loss": 0.068, + "theoretical_loss": 3.4710450466320575, + "tokens_seen": 1917059072 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004232065479056331, + "loss": 0.0678, + "theoretical_loss": 3.471005472516562, + "tokens_seen": 1917321216 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004231263039640507, + "loss": 0.0687, + "theoretical_loss": 3.4709659053262016, + "tokens_seen": 1917583360 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042304606002246833, + "loss": 0.0713, + "theoretical_loss": 3.470926345058818, + "tokens_seen": 1917845504 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004229658160808859, + "loss": 0.0666, + "theoretical_loss": 3.4708867917122532, + "tokens_seen": 1918107648 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004228855721393035, + "loss": 0.0695, + "theoretical_loss": 3.470847245284351, + "tokens_seen": 1918369792 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004228053281977211, + "loss": 0.0669, + "theoretical_loss": 3.4708077057729567, + "tokens_seen": 1918631936 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004227250842561386, + "loss": 0.069, + "theoretical_loss": 3.4707681731759155, + "tokens_seen": 1918894080 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042264484031455623, + "loss": 0.0679, + "theoretical_loss": 3.4707286474910735, + "tokens_seen": 1919156224 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042256459637297385, + "loss": 0.0664, + "theoretical_loss": 3.4706891287162787, + "tokens_seen": 1919418368 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042248435243139146, + "loss": 0.069, + "theoretical_loss": 3.470649616849379, + "tokens_seen": 1919680512 + }, + { + "epoch": 0.58, + "learning_rate": 0.000422404108489809, + "loss": 0.0683, + "theoretical_loss": 3.4706101118882247, + "tokens_seen": 1919942656 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.0010100979125127196, + "objective/train/docs_used": 700307, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4696931838989258, + "objective/train/original_loss": 1.4696931838989258, + "objective/train/theoretical_loss": 3.4705706138306653, + "objective/train/tokens_used": 1940664800, + "objective/train/value_avg": -0.004924774169921875, + "objective/train/value_loss": 0.0002015416685026139, + "objective/train/value_max": -4.57763671875e-05, + "objective/train/value_min": -0.230712890625, + "objective/train/value_reward_corr": 0.49469162323894733, + "objective/train/value_std": 0.00820159912109375, + "objective/train/weight_avg": 1.0010887384414673, + "objective/train/weighted_lm_loss": 1.4720734357833862, + "objective/train/weights_max": 1.1817450523376465, + "objective/train/weights_min": 0.22504496574401855, + "theoretical_loss": 3.4705706138306653, + "tokens_seen": 1920204800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042232386454822663, + "loss": 0.0705, + "theoretical_loss": 3.4705706138306653, + "tokens_seen": 1920204800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042224362060664424, + "loss": 0.0687, + "theoretical_loss": 3.4705311226745525, + "tokens_seen": 1920466944 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042216337666506175, + "loss": 0.0678, + "theoretical_loss": 3.470491638417739, + "tokens_seen": 1920729088 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042208313272347936, + "loss": 0.0665, + "theoretical_loss": 3.470452161058078, + "tokens_seen": 1920991232 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042200288878189697, + "loss": 0.0695, + "theoretical_loss": 3.470412690593423, + "tokens_seen": 1921253376 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004219226448403146, + "loss": 0.0665, + "theoretical_loss": 3.4703732270216303, + "tokens_seen": 1921515520 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042184240089873214, + "loss": 0.067, + "theoretical_loss": 3.470333770340555, + "tokens_seen": 1921777664 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042176215695714976, + "loss": 0.0681, + "theoretical_loss": 3.4702943205480548, + "tokens_seen": 1922039808 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042168191301556737, + "loss": 0.0678, + "theoretical_loss": 3.470254877641988, + "tokens_seen": 1922301952 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042160166907398493, + "loss": 0.0687, + "theoretical_loss": 3.470215441620213, + "tokens_seen": 1922564096 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004215214251324025, + "loss": 0.0714, + "theoretical_loss": 3.47017601248059, + "tokens_seen": 1922826240 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004214411811908201, + "loss": 0.068, + "theoretical_loss": 3.4701365902209798, + "tokens_seen": 1923088384 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004213609372492377, + "loss": 0.0708, + "theoretical_loss": 3.4700971748392453, + "tokens_seen": 1923350528 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 4.2215957364533097e-05, + "objective/train/docs_used": 701572, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3477340936660767, + "objective/train/original_loss": 1.3477338552474976, + "objective/train/theoretical_loss": 3.4700774697269128, + "objective/train/tokens_used": 1943941600, + "objective/train/value_avg": -0.0045928955078125, + "objective/train/value_loss": 0.0001257354742847383, + "objective/train/value_max": -3.3736228942871094e-05, + "objective/train/value_min": -0.418701171875, + "objective/train/value_reward_corr": 0.56594521479194, + "objective/train/value_std": 0.007476806640625, + "objective/train/weight_avg": 1.000099539756775, + "objective/train/weighted_lm_loss": 1.3483197689056396, + "objective/train/weights_max": 1.2155424356460571, + "objective/train/weights_min": 0.3688117563724518, + "theoretical_loss": 3.4700774697269128, + "tokens_seen": 1923481600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042128069330765527, + "loss": 0.0677, + "theoretical_loss": 3.470057766333248, + "tokens_seen": 1923612672 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004212004493660729, + "loss": 0.0706, + "theoretical_loss": 3.4700183647008522, + "tokens_seen": 1923874816 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004211202054244905, + "loss": 0.0676, + "theoretical_loss": 3.4699789699399233, + "tokens_seen": 1924136960 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042103996148290805, + "loss": 0.0659, + "theoretical_loss": 3.469939582048326, + "tokens_seen": 1924399104 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004209597175413256, + "loss": 0.0676, + "theoretical_loss": 3.469900201023928, + "tokens_seen": 1924661248 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004208794735997432, + "loss": 0.065, + "theoretical_loss": 3.469860826864596, + "tokens_seen": 1924923392 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004207992296581608, + "loss": 0.0689, + "theoretical_loss": 3.4698214595681995, + "tokens_seen": 1925185536 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004207189857165784, + "loss": 0.0679, + "theoretical_loss": 3.4697820991326074, + "tokens_seen": 1925447680 + }, + { + "epoch": 0.58, + "learning_rate": 0.000420638741774996, + "loss": 0.0658, + "theoretical_loss": 3.46974274555569, + "tokens_seen": 1925709824 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004205584978334136, + "loss": 0.0701, + "theoretical_loss": 3.4697033988353194, + "tokens_seen": 1925971968 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004204782538918312, + "loss": 0.0666, + "theoretical_loss": 3.4696640589693675, + "tokens_seen": 1926234112 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042039800995024874, + "loss": 0.0679, + "theoretical_loss": 3.4696247259557076, + "tokens_seen": 1926496256 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.0005393868195824325, + "objective/train/docs_used": 702709, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5641299486160278, + "objective/train/original_loss": 1.5641303062438965, + "objective/train/theoretical_loss": 3.469585399792215, + "objective/train/tokens_used": 1947218400, + "objective/train/value_avg": -0.01006317138671875, + "objective/train/value_loss": 0.0004965075058862567, + "objective/train/value_max": -3.24249267578125e-05, + "objective/train/value_min": -0.732421875, + "objective/train/value_reward_corr": 0.7204884420073756, + "objective/train/value_std": 0.0225067138671875, + "objective/train/weight_avg": 1.0007641315460205, + "objective/train/weighted_lm_loss": 1.5642176866531372, + "objective/train/weights_max": 1.5964356660842896, + "objective/train/weights_min": 0.3893653154373169, + "theoretical_loss": 3.469585399792215, + "tokens_seen": 1926758400 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042031776600866635, + "loss": 0.0706, + "theoretical_loss": 3.469585399792215, + "tokens_seen": 1926758400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004202375220670839, + "loss": 0.0665, + "theoretical_loss": 3.4695460804767633, + "tokens_seen": 1927020544 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004201572781255015, + "loss": 0.0693, + "theoretical_loss": 3.4695067680072293, + "tokens_seen": 1927282688 + }, + { + "epoch": 0.58, + "learning_rate": 0.00042007703418391913, + "loss": 0.0708, + "theoretical_loss": 3.469467462381491, + "tokens_seen": 1927544832 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041999679024233675, + "loss": 0.0682, + "theoretical_loss": 3.469428163597426, + "tokens_seen": 1927806976 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004199165463007543, + "loss": 0.0675, + "theoretical_loss": 3.4693888716529124, + "tokens_seen": 1928069120 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004198363023591719, + "loss": 0.0705, + "theoretical_loss": 3.4693495865458313, + "tokens_seen": 1928331264 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004197560584175895, + "loss": 0.0679, + "theoretical_loss": 3.4693103082740633, + "tokens_seen": 1928593408 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041967581447600703, + "loss": 0.0718, + "theoretical_loss": 3.4692710368354898, + "tokens_seen": 1928855552 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041959557053442465, + "loss": 0.0683, + "theoretical_loss": 3.469231772227994, + "tokens_seen": 1929117696 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041951532659284226, + "loss": 0.0666, + "theoretical_loss": 3.4691925144494604, + "tokens_seen": 1929379840 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041943508265125987, + "loss": 0.0716, + "theoretical_loss": 3.4691532634977724, + "tokens_seen": 1929641984 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041935483870967743, + "loss": 0.069, + "theoretical_loss": 3.469114019370816, + "tokens_seen": 1929904128 + }, + { + "epoch": 0.58, + "objective/train/advantage_avg": 0.0007605802384205163, + "objective/train/docs_used": 703903, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4023168087005615, + "objective/train/original_loss": 1.4023168087005615, + "objective/train/theoretical_loss": 3.4690943998659516, + "objective/train/tokens_used": 1950495200, + "objective/train/value_avg": -0.00800323486328125, + "objective/train/value_loss": 0.00025014992570504546, + "objective/train/value_max": -4.684925079345703e-05, + "objective/train/value_min": -0.84228515625, + "objective/train/value_reward_corr": 0.686540956128751, + "objective/train/value_std": 0.0177459716796875, + "objective/train/weight_avg": 1.0008822679519653, + "objective/train/weighted_lm_loss": 1.4031236171722412, + "objective/train/weights_max": 2.208437204360962, + "objective/train/weights_min": 0.37882235646247864, + "theoretical_loss": 3.4690943998659516, + "tokens_seen": 1930035200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00041927459476809504, + "loss": 0.0687, + "theoretical_loss": 3.4690747820664782, + "tokens_seen": 1930166272 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004191943508265126, + "loss": 0.0673, + "theoretical_loss": 3.469035551582646, + "tokens_seen": 1930428416 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041911410688493016, + "loss": 0.068, + "theoretical_loss": 3.468996327917208, + "tokens_seen": 1930690560 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041903386294334777, + "loss": 0.0695, + "theoretical_loss": 3.468957111068054, + "tokens_seen": 1930952704 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004189536190017654, + "loss": 0.067, + "theoretical_loss": 3.468917901033074, + "tokens_seen": 1931214848 + }, + { + "epoch": 0.59, + "learning_rate": 0.000418873375060183, + "loss": 0.0695, + "theoretical_loss": 3.468878697810159, + "tokens_seen": 1931476992 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041879313111860056, + "loss": 0.0699, + "theoretical_loss": 3.468839501397202, + "tokens_seen": 1931739136 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041871288717701817, + "loss": 0.0702, + "theoretical_loss": 3.4688003117920956, + "tokens_seen": 1932001280 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004186326432354357, + "loss": 0.0673, + "theoretical_loss": 3.4687611289927336, + "tokens_seen": 1932263424 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004185523992938533, + "loss": 0.0683, + "theoretical_loss": 3.468721952997012, + "tokens_seen": 1932525568 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004184721553522709, + "loss": 0.0684, + "theoretical_loss": 3.4686827838028256, + "tokens_seen": 1932787712 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004183919114106885, + "loss": 0.0692, + "theoretical_loss": 3.4686436214080727, + "tokens_seen": 1933049856 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.00045794775360263884, + "objective/train/docs_used": 705162, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3477123975753784, + "objective/train/original_loss": 1.3477123975753784, + "objective/train/theoretical_loss": 3.4686044658106496, + "objective/train/tokens_used": 1953772000, + "objective/train/value_avg": -0.0085296630859375, + "objective/train/value_loss": 0.00025771657237783074, + "objective/train/value_max": -6.401538848876953e-05, + "objective/train/value_min": -0.287841796875, + "objective/train/value_reward_corr": 0.7388518675620712, + "objective/train/value_std": 0.016693115234375, + "objective/train/weight_avg": 1.000576376914978, + "objective/train/weighted_lm_loss": 1.3479864597320557, + "objective/train/weights_max": 1.1910806894302368, + "objective/train/weights_min": 0.36963358521461487, + "theoretical_loss": 3.4686044658106496, + "tokens_seen": 1933312000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041831166746910607, + "loss": 0.0664, + "theoretical_loss": 3.4686044658106496, + "tokens_seen": 1933312000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004182314235275237, + "loss": 0.0669, + "theoretical_loss": 3.468565317008456, + "tokens_seen": 1933574144 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004181511795859413, + "loss": 0.0713, + "theoretical_loss": 3.4685261749993908, + "tokens_seen": 1933836288 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004180709356443589, + "loss": 0.0658, + "theoretical_loss": 3.4684870397813556, + "tokens_seen": 1934098432 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004179906917027764, + "loss": 0.0698, + "theoretical_loss": 3.4684479113522517, + "tokens_seen": 1934360576 + }, + { + "epoch": 0.59, + "learning_rate": 0.000417910447761194, + "loss": 0.0688, + "theoretical_loss": 3.4684087897099816, + "tokens_seen": 1934622720 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041783020381961164, + "loss": 0.0684, + "theoretical_loss": 3.4683696748524486, + "tokens_seen": 1934884864 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004177499598780292, + "loss": 0.0709, + "theoretical_loss": 3.468330566777557, + "tokens_seen": 1935147008 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004176697159364468, + "loss": 0.0677, + "theoretical_loss": 3.4682914654832118, + "tokens_seen": 1935409152 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004175894719948644, + "loss": 0.0651, + "theoretical_loss": 3.46825237096732, + "tokens_seen": 1935671296 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041750922805328203, + "loss": 0.067, + "theoretical_loss": 3.4682132832277883, + "tokens_seen": 1935933440 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041742898411169954, + "loss": 0.069, + "theoretical_loss": 3.4681742022625253, + "tokens_seen": 1936195584 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041734874017011715, + "loss": 0.0737, + "theoretical_loss": 3.468135128069439, + "tokens_seen": 1936457728 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.0009253130410797894, + "objective/train/docs_used": 706032, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.303908348083496, + "objective/train/original_loss": 1.303908109664917, + "objective/train/theoretical_loss": 3.4681155935118095, + "objective/train/tokens_used": 1957048800, + "objective/train/value_avg": -0.00952911376953125, + "objective/train/value_loss": 0.00012180649355286732, + "objective/train/value_max": -6.812810897827148e-05, + "objective/train/value_min": -0.23779296875, + "objective/train/value_reward_corr": 0.7694273438548433, + "objective/train/value_std": 0.01409149169921875, + "objective/train/weight_avg": 1.0009859800338745, + "objective/train/weighted_lm_loss": 1.3054463863372803, + "objective/train/weights_max": 1.2155041694641113, + "objective/train/weights_min": 0.8359750509262085, + "theoretical_loss": 3.4681155935118095, + "tokens_seen": 1936588800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041726849622853476, + "loss": 0.0689, + "theoretical_loss": 3.4680960606464404, + "tokens_seen": 1936719872 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004171882522869523, + "loss": 0.0692, + "theoretical_loss": 3.4680569999914397, + "tokens_seen": 1936982016 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041710800834536993, + "loss": 0.0678, + "theoretical_loss": 3.468017946102349, + "tokens_seen": 1937244160 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041702776440378755, + "loss": 0.0692, + "theoretical_loss": 3.467978898977081, + "tokens_seen": 1937506304 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041694752046220516, + "loss": 0.0699, + "theoretical_loss": 3.4679398586135486, + "tokens_seen": 1937768448 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004168672765206227, + "loss": 0.0678, + "theoretical_loss": 3.467900825009668, + "tokens_seen": 1938030592 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004167870325790403, + "loss": 0.0695, + "theoretical_loss": 3.4678617981633533, + "tokens_seen": 1938292736 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004167067886374579, + "loss": 0.0682, + "theoretical_loss": 3.467822778072521, + "tokens_seen": 1938554880 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041662654469587545, + "loss": 0.0678, + "theoretical_loss": 3.4677837647350898, + "tokens_seen": 1938817024 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041654630075429306, + "loss": 0.0681, + "theoretical_loss": 3.467744758148976, + "tokens_seen": 1939079168 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041646605681271067, + "loss": 0.0684, + "theoretical_loss": 3.4677057583121007, + "tokens_seen": 1939341312 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041638581287112823, + "loss": 0.0688, + "theoretical_loss": 3.467666765222383, + "tokens_seen": 1939603456 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.00017141261196229607, + "objective/train/docs_used": 707259, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3940285444259644, + "objective/train/original_loss": 1.3940284252166748, + "objective/train/theoretical_loss": 3.4676277788777443, + "objective/train/tokens_used": 1960325600, + "objective/train/value_avg": -0.009735107421875, + "objective/train/value_loss": 0.00045788646093569696, + "objective/train/value_max": -4.035234451293945e-05, + "objective/train/value_min": -0.48828125, + "objective/train/value_reward_corr": 0.5960321884519733, + "objective/train/value_std": 0.0169525146484375, + "objective/train/weight_avg": 1.0003608465194702, + "objective/train/weighted_lm_loss": 1.394312858581543, + "objective/train/weights_max": 1.5537357330322266, + "objective/train/weights_min": 0.23123404383659363, + "theoretical_loss": 3.4676277788777443, + "tokens_seen": 1939865600 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041630556892954584, + "loss": 0.0683, + "theoretical_loss": 3.4676277788777443, + "tokens_seen": 1939865600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004162253249879634, + "loss": 0.0683, + "theoretical_loss": 3.467588799276106, + "tokens_seen": 1940127744 + }, + { + "epoch": 0.59, + "learning_rate": 0.000416145081046381, + "loss": 0.068, + "theoretical_loss": 3.4675498264153912, + "tokens_seen": 1940389888 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041606483710479857, + "loss": 0.0695, + "theoretical_loss": 3.4675108602935243, + "tokens_seen": 1940652032 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004159845931632162, + "loss": 0.0662, + "theoretical_loss": 3.4674719009084294, + "tokens_seen": 1940914176 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004159043492216338, + "loss": 0.0693, + "theoretical_loss": 3.4674329482580326, + "tokens_seen": 1941176320 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041582410528005135, + "loss": 0.0666, + "theoretical_loss": 3.46739400234026, + "tokens_seen": 1941438464 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041574386133846897, + "loss": 0.0705, + "theoretical_loss": 3.467355063153039, + "tokens_seen": 1941700608 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004156636173968865, + "loss": 0.0647, + "theoretical_loss": 3.467316130694299, + "tokens_seen": 1941962752 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041558337345530414, + "loss": 0.0664, + "theoretical_loss": 3.467277204961968, + "tokens_seen": 1942224896 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004155031295137217, + "loss": 0.067, + "theoretical_loss": 3.467238285953977, + "tokens_seen": 1942487040 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004154228855721393, + "loss": 0.0696, + "theoretical_loss": 3.467199373668257, + "tokens_seen": 1942749184 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004153426416305569, + "loss": 0.0669, + "theoretical_loss": 3.4671604681027404, + "tokens_seen": 1943011328 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.0002338496851734817, + "objective/train/docs_used": 708379, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3313257694244385, + "objective/train/original_loss": 1.3313257694244385, + "objective/train/theoretical_loss": 3.4671410178394115, + "objective/train/tokens_used": 1963602400, + "objective/train/value_avg": -0.0081024169921875, + "objective/train/value_loss": 0.0004016077728010714, + "objective/train/value_max": -6.866455078125e-05, + "objective/train/value_min": -0.88134765625, + "objective/train/value_reward_corr": 0.6732135705871113, + "objective/train/value_std": 0.0167083740234375, + "objective/train/weight_avg": 1.000407338142395, + "objective/train/weighted_lm_loss": 1.3311095237731934, + "objective/train/weights_max": 1.3798331022262573, + "objective/train/weights_min": 0.3778006136417389, + "theoretical_loss": 3.4671410178394115, + "tokens_seen": 1943142400 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004152623976889745, + "loss": 0.0706, + "theoretical_loss": 3.467121569255359, + "tokens_seen": 1943273472 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004151821537473921, + "loss": 0.0679, + "theoretical_loss": 3.4670826771240484, + "tokens_seen": 1943535616 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004151019098058097, + "loss": 0.071, + "theoretical_loss": 3.4670437917067423, + "tokens_seen": 1943797760 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041502166586422726, + "loss": 0.0686, + "theoretical_loss": 3.4670049130013765, + "tokens_seen": 1944059904 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004149414219226448, + "loss": 0.0657, + "theoretical_loss": 3.466966041005888, + "tokens_seen": 1944322048 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041486117798106244, + "loss": 0.0695, + "theoretical_loss": 3.4669271757182143, + "tokens_seen": 1944584192 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041478093403948005, + "loss": 0.0688, + "theoretical_loss": 3.466888317136293, + "tokens_seen": 1944846336 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004147006900978976, + "loss": 0.0678, + "theoretical_loss": 3.466849465258065, + "tokens_seen": 1945108480 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004146204461563152, + "loss": 0.068, + "theoretical_loss": 3.4668106200814695, + "tokens_seen": 1945370624 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041454020221473283, + "loss": 0.0677, + "theoretical_loss": 3.466771781604448, + "tokens_seen": 1945632768 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041445995827315034, + "loss": 0.0663, + "theoretical_loss": 3.4667329498249426, + "tokens_seen": 1945894912 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041437971433156795, + "loss": 0.0673, + "theoretical_loss": 3.466694124740896, + "tokens_seen": 1946157056 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.0006498626316897571, + "objective/train/docs_used": 709628, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3628689050674438, + "objective/train/original_loss": 1.3628687858581543, + "objective/train/theoretical_loss": 3.466655306350253, + "objective/train/tokens_used": 1966879200, + "objective/train/value_avg": -0.00696563720703125, + "objective/train/value_loss": 0.0002535603125579655, + "objective/train/value_max": -5.829334259033203e-05, + "objective/train/value_min": -0.6953125, + "objective/train/value_reward_corr": 0.7165104923699819, + "objective/train/value_std": 0.017547607421875, + "objective/train/weight_avg": 1.0007662773132324, + "objective/train/weighted_lm_loss": 1.3636950254440308, + "objective/train/weights_max": 1.7379988431930542, + "objective/train/weights_min": 0.38149651885032654, + "theoretical_loss": 3.466655306350253, + "tokens_seen": 1946419200 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041429947038998556, + "loss": 0.0681, + "theoretical_loss": 3.466655306350253, + "tokens_seen": 1946419200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004142192264484032, + "loss": 0.068, + "theoretical_loss": 3.4666164946509572, + "tokens_seen": 1946681344 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041413898250682073, + "loss": 0.0695, + "theoretical_loss": 3.466577689640955, + "tokens_seen": 1946943488 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041405873856523834, + "loss": 0.0694, + "theoretical_loss": 3.4665388913181934, + "tokens_seen": 1947205632 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041397849462365596, + "loss": 0.0666, + "theoretical_loss": 3.4665000996806192, + "tokens_seen": 1947467776 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041389825068207346, + "loss": 0.0685, + "theoretical_loss": 3.466461314726182, + "tokens_seen": 1947729920 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004138180067404911, + "loss": 0.0671, + "theoretical_loss": 3.4664225364528294, + "tokens_seen": 1947992064 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004137377627989087, + "loss": 0.0696, + "theoretical_loss": 3.466383764858513, + "tokens_seen": 1948254208 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004136575188573263, + "loss": 0.0689, + "theoretical_loss": 3.466344999941184, + "tokens_seen": 1948516352 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041357727491574386, + "loss": 0.0684, + "theoretical_loss": 3.4663062416987938, + "tokens_seen": 1948778496 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041349703097416147, + "loss": 0.0698, + "theoretical_loss": 3.4662674901292956, + "tokens_seen": 1949040640 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004134167870325791, + "loss": 0.0638, + "theoretical_loss": 3.4662287452306435, + "tokens_seen": 1949302784 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041333654309099664, + "loss": 0.0681, + "theoretical_loss": 3.466190007000792, + "tokens_seen": 1949564928 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.0006561456248164177, + "objective/train/docs_used": 710798, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3777148723602295, + "objective/train/original_loss": 1.3777148723602295, + "objective/train/theoretical_loss": 3.4661706403860277, + "objective/train/tokens_used": 1970156000, + "objective/train/value_avg": -0.00855255126953125, + "objective/train/value_loss": 0.00019631710893008858, + "objective/train/value_max": -5.346536636352539e-05, + "objective/train/value_min": -0.379150390625, + "objective/train/value_reward_corr": 0.7235816315595381, + "objective/train/value_std": 0.0145263671875, + "objective/train/weight_avg": 1.0007461309432983, + "objective/train/weighted_lm_loss": 1.379103422164917, + "objective/train/weights_max": 1.2311376333236694, + "objective/train/weights_min": 0.3972630202770233, + "theoretical_loss": 3.4661706403860277, + "tokens_seen": 1949696000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004132562991494142, + "loss": 0.0712, + "theoretical_loss": 3.466151275437697, + "tokens_seen": 1949827072 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004131760552078318, + "loss": 0.066, + "theoretical_loss": 3.4661125505393153, + "tokens_seen": 1950089216 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004130958112662494, + "loss": 0.0688, + "theoretical_loss": 3.4660738323036036, + "tokens_seen": 1950351360 + }, + { + "epoch": 0.59, + "learning_rate": 0.000413015567324667, + "loss": 0.0704, + "theoretical_loss": 3.466035120728521, + "tokens_seen": 1950613504 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004129353233830846, + "loss": 0.0669, + "theoretical_loss": 3.465996415812027, + "tokens_seen": 1950875648 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004128550794415022, + "loss": 0.0698, + "theoretical_loss": 3.4659577175520813, + "tokens_seen": 1951137792 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041277483549991977, + "loss": 0.0668, + "theoretical_loss": 3.4659190259466444, + "tokens_seen": 1951399936 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004126945915583373, + "loss": 0.0672, + "theoretical_loss": 3.46588034099368, + "tokens_seen": 1951662080 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041261434761675494, + "loss": 0.069, + "theoretical_loss": 3.465841662691149, + "tokens_seen": 1951924224 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004125341036751725, + "loss": 0.0673, + "theoretical_loss": 3.465802991037016, + "tokens_seen": 1952186368 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004124538597335901, + "loss": 0.0702, + "theoretical_loss": 3.4657643260292463, + "tokens_seen": 1952448512 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004123736157920077, + "loss": 0.0688, + "theoretical_loss": 3.4657256676658053, + "tokens_seen": 1952710656 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 0.0003945839125663042, + "objective/train/docs_used": 712021, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.313441514968872, + "objective/train/original_loss": 1.3134416341781616, + "objective/train/theoretical_loss": 3.4656870159446584, + "objective/train/tokens_used": 1973432800, + "objective/train/value_avg": -0.00843048095703125, + "objective/train/value_loss": 0.0001438377657905221, + "objective/train/value_max": -4.988908767700195e-05, + "objective/train/value_min": -0.321044921875, + "objective/train/value_reward_corr": 0.715787808037206, + "objective/train/value_std": 0.0128021240234375, + "objective/train/weight_avg": 1.0004626512527466, + "objective/train/weighted_lm_loss": 1.313431739807129, + "objective/train/weights_max": 1.2099876403808594, + "objective/train/weights_min": 0.4170147180557251, + "theoretical_loss": 3.4656870159446584, + "tokens_seen": 1952972800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041229337185042533, + "loss": 0.0688, + "theoretical_loss": 3.4656870159446584, + "tokens_seen": 1952972800 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004122131279088429, + "loss": 0.0676, + "theoretical_loss": 3.465648370863774, + "tokens_seen": 1953234944 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004121328839672605, + "loss": 0.0663, + "theoretical_loss": 3.46560973242112, + "tokens_seen": 1953497088 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041205264002567806, + "loss": 0.0695, + "theoretical_loss": 3.4655711006146657, + "tokens_seen": 1953759232 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004119723960840956, + "loss": 0.0703, + "theoretical_loss": 3.465532475442381, + "tokens_seen": 1954021376 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041189215214251323, + "loss": 0.0686, + "theoretical_loss": 3.4654938569022375, + "tokens_seen": 1954283520 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041181190820093085, + "loss": 0.0685, + "theoretical_loss": 3.4654552449922056, + "tokens_seen": 1954545664 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041173166425934846, + "loss": 0.0698, + "theoretical_loss": 3.4654166397102593, + "tokens_seen": 1954807808 + }, + { + "epoch": 0.59, + "learning_rate": 0.000411651420317766, + "loss": 0.0692, + "theoretical_loss": 3.4653780410543717, + "tokens_seen": 1955069952 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041157117637618363, + "loss": 0.0688, + "theoretical_loss": 3.465339449022517, + "tokens_seen": 1955332096 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004114909324346012, + "loss": 0.0703, + "theoretical_loss": 3.4653008636126716, + "tokens_seen": 1955594240 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041141068849301875, + "loss": 0.0684, + "theoretical_loss": 3.4652622848228107, + "tokens_seen": 1955856384 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041133044455143636, + "loss": 0.0689, + "theoretical_loss": 3.4652237126509124, + "tokens_seen": 1956118528 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": -0.0006969102541916072, + "objective/train/docs_used": 713197, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3060723543167114, + "objective/train/original_loss": 1.3060722351074219, + "objective/train/theoretical_loss": 3.465204429046067, + "objective/train/tokens_used": 1976709600, + "objective/train/value_avg": -0.006160736083984375, + "objective/train/value_loss": 0.0003185392706654966, + "objective/train/value_max": -4.267692565917969e-05, + "objective/train/value_min": -0.393798828125, + "objective/train/value_reward_corr": 0.663370985817207, + "objective/train/value_std": 0.01360321044921875, + "objective/train/weight_avg": 0.9994413256645203, + "objective/train/weighted_lm_loss": 1.3050378561019897, + "objective/train/weights_max": 1.3730344772338867, + "objective/train/weights_min": 0.3702559173107147, + "theoretical_loss": 3.465204429046067, + "tokens_seen": 1956249600 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041125020060985397, + "loss": 0.0693, + "theoretical_loss": 3.465185147094954, + "tokens_seen": 1956380672 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004111699566682716, + "loss": 0.0694, + "theoretical_loss": 3.465146588152915, + "tokens_seen": 1956642816 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041108971272668914, + "loss": 0.068, + "theoretical_loss": 3.465108035822775, + "tokens_seen": 1956904960 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041100946878510676, + "loss": 0.0701, + "theoretical_loss": 3.4650694901025147, + "tokens_seen": 1957167104 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004109292248435243, + "loss": 0.067, + "theoretical_loss": 3.4650309509901156, + "tokens_seen": 1957429248 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004108489809019419, + "loss": 0.0722, + "theoretical_loss": 3.4649924184835603, + "tokens_seen": 1957691392 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004107687369603595, + "loss": 0.0705, + "theoretical_loss": 3.4649538925808328, + "tokens_seen": 1957953536 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004106884930187771, + "loss": 0.0689, + "theoretical_loss": 3.4649153732799167, + "tokens_seen": 1958215680 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004106082490771947, + "loss": 0.0674, + "theoretical_loss": 3.464876860578797, + "tokens_seen": 1958477824 + }, + { + "epoch": 0.59, + "learning_rate": 0.00041052800513561227, + "loss": 0.0702, + "theoretical_loss": 3.46483835447546, + "tokens_seen": 1958739968 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004104477611940299, + "loss": 0.0676, + "theoretical_loss": 3.464799854967893, + "tokens_seen": 1959002112 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004103675172524475, + "loss": 0.0697, + "theoretical_loss": 3.464761362054084, + "tokens_seen": 1959264256 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": -0.0016641940455883741, + "objective/train/docs_used": 714425, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4409244060516357, + "objective/train/original_loss": 1.4409245252609253, + "objective/train/theoretical_loss": 3.4647228757320203, + "objective/train/tokens_used": 1979986400, + "objective/train/value_avg": -0.0134735107421875, + "objective/train/value_loss": 0.0007892985595390201, + "objective/train/value_max": -4.100799560546875e-05, + "objective/train/value_min": -0.98095703125, + "objective/train/value_reward_corr": 0.7946839372874429, + "objective/train/value_std": 0.033355712890625, + "objective/train/weight_avg": 0.9987018704414368, + "objective/train/weighted_lm_loss": 1.4399670362472534, + "objective/train/weights_max": 2.14656138420105, + "objective/train/weights_min": 0.3712770938873291, + "theoretical_loss": 3.4647228757320203, + "tokens_seen": 1959526400 + }, + { + "epoch": 0.59, + "learning_rate": 0.000410287273310865, + "loss": 0.0681, + "theoretical_loss": 3.4647228757320203, + "tokens_seen": 1959526400 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004102070293692826, + "loss": 0.0698, + "theoretical_loss": 3.4646843959996927, + "tokens_seen": 1959788544 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004101267854277002, + "loss": 0.0676, + "theoretical_loss": 3.464645922855091, + "tokens_seen": 1960050688 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004100465414861178, + "loss": 0.0699, + "theoretical_loss": 3.464607456296207, + "tokens_seen": 1960312832 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004099662975445354, + "loss": 0.0708, + "theoretical_loss": 3.464568996321033, + "tokens_seen": 1960574976 + }, + { + "epoch": 0.59, + "learning_rate": 0.000409886053602953, + "loss": 0.0702, + "theoretical_loss": 3.4645305429275624, + "tokens_seen": 1960837120 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004098058096613706, + "loss": 0.0694, + "theoretical_loss": 3.464492096113788, + "tokens_seen": 1961099264 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004097255657197881, + "loss": 0.0695, + "theoretical_loss": 3.4644536558777057, + "tokens_seen": 1961361408 + }, + { + "epoch": 0.59, + "learning_rate": 0.00040964532177820574, + "loss": 0.0701, + "theoretical_loss": 3.4644152222173106, + "tokens_seen": 1961623552 + }, + { + "epoch": 0.59, + "learning_rate": 0.00040956507783662335, + "loss": 0.0694, + "theoretical_loss": 3.4643767951305997, + "tokens_seen": 1961885696 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004094848338950409, + "loss": 0.0666, + "theoretical_loss": 3.4643383746155703, + "tokens_seen": 1962147840 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004094045899534585, + "loss": 0.0677, + "theoretical_loss": 3.464299960670221, + "tokens_seen": 1962409984 + }, + { + "epoch": 0.59, + "learning_rate": 0.00040932434601187613, + "loss": 0.0692, + "theoretical_loss": 3.464261553292551, + "tokens_seen": 1962672128 + }, + { + "epoch": 0.59, + "objective/train/advantage_avg": 9.944574412656948e-05, + "objective/train/docs_used": 715564, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2323715686798096, + "objective/train/original_loss": 1.2323713302612305, + "objective/train/theoretical_loss": 3.464242352065971, + "objective/train/tokens_used": 1983263200, + "objective/train/value_avg": -0.0050811767578125, + "objective/train/value_loss": 0.00013055642193648964, + "objective/train/value_max": -4.13060188293457e-05, + "objective/train/value_min": -0.2900390625, + "objective/train/value_reward_corr": 0.657465772340293, + "objective/train/value_std": 0.00870513916015625, + "objective/train/weight_avg": 1.0001602172851562, + "objective/train/weighted_lm_loss": 1.2325642108917236, + "objective/train/weights_max": 1.0780946016311646, + "objective/train/weights_min": 0.3836330473423004, + "theoretical_loss": 3.464242352065971, + "tokens_seen": 1962803200 + }, + { + "epoch": 0.59, + "learning_rate": 0.00040924410207029375, + "loss": 0.0697, + "theoretical_loss": 3.4642231524805607, + "tokens_seen": 1962934272 + }, + { + "epoch": 0.59, + "learning_rate": 0.00040916385812871125, + "loss": 0.0704, + "theoretical_loss": 3.4641847582322507, + "tokens_seen": 1963196416 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040908361418712886, + "loss": 0.0711, + "theoretical_loss": 3.4641463705456226, + "tokens_seen": 1963458560 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004090033702455465, + "loss": 0.0704, + "theoretical_loss": 3.4641079894186797, + "tokens_seen": 1963720704 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040892312630396403, + "loss": 0.0674, + "theoretical_loss": 3.4640696148494254, + "tokens_seen": 1963982848 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040884288236238165, + "loss": 0.068, + "theoretical_loss": 3.464031246835864, + "tokens_seen": 1964244992 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040876263842079926, + "loss": 0.0684, + "theoretical_loss": 3.4639928853760016, + "tokens_seen": 1964507136 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040868239447921687, + "loss": 0.0689, + "theoretical_loss": 3.463954530467844, + "tokens_seen": 1964769280 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040860215053763443, + "loss": 0.0671, + "theoretical_loss": 3.463916182109398, + "tokens_seen": 1965031424 + }, + { + "epoch": 0.6, + "learning_rate": 0.000408521906596052, + "loss": 0.0686, + "theoretical_loss": 3.463877840298672, + "tokens_seen": 1965293568 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004084416626544696, + "loss": 0.0683, + "theoretical_loss": 3.4638395050336745, + "tokens_seen": 1965555712 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040836141871288716, + "loss": 0.0697, + "theoretical_loss": 3.4638011763124164, + "tokens_seen": 1965817856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7394139103740143, + "debugging/entropy-1-grams": 5.1329642330953185, + "debugging/length": 486.55555555555554, + "debugging/num_segments": 9, + "debugging/raw_token_scores_avg": 0.005027866922318935, + "debugging/raw_token_scores_std": 0.022307263687253, + "debugging/score": 0.00915515707107717, + "debugging/score_std": 0.01661134296838666, + "epoch": 0.6, + "objective/train/advantage_avg": 0.0009648797567933798, + "objective/train/docs_used": 716760, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4830039739608765, + "objective/train/original_loss": 1.483003854751587, + "objective/train/theoretical_loss": 3.4637628541329066, + "objective/train/tokens_used": 1986540000, + "objective/train/value_avg": -0.005992889404296875, + "objective/train/value_loss": 0.000370719499187544, + "objective/train/value_max": -4.947185516357422e-05, + "objective/train/value_min": -0.92724609375, + "objective/train/value_reward_corr": 0.5103701637010936, + "objective/train/value_std": 0.0126495361328125, + "objective/train/weight_avg": 1.0010908842086792, + "objective/train/weighted_lm_loss": 1.4844170808792114, + "objective/train/weights_max": 2.2417259216308594, + "objective/train/weights_min": 0.06188541650772095, + "theoretical_loss": 3.4637628541329066, + "tokens_seen": 1966080000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040828117477130477, + "loss": 0.0681, + "theoretical_loss": 3.4637628541329066, + "tokens_seen": 1966080000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004082009308297224, + "loss": 0.0665, + "theoretical_loss": 3.4637245384931576, + "tokens_seen": 1966342144 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040812068688813994, + "loss": 0.0694, + "theoretical_loss": 3.4636862293911816, + "tokens_seen": 1966604288 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040804044294655756, + "loss": 0.0678, + "theoretical_loss": 3.463647926824992, + "tokens_seen": 1966866432 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004079601990049751, + "loss": 0.0692, + "theoretical_loss": 3.4636096307926016, + "tokens_seen": 1967128576 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004078799550633927, + "loss": 0.0694, + "theoretical_loss": 3.4635713412920275, + "tokens_seen": 1967390720 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004077997111218103, + "loss": 0.0703, + "theoretical_loss": 3.4635330583212838, + "tokens_seen": 1967652864 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004077194671802279, + "loss": 0.0693, + "theoretical_loss": 3.463494781878388, + "tokens_seen": 1967915008 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004076392232386455, + "loss": 0.0716, + "theoretical_loss": 3.4634565119613576, + "tokens_seen": 1968177152 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040755897929706307, + "loss": 0.0698, + "theoretical_loss": 3.463418248568211, + "tokens_seen": 1968439296 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004074787353554807, + "loss": 0.0672, + "theoretical_loss": 3.463379991696967, + "tokens_seen": 1968701440 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004073984914138983, + "loss": 0.0682, + "theoretical_loss": 3.463341741345646, + "tokens_seen": 1968963584 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040731824747231585, + "loss": 0.0663, + "theoretical_loss": 3.4633034975122694, + "tokens_seen": 1969225728 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0015214718878269196, + "objective/train/docs_used": 718017, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4403096437454224, + "objective/train/original_loss": 1.4403096437454224, + "objective/train/theoretical_loss": 3.4632843780391918, + "objective/train/tokens_used": 1989816800, + "objective/train/value_avg": -0.005222320556640625, + "objective/train/value_loss": 0.0001249258202733472, + "objective/train/value_max": -1.7344951629638672e-05, + "objective/train/value_min": -0.24560546875, + "objective/train/value_reward_corr": 0.6257195538265397, + "objective/train/value_std": 0.00986480712890625, + "objective/train/weight_avg": 1.0015816688537598, + "objective/train/weighted_lm_loss": 1.442775845527649, + "objective/train/weights_max": 1.1413390636444092, + "objective/train/weights_min": 0.7176816463470459, + "theoretical_loss": 3.4632843780391918, + "tokens_seen": 1969356800 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004072380035307334, + "loss": 0.0693, + "theoretical_loss": 3.4632652601948593, + "tokens_seen": 1969487872 + }, + { + "epoch": 0.6, + "learning_rate": 0.000407157759589151, + "loss": 0.0684, + "theoretical_loss": 3.463227029391437, + "tokens_seen": 1969750016 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040707751564756864, + "loss": 0.0695, + "theoretical_loss": 3.4631888051000272, + "tokens_seen": 1970012160 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004069972717059862, + "loss": 0.0671, + "theoretical_loss": 3.4631505873186548, + "tokens_seen": 1970274304 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004069170277644038, + "loss": 0.0671, + "theoretical_loss": 3.463112376045344, + "tokens_seen": 1970536448 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004068367838228214, + "loss": 0.0678, + "theoretical_loss": 3.463074171278122, + "tokens_seen": 1970798592 + }, + { + "epoch": 0.6, + "learning_rate": 0.000406756539881239, + "loss": 0.0678, + "theoretical_loss": 3.4630359730150153, + "tokens_seen": 1971060736 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040667629593965654, + "loss": 0.0695, + "theoretical_loss": 3.4629977812540518, + "tokens_seen": 1971322880 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040659605199807415, + "loss": 0.0711, + "theoretical_loss": 3.46295959599326, + "tokens_seen": 1971585024 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040651580805649176, + "loss": 0.0696, + "theoretical_loss": 3.4629214172306706, + "tokens_seen": 1971847168 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004064355641149093, + "loss": 0.0704, + "theoretical_loss": 3.462883244964313, + "tokens_seen": 1972109312 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040635532017332693, + "loss": 0.0662, + "theoretical_loss": 3.4628450791922187, + "tokens_seen": 1972371456 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0003923242911696434, + "objective/train/docs_used": 719209, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2555794715881348, + "objective/train/original_loss": 1.2555794715881348, + "objective/train/theoretical_loss": 3.4628069199124205, + "objective/train/tokens_used": 1993093600, + "objective/train/value_avg": -0.0065765380859375, + "objective/train/value_loss": 0.000190413833479397, + "objective/train/value_max": -4.13060188293457e-05, + "objective/train/value_min": -0.201171875, + "objective/train/value_reward_corr": 0.557396649936048, + "objective/train/value_std": 0.009490966796875, + "objective/train/weight_avg": 1.000475287437439, + "objective/train/weighted_lm_loss": 1.256247878074646, + "objective/train/weights_max": 1.151779294013977, + "objective/train/weights_min": 0.36899471282958984, + "theoretical_loss": 3.4628069199124205, + "tokens_seen": 1972633600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040627507623174455, + "loss": 0.0658, + "theoretical_loss": 3.4628069199124205, + "tokens_seen": 1972633600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040619483229016205, + "loss": 0.0676, + "theoretical_loss": 3.4627687671229515, + "tokens_seen": 1972895744 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040611458834857966, + "loss": 0.0686, + "theoretical_loss": 3.462730620821845, + "tokens_seen": 1973157888 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004060343444069973, + "loss": 0.0695, + "theoretical_loss": 3.462692481007136, + "tokens_seen": 1973420032 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004059541004654149, + "loss": 0.0678, + "theoretical_loss": 3.46265434767686, + "tokens_seen": 1973682176 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040587385652383245, + "loss": 0.0679, + "theoretical_loss": 3.4626162208290534, + "tokens_seen": 1973944320 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040579361258225006, + "loss": 0.0691, + "theoretical_loss": 3.462578100461754, + "tokens_seen": 1974206464 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040571336864066767, + "loss": 0.0689, + "theoretical_loss": 3.4625399865730007, + "tokens_seen": 1974468608 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040563312469908523, + "loss": 0.0671, + "theoretical_loss": 3.462501879160831, + "tokens_seen": 1974730752 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004055528807575028, + "loss": 0.0701, + "theoretical_loss": 3.462463778223285, + "tokens_seen": 1974992896 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004054726368159204, + "loss": 0.0682, + "theoretical_loss": 3.462425683758404, + "tokens_seen": 1975255040 + }, + { + "epoch": 0.6, + "learning_rate": 0.000405392392874338, + "loss": 0.0723, + "theoretical_loss": 3.4623875957642305, + "tokens_seen": 1975517184 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040531214893275557, + "loss": 0.0699, + "theoretical_loss": 3.462349514238805, + "tokens_seen": 1975779328 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0036880422849208117, + "objective/train/docs_used": 720450, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5445085763931274, + "objective/train/original_loss": 1.544508457183838, + "objective/train/theoretical_loss": 3.462330475901262, + "objective/train/tokens_used": 1996370400, + "objective/train/value_avg": -0.01209259033203125, + "objective/train/value_loss": 0.0005820911610499024, + "objective/train/value_max": -5.519390106201172e-05, + "objective/train/value_min": -0.8037109375, + "objective/train/value_reward_corr": 0.5987648461866432, + "objective/train/value_std": 0.0245361328125, + "objective/train/weight_avg": 1.0039633512496948, + "objective/train/weighted_lm_loss": 1.5502508878707886, + "objective/train/weights_max": 2.015129804611206, + "objective/train/weights_min": 0.34483224153518677, + "theoretical_loss": 3.462330475901262, + "tokens_seen": 1975910400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004052319049911732, + "loss": 0.0705, + "theoretical_loss": 3.462311439180173, + "tokens_seen": 1976041472 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004051516610495908, + "loss": 0.0677, + "theoretical_loss": 3.4622733705863764, + "tokens_seen": 1976303616 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040507141710800835, + "loss": 0.0653, + "theoretical_loss": 3.462235308455462, + "tokens_seen": 1976565760 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004049911731664259, + "loss": 0.0686, + "theoretical_loss": 3.4621972527854745, + "tokens_seen": 1976827904 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004049109292248435, + "loss": 0.0705, + "theoretical_loss": 3.462159203574461, + "tokens_seen": 1977090048 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040483068528326114, + "loss": 0.0686, + "theoretical_loss": 3.46212116082047, + "tokens_seen": 1977352192 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004047504413416787, + "loss": 0.0697, + "theoretical_loss": 3.4620831245215484, + "tokens_seen": 1977614336 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004046701974000963, + "loss": 0.069, + "theoretical_loss": 3.462045094675747, + "tokens_seen": 1977876480 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004045899534585139, + "loss": 0.0683, + "theoretical_loss": 3.462007071281114, + "tokens_seen": 1978138624 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004045097095169315, + "loss": 0.0666, + "theoretical_loss": 3.461969054335703, + "tokens_seen": 1978400768 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040442946557534904, + "loss": 0.0687, + "theoretical_loss": 3.461931043837563, + "tokens_seen": 1978662912 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040434922163376665, + "loss": 0.0682, + "theoretical_loss": 3.4618930397847487, + "tokens_seen": 1978925056 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0012460628058761358, + "objective/train/docs_used": 721537, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4055533409118652, + "objective/train/original_loss": 1.4055533409118652, + "objective/train/theoretical_loss": 3.461855042175312, + "objective/train/tokens_used": 1999647200, + "objective/train/value_avg": -0.005329132080078125, + "objective/train/value_loss": 0.0001690825738478452, + "objective/train/value_max": -6.014108657836914e-05, + "objective/train/value_min": -0.61181640625, + "objective/train/value_reward_corr": 0.6281343766334138, + "objective/train/value_std": 0.01203155517578125, + "objective/train/weight_avg": 1.0013235807418823, + "objective/train/weighted_lm_loss": 1.4071850776672363, + "objective/train/weights_max": 1.8437772989273071, + "objective/train/weights_min": 0.37389513850212097, + "theoretical_loss": 3.461855042175312, + "tokens_seen": 1979187200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004042689776921842, + "loss": 0.0678, + "theoretical_loss": 3.461855042175312, + "tokens_seen": 1979187200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004041887337506018, + "loss": 0.0687, + "theoretical_loss": 3.4618170510073085, + "tokens_seen": 1979449344 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040410848980901944, + "loss": 0.0696, + "theoretical_loss": 3.4617790662787935, + "tokens_seen": 1979711488 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040402824586743705, + "loss": 0.0725, + "theoretical_loss": 3.4617410879878223, + "tokens_seen": 1979973632 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004039480019258546, + "loss": 0.0694, + "theoretical_loss": 3.461703116132452, + "tokens_seen": 1980235776 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004038677579842722, + "loss": 0.0696, + "theoretical_loss": 3.46166515071074, + "tokens_seen": 1980497920 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004037875140426898, + "loss": 0.0684, + "theoretical_loss": 3.461627191720745, + "tokens_seen": 1980760064 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040370727010110734, + "loss": 0.0694, + "theoretical_loss": 3.461589239160528, + "tokens_seen": 1981022208 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040362702615952495, + "loss": 0.0681, + "theoretical_loss": 3.4615512930281467, + "tokens_seen": 1981284352 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040354678221794256, + "loss": 0.0676, + "theoretical_loss": 3.4615133533216635, + "tokens_seen": 1981546496 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004034665382763602, + "loss": 0.0681, + "theoretical_loss": 3.4614754200391404, + "tokens_seen": 1981808640 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040338629433477773, + "loss": 0.0657, + "theoretical_loss": 3.4614374931786402, + "tokens_seen": 1982070784 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040330605039319534, + "loss": 0.0668, + "theoretical_loss": 3.461399572738226, + "tokens_seen": 1982332928 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0009382269927300513, + "objective/train/docs_used": 722739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.329759955406189, + "objective/train/original_loss": 1.3297600746154785, + "objective/train/theoretical_loss": 3.4613806149249466, + "objective/train/tokens_used": 2002924000, + "objective/train/value_avg": -0.0085906982421875, + "objective/train/value_loss": 0.00015004277520347387, + "objective/train/value_max": -3.218650817871094e-05, + "objective/train/value_min": -0.362548828125, + "objective/train/value_reward_corr": 0.81791631267802, + "objective/train/value_std": 0.0171661376953125, + "objective/train/weight_avg": 1.0010125637054443, + "objective/train/weighted_lm_loss": 1.3312724828720093, + "objective/train/weights_max": 1.30023992061615, + "objective/train/weights_min": 0.8231871724128723, + "theoretical_loss": 3.4613806149249466, + "tokens_seen": 1982464000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004032258064516129, + "loss": 0.0687, + "theoretical_loss": 3.461361658715963, + "tokens_seen": 1982595072 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040314556251003046, + "loss": 0.0695, + "theoretical_loss": 3.4613237511099157, + "tokens_seen": 1982857216 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004030653185684481, + "loss": 0.0684, + "theoretical_loss": 3.4612858499181502, + "tokens_seen": 1983119360 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004029850746268657, + "loss": 0.0701, + "theoretical_loss": 3.4612479551387345, + "tokens_seen": 1983381504 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004029048306852833, + "loss": 0.0691, + "theoretical_loss": 3.461210066769736, + "tokens_seen": 1983643648 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040282458674370086, + "loss": 0.0688, + "theoretical_loss": 3.4611721848092225, + "tokens_seen": 1983905792 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040274434280211847, + "loss": 0.0691, + "theoretical_loss": 3.461134309255265, + "tokens_seen": 1984167936 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004026640988605361, + "loss": 0.0679, + "theoretical_loss": 3.4610964401059325, + "tokens_seen": 1984430080 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004025838549189536, + "loss": 0.0692, + "theoretical_loss": 3.461058577359297, + "tokens_seen": 1984692224 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004025036109773712, + "loss": 0.0683, + "theoretical_loss": 3.4610207210134294, + "tokens_seen": 1984954368 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004024233670357888, + "loss": 0.07, + "theoretical_loss": 3.4609828710664035, + "tokens_seen": 1985216512 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004023431230942064, + "loss": 0.0674, + "theoretical_loss": 3.460945027516293, + "tokens_seen": 1985478656 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0006598348263651133, + "objective/train/docs_used": 723995, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3495676517486572, + "objective/train/original_loss": 1.3495676517486572, + "objective/train/theoretical_loss": 3.460907190361172, + "objective/train/tokens_used": 2006200800, + "objective/train/value_avg": -0.01053619384765625, + "objective/train/value_loss": 0.0005898875533603132, + "objective/train/value_max": -5.4776668548583984e-05, + "objective/train/value_min": -0.9677734375, + "objective/train/value_reward_corr": 0.6037339334248357, + "objective/train/value_std": 0.02178955078125, + "objective/train/weight_avg": 1.0009301900863647, + "objective/train/weighted_lm_loss": 1.3499261140823364, + "objective/train/weights_max": 2.356096029281616, + "objective/train/weights_min": 0.3713054358959198, + "theoretical_loss": 3.460907190361172, + "tokens_seen": 1985740800 + }, + { + "epoch": 0.6, + "learning_rate": 0.000402262879152624, + "loss": 0.0675, + "theoretical_loss": 3.460907190361172, + "tokens_seen": 1985740800 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004021826352110416, + "loss": 0.068, + "theoretical_loss": 3.460869359599116, + "tokens_seen": 1986002944 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004021023912694592, + "loss": 0.0682, + "theoretical_loss": 3.460831535228201, + "tokens_seen": 1986265088 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004020221473278767, + "loss": 0.0694, + "theoretical_loss": 3.4607937172465046, + "tokens_seen": 1986527232 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004019419033862943, + "loss": 0.0671, + "theoretical_loss": 3.4607559056521033, + "tokens_seen": 1986789376 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040186165944471194, + "loss": 0.0689, + "theoretical_loss": 3.4607181004430774, + "tokens_seen": 1987051520 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004017814155031295, + "loss": 0.0683, + "theoretical_loss": 3.460680301617505, + "tokens_seen": 1987313664 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004017011715615471, + "loss": 0.069, + "theoretical_loss": 3.460642509173468, + "tokens_seen": 1987575808 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004016209276199647, + "loss": 0.0719, + "theoretical_loss": 3.4606047231090455, + "tokens_seen": 1987837952 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040154068367838233, + "loss": 0.0657, + "theoretical_loss": 3.4605669434223216, + "tokens_seen": 1988100096 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040146043973679984, + "loss": 0.0672, + "theoretical_loss": 3.4605291701113776, + "tokens_seen": 1988362240 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040138019579521745, + "loss": 0.0662, + "theoretical_loss": 3.4604914031742977, + "tokens_seen": 1988624384 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040129995185363506, + "loss": 0.0672, + "theoretical_loss": 3.460453642609166, + "tokens_seen": 1988886528 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0007240816485136747, + "objective/train/docs_used": 725238, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5036556720733643, + "objective/train/original_loss": 1.5036556720733643, + "objective/train/theoretical_loss": 3.4604347647154827, + "objective/train/tokens_used": 2009477600, + "objective/train/value_avg": -0.005214691162109375, + "objective/train/value_loss": 8.880392124410719e-05, + "objective/train/value_max": -2.6285648345947266e-05, + "objective/train/value_min": -0.1871337890625, + "objective/train/value_reward_corr": 0.6710600827464257, + "objective/train/value_std": 0.00882720947265625, + "objective/train/weight_avg": 1.000767707824707, + "objective/train/weighted_lm_loss": 1.5045437812805176, + "objective/train/weights_max": 1.109333872795105, + "objective/train/weights_min": 0.7807214856147766, + "theoretical_loss": 3.4604347647154827, + "tokens_seen": 1989017600 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004012197079120526, + "loss": 0.0694, + "theoretical_loss": 3.4604158884140683, + "tokens_seen": 1989148672 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040113946397047023, + "loss": 0.0685, + "theoretical_loss": 3.460378140587091, + "tokens_seen": 1989410816 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040105922002888785, + "loss": 0.0694, + "theoretical_loss": 3.46034039912632, + "tokens_seen": 1989672960 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040097897608730546, + "loss": 0.0662, + "theoretical_loss": 3.460302664029844, + "tokens_seen": 1989935104 + }, + { + "epoch": 0.6, + "learning_rate": 0.000400898732145723, + "loss": 0.0708, + "theoretical_loss": 3.4602649352957515, + "tokens_seen": 1990197248 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004008184882041406, + "loss": 0.0676, + "theoretical_loss": 3.460227212922131, + "tokens_seen": 1990459392 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004007382442625582, + "loss": 0.0693, + "theoretical_loss": 3.4601894969070743, + "tokens_seen": 1990721536 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040065800032097575, + "loss": 0.067, + "theoretical_loss": 3.460151787248672, + "tokens_seen": 1990983680 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040057775637939336, + "loss": 0.0682, + "theoretical_loss": 3.460114083945015, + "tokens_seen": 1991245824 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040049751243781097, + "loss": 0.069, + "theoretical_loss": 3.4600763869941966, + "tokens_seen": 1991507968 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004004172684962286, + "loss": 0.0687, + "theoretical_loss": 3.460038696394311, + "tokens_seen": 1991770112 + }, + { + "epoch": 0.6, + "learning_rate": 0.00040033702455464614, + "loss": 0.0706, + "theoretical_loss": 3.4600010121434517, + "tokens_seen": 1992032256 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.00020876179041806608, + "objective/train/docs_used": 726441, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.400139570236206, + "objective/train/original_loss": 1.400139570236206, + "objective/train/theoretical_loss": 3.4599633342397142, + "objective/train/tokens_used": 2012754400, + "objective/train/value_avg": -0.008514404296875, + "objective/train/value_loss": 0.00025450316024944186, + "objective/train/value_max": -5.9664249420166016e-05, + "objective/train/value_min": -0.28369140625, + "objective/train/value_reward_corr": 0.7200322547456051, + "objective/train/value_std": 0.0171051025390625, + "objective/train/weight_avg": 1.0003241300582886, + "objective/train/weighted_lm_loss": 1.4006654024124146, + "objective/train/weights_max": 1.277927041053772, + "objective/train/weights_min": 0.36856982111930847, + "theoretical_loss": 3.4599633342397142, + "tokens_seen": 1992294400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004002567806130637, + "loss": 0.068, + "theoretical_loss": 3.4599633342397142, + "tokens_seen": 1992294400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004001765366714813, + "loss": 0.0683, + "theoretical_loss": 3.4599256626811945, + "tokens_seen": 1992556544 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004000962927298989, + "loss": 0.0682, + "theoretical_loss": 3.45988799746599, + "tokens_seen": 1992818688 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004000160487883165, + "loss": 0.0675, + "theoretical_loss": 3.4598503385921977, + "tokens_seen": 1993080832 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003999358048467341, + "loss": 0.0676, + "theoretical_loss": 3.4598126860579166, + "tokens_seen": 1993342976 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039985556090515166, + "loss": 0.0673, + "theoretical_loss": 3.4597750398612455, + "tokens_seen": 1993605120 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039977531696356927, + "loss": 0.0702, + "theoretical_loss": 3.459737400000284, + "tokens_seen": 1993867264 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039969507302198683, + "loss": 0.0675, + "theoretical_loss": 3.4596997664731344, + "tokens_seen": 1994129408 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039961482908040444, + "loss": 0.0686, + "theoretical_loss": 3.4596621392778983, + "tokens_seen": 1994391552 + }, + { + "epoch": 0.6, + "learning_rate": 0.000399534585138822, + "loss": 0.0686, + "theoretical_loss": 3.459624518412677, + "tokens_seen": 1994653696 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003994543411972396, + "loss": 0.0667, + "theoretical_loss": 3.459586903875575, + "tokens_seen": 1994915840 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003993740972556572, + "loss": 0.0691, + "theoretical_loss": 3.4595492956646963, + "tokens_seen": 1995177984 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003992938533140748, + "loss": 0.0675, + "theoretical_loss": 3.459511693778146, + "tokens_seen": 1995440128 + }, + { + "epoch": 0.6, + "objective/train/advantage_avg": 0.0013966768747195601, + "objective/train/docs_used": 727673, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2055573463439941, + "objective/train/original_loss": 1.2055573463439941, + "objective/train/theoretical_loss": 3.4594928952059014, + "objective/train/tokens_used": 2016031200, + "objective/train/value_avg": -0.00856781005859375, + "objective/train/value_loss": 0.00022097454348113388, + "objective/train/value_max": -4.57763671875e-05, + "objective/train/value_min": -0.63623046875, + "objective/train/value_reward_corr": 0.7161873647776913, + "objective/train/value_std": 0.01617431640625, + "objective/train/weight_avg": 1.0014972686767578, + "objective/train/weighted_lm_loss": 1.2068300247192383, + "objective/train/weights_max": 1.8893455266952515, + "objective/train/weights_min": 0.3683340847492218, + "theoretical_loss": 3.4594928952059014, + "tokens_seen": 1995571200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003992136093724924, + "loss": 0.068, + "theoretical_loss": 3.4594740982140295, + "tokens_seen": 1995702272 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039913336543091, + "loss": 0.0699, + "theoretical_loss": 3.459436508970454, + "tokens_seen": 1995964416 + }, + { + "epoch": 0.6, + "learning_rate": 0.00039905312148932757, + "loss": 0.0689, + "theoretical_loss": 3.4593989260455267, + "tokens_seen": 1996226560 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003989728775477451, + "loss": 0.0678, + "theoretical_loss": 3.459361349437356, + "tokens_seen": 1996488704 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039889263360616274, + "loss": 0.0696, + "theoretical_loss": 3.459323779144051, + "tokens_seen": 1996750848 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039881238966458035, + "loss": 0.0678, + "theoretical_loss": 3.4592862151637216, + "tokens_seen": 1997012992 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003987321457229979, + "loss": 0.0672, + "theoretical_loss": 3.4592486574944785, + "tokens_seen": 1997275136 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003986519017814155, + "loss": 0.0692, + "theoretical_loss": 3.4592111061344335, + "tokens_seen": 1997537280 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039857165783983313, + "loss": 0.0701, + "theoretical_loss": 3.4591735610816983, + "tokens_seen": 1997799424 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003984914138982507, + "loss": 0.069, + "theoretical_loss": 3.459136022334387, + "tokens_seen": 1998061568 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039841116995666825, + "loss": 0.0715, + "theoretical_loss": 3.4590984898906134, + "tokens_seen": 1998323712 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039833092601508586, + "loss": 0.0672, + "theoretical_loss": 3.4590609637484913, + "tokens_seen": 1998585856 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": -0.0010950957657769322, + "objective/train/docs_used": 728818, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.404526948928833, + "objective/train/original_loss": 1.404526948928833, + "objective/train/theoretical_loss": 3.459023443906138, + "objective/train/tokens_used": 2019308000, + "objective/train/value_avg": -0.0084686279296875, + "objective/train/value_loss": 0.00020435434998944402, + "objective/train/value_max": -2.014636993408203e-05, + "objective/train/value_min": -0.281494140625, + "objective/train/value_reward_corr": 0.7389789184439145, + "objective/train/value_std": 0.014190673828125, + "objective/train/weight_avg": 0.9990012645721436, + "objective/train/weighted_lm_loss": 1.4025185108184814, + "objective/train/weights_max": 1.1698321104049683, + "objective/train/weights_min": 0.3728525638580322, + "theoretical_loss": 3.459023443906138, + "tokens_seen": 1998848000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003982506820735035, + "loss": 0.0686, + "theoretical_loss": 3.459023443906138, + "tokens_seen": 1998848000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039817043813192103, + "loss": 0.0676, + "theoretical_loss": 3.458985930361668, + "tokens_seen": 1999110144 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039809019419033865, + "loss": 0.068, + "theoretical_loss": 3.4589484231132, + "tokens_seen": 1999372288 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039800995024875626, + "loss": 0.0668, + "theoretical_loss": 3.4589109221588514, + "tokens_seen": 1999634432 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003979297063071738, + "loss": 0.0721, + "theoretical_loss": 3.4588734274967416, + "tokens_seen": 1999896576 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003978494623655914, + "loss": 0.07, + "theoretical_loss": 3.4588359391249894, + "tokens_seen": 2000158720 + }, + { + "epoch": 0.61, + "learning_rate": 0.000397769218424009, + "loss": 0.0681, + "theoretical_loss": 3.458798457041716, + "tokens_seen": 2000420864 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003976889744824266, + "loss": 0.0684, + "theoretical_loss": 3.4587609812450424, + "tokens_seen": 2000683008 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039760873054084416, + "loss": 0.0691, + "theoretical_loss": 3.4587235117330906, + "tokens_seen": 2000945152 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039752848659926177, + "loss": 0.0662, + "theoretical_loss": 3.4586860485039836, + "tokens_seen": 2001207296 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003974482426576794, + "loss": 0.0684, + "theoretical_loss": 3.4586485915558454, + "tokens_seen": 2001469440 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039736799871609694, + "loss": 0.0697, + "theoretical_loss": 3.4586111408868, + "tokens_seen": 2001731584 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003972877547745145, + "loss": 0.0688, + "theoretical_loss": 3.4585736964949727, + "tokens_seen": 2001993728 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.0014532208442687988, + "objective/train/docs_used": 729965, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.308320164680481, + "objective/train/original_loss": 1.308320164680481, + "objective/train/theoretical_loss": 3.4585549766524304, + "objective/train/tokens_used": 2022584800, + "objective/train/value_avg": -0.00588226318359375, + "objective/train/value_loss": 0.00016835334827192128, + "objective/train/value_max": -4.7206878662109375e-05, + "objective/train/value_min": -0.2203369140625, + "objective/train/value_reward_corr": 0.6780754803158118, + "objective/train/value_std": 0.01114654541015625, + "objective/train/weight_avg": 1.0015290975570679, + "objective/train/weighted_lm_loss": 1.310949683189392, + "objective/train/weights_max": 1.2337805032730103, + "objective/train/weights_min": 0.3807870149612427, + "theoretical_loss": 3.4585549766524304, + "tokens_seen": 2002124800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003972075108329321, + "loss": 0.0666, + "theoretical_loss": 3.45853625837849, + "tokens_seen": 2002255872 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003971272668913497, + "loss": 0.0663, + "theoretical_loss": 3.458498826535479, + "tokens_seen": 2002518016 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003970470229497673, + "loss": 0.0699, + "theoretical_loss": 3.4584614009640666, + "tokens_seen": 2002780160 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003969667790081849, + "loss": 0.0666, + "theoretical_loss": 3.4584239816623823, + "tokens_seen": 2003042304 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003968865350666025, + "loss": 0.0688, + "theoretical_loss": 3.4583865686285544, + "tokens_seen": 2003304448 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039680629112502007, + "loss": 0.0705, + "theoretical_loss": 3.4583491618607134, + "tokens_seen": 2003566592 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039672604718343763, + "loss": 0.0687, + "theoretical_loss": 3.458311761356991, + "tokens_seen": 2003828736 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039664580324185524, + "loss": 0.0695, + "theoretical_loss": 3.4582743671155183, + "tokens_seen": 2004090880 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039656555930027285, + "loss": 0.0694, + "theoretical_loss": 3.458236979134428, + "tokens_seen": 2004353024 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003964853153586904, + "loss": 0.0671, + "theoretical_loss": 3.458199597411853, + "tokens_seen": 2004615168 + }, + { + "epoch": 0.61, + "learning_rate": 0.000396405071417108, + "loss": 0.069, + "theoretical_loss": 3.4581622219459276, + "tokens_seen": 2004877312 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039632482747552564, + "loss": 0.0656, + "theoretical_loss": 3.4581248527347874, + "tokens_seen": 2005139456 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.00025961673236452043, + "objective/train/docs_used": 731237, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3400050401687622, + "objective/train/original_loss": 1.3400051593780518, + "objective/train/theoretical_loss": 3.458087489776567, + "objective/train/tokens_used": 2025861600, + "objective/train/value_avg": -0.0065155029296875, + "objective/train/value_loss": 0.00015271175652742386, + "objective/train/value_max": -1.7642974853515625e-05, + "objective/train/value_min": -0.496826171875, + "objective/train/value_reward_corr": 0.73502062968239, + "objective/train/value_std": 0.0140533447265625, + "objective/train/weight_avg": 1.0003293752670288, + "objective/train/weighted_lm_loss": 1.3405778408050537, + "objective/train/weights_max": 1.4610846042633057, + "objective/train/weights_min": 0.40551888942718506, + "theoretical_loss": 3.458087489776567, + "tokens_seen": 2005401600 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003962445835339432, + "loss": 0.0674, + "theoretical_loss": 3.458087489776567, + "tokens_seen": 2005401600 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003961643395923608, + "loss": 0.0677, + "theoretical_loss": 3.458050133069404, + "tokens_seen": 2005663744 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039608409565077837, + "loss": 0.0685, + "theoretical_loss": 3.4580127826114353, + "tokens_seen": 2005925888 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003960038517091959, + "loss": 0.0672, + "theoretical_loss": 3.457975438400799, + "tokens_seen": 2006188032 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039592360776761354, + "loss": 0.0656, + "theoretical_loss": 3.4579381004356344, + "tokens_seen": 2006450176 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039584336382603115, + "loss": 0.0684, + "theoretical_loss": 3.4579007687140804, + "tokens_seen": 2006712320 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039576311988444876, + "loss": 0.0701, + "theoretical_loss": 3.4578634432342783, + "tokens_seen": 2006974464 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003956828759428663, + "loss": 0.0701, + "theoretical_loss": 3.4578261239943693, + "tokens_seen": 2007236608 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039560263200128393, + "loss": 0.0705, + "theoretical_loss": 3.4577888109924952, + "tokens_seen": 2007498752 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003955223880597015, + "loss": 0.0688, + "theoretical_loss": 3.4577515042267994, + "tokens_seen": 2007760896 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039544214411811905, + "loss": 0.0655, + "theoretical_loss": 3.457714203695425, + "tokens_seen": 2008023040 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039536190017653666, + "loss": 0.0669, + "theoretical_loss": 3.4576769093965174, + "tokens_seen": 2008285184 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003952816562349543, + "loss": 0.0681, + "theoretical_loss": 3.4576396213282212, + "tokens_seen": 2008547328 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.000601315638050437, + "objective/train/docs_used": 732466, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3620326519012451, + "objective/train/original_loss": 1.3620326519012451, + "objective/train/theoretical_loss": 3.457620979629973, + "objective/train/tokens_used": 2029138400, + "objective/train/value_avg": -0.00832366943359375, + "objective/train/value_loss": 0.00017946993466466665, + "objective/train/value_max": -8.821487426757812e-05, + "objective/train/value_min": -0.301513671875, + "objective/train/value_reward_corr": 0.65227393135886, + "objective/train/value_std": 0.012939453125, + "objective/train/weight_avg": 1.0006853342056274, + "objective/train/weighted_lm_loss": 1.3613041639328003, + "objective/train/weights_max": 1.1302473545074463, + "objective/train/weights_min": 0.3697379231452942, + "theoretical_loss": 3.457620979629973, + "tokens_seen": 2008678400 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003952014122933719, + "loss": 0.0702, + "theoretical_loss": 3.457602339488682, + "tokens_seen": 2008809472 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039512116835178945, + "loss": 0.0683, + "theoretical_loss": 3.4575650638760482, + "tokens_seen": 2009071616 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039504092441020706, + "loss": 0.0687, + "theoretical_loss": 3.457527794488466, + "tokens_seen": 2009333760 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003949606804686246, + "loss": 0.0685, + "theoretical_loss": 3.457490531324085, + "tokens_seen": 2009595904 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003948804365270422, + "loss": 0.0661, + "theoretical_loss": 3.4574532743810535, + "tokens_seen": 2009858048 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003948001925854598, + "loss": 0.0673, + "theoretical_loss": 3.4574160236575224, + "tokens_seen": 2010120192 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003947199486438774, + "loss": 0.0685, + "theoretical_loss": 3.457378779151642, + "tokens_seen": 2010382336 + }, + { + "epoch": 0.61, + "learning_rate": 0.000394639704702295, + "loss": 0.0675, + "theoretical_loss": 3.457341540861564, + "tokens_seen": 2010644480 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039455946076071257, + "loss": 0.0668, + "theoretical_loss": 3.4573043087854414, + "tokens_seen": 2010906624 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003944792168191302, + "loss": 0.0688, + "theoretical_loss": 3.4572670829214265, + "tokens_seen": 2011168768 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003943989728775478, + "loss": 0.0682, + "theoretical_loss": 3.457229863267674, + "tokens_seen": 2011430912 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003943187289359653, + "loss": 0.0678, + "theoretical_loss": 3.457192649822338, + "tokens_seen": 2011693056 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.0002623085747472942, + "objective/train/docs_used": 733698, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4967702627182007, + "objective/train/original_loss": 1.4967701435089111, + "objective/train/theoretical_loss": 3.4571554425835753, + "objective/train/tokens_used": 2032415200, + "objective/train/value_avg": -0.007495880126953125, + "objective/train/value_loss": 0.00019993532623630017, + "objective/train/value_max": -4.363059997558594e-05, + "objective/train/value_min": -0.908203125, + "objective/train/value_reward_corr": 0.6953280027505602, + "objective/train/value_std": 0.013702392578125, + "objective/train/weight_avg": 1.0003552436828613, + "objective/train/weighted_lm_loss": 1.497015357017517, + "objective/train/weights_max": 1.4672387838363647, + "objective/train/weights_min": 0.3828728199005127, + "theoretical_loss": 3.4571554425835753, + "tokens_seen": 2011955200 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003942384849943829, + "loss": 0.0698, + "theoretical_loss": 3.4571554425835753, + "tokens_seen": 2011955200 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003941582410528005, + "loss": 0.0688, + "theoretical_loss": 3.457118241549541, + "tokens_seen": 2012217344 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039407799711121814, + "loss": 0.0698, + "theoretical_loss": 3.4570810467183932, + "tokens_seen": 2012479488 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003939977531696357, + "loss": 0.0696, + "theoretical_loss": 3.45704385808829, + "tokens_seen": 2012741632 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003939175092280533, + "loss": 0.0704, + "theoretical_loss": 3.4570066756573885, + "tokens_seen": 2013003776 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003938372652864709, + "loss": 0.0675, + "theoretical_loss": 3.45696949942385, + "tokens_seen": 2013265920 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003937570213448884, + "loss": 0.0683, + "theoretical_loss": 3.4569323293858334, + "tokens_seen": 2013528064 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039367677740330604, + "loss": 0.0678, + "theoretical_loss": 3.4568951655415017, + "tokens_seen": 2013790208 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039359653346172365, + "loss": 0.0665, + "theoretical_loss": 3.4568580078890143, + "tokens_seen": 2014052352 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003935162895201412, + "loss": 0.0694, + "theoretical_loss": 3.4568208564265364, + "tokens_seen": 2014314496 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003934360455785588, + "loss": 0.0678, + "theoretical_loss": 3.4567837111522293, + "tokens_seen": 2014576640 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039335580163697644, + "loss": 0.0676, + "theoretical_loss": 3.456746572064259, + "tokens_seen": 2014838784 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039327555769539405, + "loss": 0.0676, + "theoretical_loss": 3.456709439160789, + "tokens_seen": 2015100928 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.00043751299381256104, + "objective/train/docs_used": 734924, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4263153076171875, + "objective/train/original_loss": 1.4263153076171875, + "objective/train/theoretical_loss": 3.456690875027669, + "objective/train/tokens_used": 2035692000, + "objective/train/value_avg": -0.00778961181640625, + "objective/train/value_loss": 0.00017772949649952352, + "objective/train/value_max": -4.13060188293457e-05, + "objective/train/value_min": -0.476318359375, + "objective/train/value_reward_corr": 0.6913995082057987, + "objective/train/value_std": 0.01406097412109375, + "objective/train/weight_avg": 1.0005217790603638, + "objective/train/weighted_lm_loss": 1.4272732734680176, + "objective/train/weights_max": 1.438776969909668, + "objective/train/weights_min": 0.3907938003540039, + "theoretical_loss": 3.456690875027669, + "tokens_seen": 2015232000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003931953137538116, + "loss": 0.0678, + "theoretical_loss": 3.456672312439986, + "tokens_seen": 2015363072 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039311506981222916, + "loss": 0.0679, + "theoretical_loss": 3.4566351919000167, + "tokens_seen": 2015625216 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003930348258706468, + "loss": 0.0649, + "theoretical_loss": 3.4565980775390477, + "tokens_seen": 2015887360 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039295458192906434, + "loss": 0.0688, + "theoretical_loss": 3.456560969355248, + "tokens_seen": 2016149504 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039287433798748195, + "loss": 0.0673, + "theoretical_loss": 3.456523867346786, + "tokens_seen": 2016411648 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039279409404589956, + "loss": 0.0655, + "theoretical_loss": 3.4564867715118313, + "tokens_seen": 2016673792 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003927138501043172, + "loss": 0.0686, + "theoretical_loss": 3.4564496818485546, + "tokens_seen": 2016935936 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039263360616273473, + "loss": 0.0693, + "theoretical_loss": 3.4564125983551275, + "tokens_seen": 2017198080 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003925533622211523, + "loss": 0.0696, + "theoretical_loss": 3.4563755210297216, + "tokens_seen": 2017460224 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003924731182795699, + "loss": 0.0684, + "theoretical_loss": 3.4563384498705094, + "tokens_seen": 2017722368 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039239287433798746, + "loss": 0.0679, + "theoretical_loss": 3.456301384875666, + "tokens_seen": 2017984512 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003923126303964051, + "loss": 0.0666, + "theoretical_loss": 3.456264326043364, + "tokens_seen": 2018246656 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.0014328511897474527, + "objective/train/docs_used": 736110, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3064955472946167, + "objective/train/original_loss": 1.3064955472946167, + "objective/train/theoretical_loss": 3.4562272733717796, + "objective/train/tokens_used": 2038968800, + "objective/train/value_avg": -0.0081787109375, + "objective/train/value_loss": 0.00021994484995957464, + "objective/train/value_max": -4.506111145019531e-05, + "objective/train/value_min": -0.57373046875, + "objective/train/value_reward_corr": 0.7439695048999607, + "objective/train/value_std": 0.017333984375, + "objective/train/weight_avg": 1.001534104347229, + "objective/train/weighted_lm_loss": 1.3077045679092407, + "objective/train/weights_max": 1.2325594425201416, + "objective/train/weights_min": 0.3684840798377991, + "theoretical_loss": 3.4562272733717796, + "tokens_seen": 2018508800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003922323864548227, + "loss": 0.0686, + "theoretical_loss": 3.4562272733717796, + "tokens_seen": 2018508800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003921521425132403, + "loss": 0.0679, + "theoretical_loss": 3.4561902268590883, + "tokens_seen": 2018770944 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039207189857165786, + "loss": 0.0673, + "theoretical_loss": 3.4561531865034665, + "tokens_seen": 2019033088 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003919916546300754, + "loss": 0.0653, + "theoretical_loss": 3.4561161523030925, + "tokens_seen": 2019295232 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039191141068849303, + "loss": 0.0672, + "theoretical_loss": 3.456079124256145, + "tokens_seen": 2019557376 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003918311667469106, + "loss": 0.0707, + "theoretical_loss": 3.4560421023608012, + "tokens_seen": 2019819520 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003917509228053282, + "loss": 0.0706, + "theoretical_loss": 3.4560050866152423, + "tokens_seen": 2020081664 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003916706788637458, + "loss": 0.0665, + "theoretical_loss": 3.455968077017649, + "tokens_seen": 2020343808 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039159043492216337, + "loss": 0.0675, + "theoretical_loss": 3.455931073566202, + "tokens_seen": 2020605952 + }, + { + "epoch": 0.61, + "learning_rate": 0.000391510190980581, + "loss": 0.0674, + "theoretical_loss": 3.4558940762590837, + "tokens_seen": 2020868096 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003914299470389986, + "loss": 0.0698, + "theoretical_loss": 3.455857085094477, + "tokens_seen": 2021130240 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039134970309741615, + "loss": 0.0661, + "theoretical_loss": 3.4558201000705653, + "tokens_seen": 2021392384 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003912694591558337, + "loss": 0.0701, + "theoretical_loss": 3.455783121185534, + "tokens_seen": 2021654528 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.0006677538040094078, + "objective/train/docs_used": 737310, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3411515951156616, + "objective/train/original_loss": 1.3411513566970825, + "objective/train/theoretical_loss": 3.455764634044531, + "objective/train/tokens_used": 2042245600, + "objective/train/value_avg": -0.006855010986328125, + "objective/train/value_loss": 0.00017928997112903744, + "objective/train/value_max": -5.561113357543945e-05, + "objective/train/value_min": -0.269287109375, + "objective/train/value_reward_corr": 0.717935934624405, + "objective/train/value_std": 0.01328277587890625, + "objective/train/weight_avg": 1.0007518529891968, + "objective/train/weighted_lm_loss": 1.3418787717819214, + "objective/train/weights_max": 1.1867854595184326, + "objective/train/weights_min": 0.3734874427318573, + "theoretical_loss": 3.455764634044531, + "tokens_seen": 2021785600 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003911892152142513, + "loss": 0.0651, + "theoretical_loss": 3.4557461484375676, + "tokens_seen": 2021916672 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039110897127266894, + "loss": 0.0693, + "theoretical_loss": 3.4557091818248518, + "tokens_seen": 2022178816 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003910287273310865, + "loss": 0.0682, + "theoretical_loss": 3.4556722213455737, + "tokens_seen": 2022440960 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003909484833895041, + "loss": 0.0672, + "theoretical_loss": 3.455635266997921, + "tokens_seen": 2022703104 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003908682394479217, + "loss": 0.0665, + "theoretical_loss": 3.4555983187800825, + "tokens_seen": 2022965248 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003907879955063393, + "loss": 0.0682, + "theoretical_loss": 3.455561376690246, + "tokens_seen": 2023227392 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039070775156475684, + "loss": 0.0677, + "theoretical_loss": 3.4555244407266024, + "tokens_seen": 2023489536 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039062750762317445, + "loss": 0.0688, + "theoretical_loss": 3.455487510887342, + "tokens_seen": 2023751680 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039054726368159206, + "loss": 0.0689, + "theoretical_loss": 3.455450587170656, + "tokens_seen": 2024013824 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003904670197400096, + "loss": 0.0705, + "theoretical_loss": 3.455413669574737, + "tokens_seen": 2024275968 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039038677579842723, + "loss": 0.068, + "theoretical_loss": 3.4553767580977777, + "tokens_seen": 2024538112 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039030653185684485, + "loss": 0.0681, + "theoretical_loss": 3.4553398527379717, + "tokens_seen": 2024800256 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.00035192776704207063, + "objective/train/docs_used": 738608, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2800827026367188, + "objective/train/original_loss": 1.2800827026367188, + "objective/train/theoretical_loss": 3.4553029534935136, + "objective/train/tokens_used": 2045522400, + "objective/train/value_avg": -0.010284423828125, + "objective/train/value_loss": 0.00023136693926062435, + "objective/train/value_max": -9.995698928833008e-05, + "objective/train/value_min": -0.76904296875, + "objective/train/value_reward_corr": 0.884760049339832, + "objective/train/value_std": 0.0257110595703125, + "objective/train/weight_avg": 1.0004581212997437, + "objective/train/weighted_lm_loss": 1.2805683612823486, + "objective/train/weights_max": 1.1941192150115967, + "objective/train/weights_min": 0.37031102180480957, + "theoretical_loss": 3.4553029534935136, + "tokens_seen": 2025062400 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003902262879152624, + "loss": 0.067, + "theoretical_loss": 3.4553029534935136, + "tokens_seen": 2025062400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00039014604397367996, + "loss": 0.0669, + "theoretical_loss": 3.4552660603625984, + "tokens_seen": 2025324544 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003900658000320976, + "loss": 0.0669, + "theoretical_loss": 3.455229173343423, + "tokens_seen": 2025586688 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003899855560905152, + "loss": 0.068, + "theoretical_loss": 3.455192292434183, + "tokens_seen": 2025848832 + }, + { + "epoch": 0.61, + "learning_rate": 0.00038990531214893275, + "loss": 0.0658, + "theoretical_loss": 3.455155417633076, + "tokens_seen": 2026110976 + }, + { + "epoch": 0.61, + "learning_rate": 0.00038982506820735036, + "loss": 0.0652, + "theoretical_loss": 3.4551185489383007, + "tokens_seen": 2026373120 + }, + { + "epoch": 0.61, + "learning_rate": 0.00038974482426576797, + "loss": 0.0675, + "theoretical_loss": 3.4550816863480565, + "tokens_seen": 2026635264 + }, + { + "epoch": 0.61, + "learning_rate": 0.00038966458032418553, + "loss": 0.0679, + "theoretical_loss": 3.455044829860543, + "tokens_seen": 2026897408 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003895843363826031, + "loss": 0.0691, + "theoretical_loss": 3.45500797947396, + "tokens_seen": 2027159552 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003895040924410207, + "loss": 0.0679, + "theoretical_loss": 3.45497113518651, + "tokens_seen": 2027421696 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003894238484994383, + "loss": 0.0699, + "theoretical_loss": 3.4549342969963943, + "tokens_seen": 2027683840 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003893436045578559, + "loss": 0.0679, + "theoretical_loss": 3.4548974649018165, + "tokens_seen": 2027945984 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003892633606162735, + "loss": 0.0685, + "theoretical_loss": 3.4548606389009793, + "tokens_seen": 2028208128 + }, + { + "epoch": 0.61, + "objective/train/advantage_avg": 0.0008364830864593387, + "objective/train/docs_used": 739750, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.382369041442871, + "objective/train/original_loss": 1.382369041442871, + "objective/train/theoretical_loss": 3.4548422281851527, + "objective/train/tokens_used": 2048799200, + "objective/train/value_avg": -0.00458526611328125, + "objective/train/value_loss": 0.000101262099633459, + "objective/train/value_max": -3.510713577270508e-05, + "objective/train/value_min": -0.1873779296875, + "objective/train/value_reward_corr": 0.6766283418160592, + "objective/train/value_std": 0.00801849365234375, + "objective/train/weight_avg": 1.000882863998413, + "objective/train/weighted_lm_loss": 1.384031891822815, + "objective/train/weights_max": 1.1359182596206665, + "objective/train/weights_min": 0.3874214291572571, + "theoretical_loss": 3.4548422281851527, + "tokens_seen": 2028339200 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003891831166746911, + "loss": 0.0653, + "theoretical_loss": 3.4548238189920877, + "tokens_seen": 2028470272 + }, + { + "epoch": 0.61, + "learning_rate": 0.00038910287273310866, + "loss": 0.0661, + "theoretical_loss": 3.4547870051733467, + "tokens_seen": 2028732416 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003890226287915262, + "loss": 0.0675, + "theoretical_loss": 3.4547501974429626, + "tokens_seen": 2028994560 + }, + { + "epoch": 0.61, + "learning_rate": 0.00038894238484994383, + "loss": 0.0661, + "theoretical_loss": 3.454713395799142, + "tokens_seen": 2029256704 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038886214090836144, + "loss": 0.0681, + "theoretical_loss": 3.4546766002400915, + "tokens_seen": 2029518848 + }, + { + "epoch": 0.62, + "learning_rate": 0.000388781896966779, + "loss": 0.0674, + "theoretical_loss": 3.4546398107640197, + "tokens_seen": 2029780992 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003887016530251966, + "loss": 0.07, + "theoretical_loss": 3.4546030273691364, + "tokens_seen": 2030043136 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003886214090836142, + "loss": 0.066, + "theoretical_loss": 3.4545662500536505, + "tokens_seen": 2030305280 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003885411651420318, + "loss": 0.071, + "theoretical_loss": 3.4545294788157728, + "tokens_seen": 2030567424 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038846092120044934, + "loss": 0.0686, + "theoretical_loss": 3.454492713653714, + "tokens_seen": 2030829568 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038838067725886695, + "loss": 0.0693, + "theoretical_loss": 3.4544559545656863, + "tokens_seen": 2031091712 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038830043331728457, + "loss": 0.0662, + "theoretical_loss": 3.4544192015499027, + "tokens_seen": 2031353856 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.00042441111872904, + "objective/train/docs_used": 741020, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3329555988311768, + "objective/train/original_loss": 1.3329553604125977, + "objective/train/theoretical_loss": 3.454382454604577, + "objective/train/tokens_used": 2052076000, + "objective/train/value_avg": -0.0072784423828125, + "objective/train/value_loss": 0.00047027276013977826, + "objective/train/value_max": -1.9848346710205078e-05, + "objective/train/value_min": -0.70166015625, + "objective/train/value_reward_corr": 0.7057014467850871, + "objective/train/value_std": 0.018829345703125, + "objective/train/weight_avg": 1.0006253719329834, + "objective/train/weighted_lm_loss": 1.3332040309906006, + "objective/train/weights_max": 1.614859700202942, + "objective/train/weights_min": 0.3689524829387665, + "theoretical_loss": 3.454382454604577, + "tokens_seen": 2031616000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003882201893757021, + "loss": 0.0668, + "theoretical_loss": 3.454382454604577, + "tokens_seen": 2031616000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038813994543411974, + "loss": 0.0666, + "theoretical_loss": 3.454345713727923, + "tokens_seen": 2031878144 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038805970149253735, + "loss": 0.0685, + "theoretical_loss": 3.4543089789181556, + "tokens_seen": 2032140288 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003879794575509549, + "loss": 0.0662, + "theoretical_loss": 3.4542722501734904, + "tokens_seen": 2032402432 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003878992136093725, + "loss": 0.0682, + "theoretical_loss": 3.454235527492145, + "tokens_seen": 2032664576 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003878189696677901, + "loss": 0.0644, + "theoretical_loss": 3.4541988108723354, + "tokens_seen": 2032926720 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038773872572620764, + "loss": 0.0698, + "theoretical_loss": 3.4541621003122804, + "tokens_seen": 2033188864 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038765848178462525, + "loss": 0.0708, + "theoretical_loss": 3.4541253958101983, + "tokens_seen": 2033451008 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038757823784304286, + "loss": 0.0677, + "theoretical_loss": 3.454088697364309, + "tokens_seen": 2033713152 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003874979939014605, + "loss": 0.0651, + "theoretical_loss": 3.454052004972833, + "tokens_seen": 2033975296 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038741774995987803, + "loss": 0.0679, + "theoretical_loss": 3.4540153186339912, + "tokens_seen": 2034237440 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038733750601829565, + "loss": 0.0684, + "theoretical_loss": 3.4539786383460047, + "tokens_seen": 2034499584 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003872572620767132, + "loss": 0.0666, + "theoretical_loss": 3.4539419641070968, + "tokens_seen": 2034761728 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.0009591991547495127, + "objective/train/docs_used": 742098, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4628486633300781, + "objective/train/original_loss": 1.4628486633300781, + "objective/train/theoretical_loss": 3.453923629255492, + "objective/train/tokens_used": 2055352800, + "objective/train/value_avg": -0.00998687744140625, + "objective/train/value_loss": 0.00025923250359483063, + "objective/train/value_max": -5.561113357543945e-05, + "objective/train/value_min": -0.90869140625, + "objective/train/value_reward_corr": 0.6947016315842284, + "objective/train/value_std": 0.0157470703125, + "objective/train/weight_avg": 1.0010782480239868, + "objective/train/weighted_lm_loss": 1.463948369026184, + "objective/train/weights_max": 1.1520448923110962, + "objective/train/weights_min": 0.36821871995925903, + "theoretical_loss": 3.453923629255492, + "tokens_seen": 2034892800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038717701813513076, + "loss": 0.0676, + "theoretical_loss": 3.4539052959154906, + "tokens_seen": 2035023872 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003870967741935484, + "loss": 0.0689, + "theoretical_loss": 3.4538686337694102, + "tokens_seen": 2035286016 + }, + { + "epoch": 0.62, + "learning_rate": 0.000387016530251966, + "loss": 0.0687, + "theoretical_loss": 3.45383197766708, + "tokens_seen": 2035548160 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003869362863103836, + "loss": 0.0645, + "theoretical_loss": 3.453795327606726, + "tokens_seen": 2035810304 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038685604236880116, + "loss": 0.0687, + "theoretical_loss": 3.4537586835865746, + "tokens_seen": 2036072448 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038677579842721877, + "loss": 0.0698, + "theoretical_loss": 3.4537220456048523, + "tokens_seen": 2036334592 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003866955544856364, + "loss": 0.0685, + "theoretical_loss": 3.453685413659788, + "tokens_seen": 2036596736 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003866153105440539, + "loss": 0.0683, + "theoretical_loss": 3.4536487877496085, + "tokens_seen": 2036858880 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003865350666024715, + "loss": 0.0692, + "theoretical_loss": 3.4536121678725444, + "tokens_seen": 2037121024 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003864548226608891, + "loss": 0.0678, + "theoretical_loss": 3.453575554026825, + "tokens_seen": 2037383168 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003863745787193067, + "loss": 0.0651, + "theoretical_loss": 3.4535389462106822, + "tokens_seen": 2037645312 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003862943347777243, + "loss": 0.0685, + "theoretical_loss": 3.4535023444223465, + "tokens_seen": 2037907456 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": -0.0006078130681999028, + "objective/train/docs_used": 743253, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2707901000976562, + "objective/train/original_loss": 1.2707901000976562, + "objective/train/theoretical_loss": 3.453465748660051, + "objective/train/tokens_used": 2058629600, + "objective/train/value_avg": -0.007007598876953125, + "objective/train/value_loss": 0.00013605756976176053, + "objective/train/value_max": -5.269050598144531e-05, + "objective/train/value_min": -0.456787109375, + "objective/train/value_reward_corr": 0.7220390549582588, + "objective/train/value_std": 0.01114654541015625, + "objective/train/weight_avg": 0.9994558691978455, + "objective/train/weighted_lm_loss": 1.2703384160995483, + "objective/train/weights_max": 1.1364604234695435, + "objective/train/weights_min": 0.3713054358959198, + "theoretical_loss": 3.453465748660051, + "tokens_seen": 2038169600 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003862140908361419, + "loss": 0.0676, + "theoretical_loss": 3.453465748660051, + "tokens_seen": 2038169600 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003861338468945595, + "loss": 0.0671, + "theoretical_loss": 3.4534291589220274, + "tokens_seen": 2038431744 + }, + { + "epoch": 0.62, + "learning_rate": 0.000386053602952977, + "loss": 0.0685, + "theoretical_loss": 3.4533925752065104, + "tokens_seen": 2038693888 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038597335901139463, + "loss": 0.0683, + "theoretical_loss": 3.4533559975117347, + "tokens_seen": 2038956032 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038589311506981224, + "loss": 0.0696, + "theoretical_loss": 3.453319425835935, + "tokens_seen": 2039218176 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038581287112822985, + "loss": 0.0668, + "theoretical_loss": 3.4532828601773478, + "tokens_seen": 2039480320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003857326271866474, + "loss": 0.0664, + "theoretical_loss": 3.453246300534209, + "tokens_seen": 2039742464 + }, + { + "epoch": 0.62, + "learning_rate": 0.000385652383245065, + "loss": 0.0696, + "theoretical_loss": 3.4532097469047573, + "tokens_seen": 2040004608 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038557213930348264, + "loss": 0.0684, + "theoretical_loss": 3.4531731992872303, + "tokens_seen": 2040266752 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038549189536190014, + "loss": 0.0667, + "theoretical_loss": 3.4531366576798668, + "tokens_seen": 2040528896 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038541165142031775, + "loss": 0.0698, + "theoretical_loss": 3.4531001220809068, + "tokens_seen": 2040791040 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038533140747873537, + "loss": 0.0685, + "theoretical_loss": 3.453063592488591, + "tokens_seen": 2041053184 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003852511635371529, + "loss": 0.0664, + "theoretical_loss": 3.4530270689011595, + "tokens_seen": 2041315328 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.0007066431571729481, + "objective/train/docs_used": 744490, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.501568078994751, + "objective/train/original_loss": 1.501568078994751, + "objective/train/theoretical_loss": 3.4530088093587263, + "objective/train/tokens_used": 2061906400, + "objective/train/value_avg": -0.007480621337890625, + "objective/train/value_loss": 0.0003073073457926512, + "objective/train/value_max": -4.267692565917969e-05, + "objective/train/value_min": -0.266845703125, + "objective/train/value_reward_corr": 0.5380204613048158, + "objective/train/value_std": 0.01178741455078125, + "objective/train/weight_avg": 1.0008387565612793, + "objective/train/weighted_lm_loss": 1.501943826675415, + "objective/train/weights_max": 1.2743744850158691, + "objective/train/weights_min": 0.37047773599624634, + "theoretical_loss": 3.4530088093587263, + "tokens_seen": 2041446400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038517091959557054, + "loss": 0.0693, + "theoretical_loss": 3.4529905513168555, + "tokens_seen": 2041577472 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038509067565398815, + "loss": 0.0675, + "theoretical_loss": 3.452954039733921, + "tokens_seen": 2041839616 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038501043171240576, + "loss": 0.0654, + "theoretical_loss": 3.4529175341505995, + "tokens_seen": 2042101760 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003849301877708233, + "loss": 0.0674, + "theoretical_loss": 3.4528810345651357, + "tokens_seen": 2042363904 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003848499438292409, + "loss": 0.0677, + "theoretical_loss": 3.4528445409757738, + "tokens_seen": 2042626048 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003847696998876585, + "loss": 0.0691, + "theoretical_loss": 3.45280805338076, + "tokens_seen": 2042888192 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038468945594607605, + "loss": 0.0659, + "theoretical_loss": 3.45277157177834, + "tokens_seen": 2043150336 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038460921200449366, + "loss": 0.0665, + "theoretical_loss": 3.4527350961667613, + "tokens_seen": 2043412480 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003845289680629113, + "loss": 0.0669, + "theoretical_loss": 3.452698626544272, + "tokens_seen": 2043674624 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003844487241213289, + "loss": 0.0674, + "theoretical_loss": 3.45266216290912, + "tokens_seen": 2043936768 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038436848017974645, + "loss": 0.0683, + "theoretical_loss": 3.452625705259556, + "tokens_seen": 2044198912 + }, + { + "epoch": 0.62, + "learning_rate": 0.000384288236238164, + "loss": 0.0685, + "theoretical_loss": 3.4525892535938283, + "tokens_seen": 2044461056 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.0002817703934852034, + "objective/train/docs_used": 745676, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1205445528030396, + "objective/train/original_loss": 1.1205445528030396, + "objective/train/theoretical_loss": 3.4525528079101884, + "objective/train/tokens_used": 2065183200, + "objective/train/value_avg": -0.006732940673828125, + "objective/train/value_loss": 0.00019041493942495435, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.61474609375, + "objective/train/value_reward_corr": 0.7468571501310386, + "objective/train/value_std": 0.01427459716796875, + "objective/train/weight_avg": 1.0003718137741089, + "objective/train/weighted_lm_loss": 1.1208994388580322, + "objective/train/weights_max": 1.6867209672927856, + "objective/train/weights_min": 0.49503588676452637, + "theoretical_loss": 3.4525528079101884, + "tokens_seen": 2044723200 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003842079922965816, + "loss": 0.068, + "theoretical_loss": 3.4525528079101884, + "tokens_seen": 2044723200 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003841277483549992, + "loss": 0.0682, + "theoretical_loss": 3.4525163682068882, + "tokens_seen": 2044985344 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003840475044134168, + "loss": 0.0706, + "theoretical_loss": 3.45247993448218, + "tokens_seen": 2045247488 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003839672604718344, + "loss": 0.067, + "theoretical_loss": 3.4524435067343164, + "tokens_seen": 2045509632 + }, + { + "epoch": 0.62, + "learning_rate": 0.000383887016530252, + "loss": 0.0644, + "theoretical_loss": 3.452407084961551, + "tokens_seen": 2045771776 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038380677258866957, + "loss": 0.0643, + "theoretical_loss": 3.452370669162139, + "tokens_seen": 2046033920 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038372652864708713, + "loss": 0.0677, + "theoretical_loss": 3.452334259334335, + "tokens_seen": 2046296064 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038364628470550474, + "loss": 0.0677, + "theoretical_loss": 3.452297855476395, + "tokens_seen": 2046558208 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003835660407639223, + "loss": 0.0668, + "theoretical_loss": 3.4522614575865753, + "tokens_seen": 2046820352 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003834857968223399, + "loss": 0.0647, + "theoretical_loss": 3.452225065663134, + "tokens_seen": 2047082496 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003834055528807575, + "loss": 0.0687, + "theoretical_loss": 3.4521886797043293, + "tokens_seen": 2047344640 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003833253089391751, + "loss": 0.0661, + "theoretical_loss": 3.4521522997084197, + "tokens_seen": 2047606784 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003832450649975927, + "loss": 0.0682, + "theoretical_loss": 3.452115925673665, + "tokens_seen": 2047868928 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.0006643827073276043, + "objective/train/docs_used": 746888, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2512800693511963, + "objective/train/original_loss": 1.2512800693511963, + "objective/train/theoretical_loss": 3.4520977408911766, + "objective/train/tokens_used": 2068460000, + "objective/train/value_avg": -0.004673004150390625, + "objective/train/value_loss": 9.969693201128393e-05, + "objective/train/value_max": -3.451108932495117e-05, + "objective/train/value_min": -0.271240234375, + "objective/train/value_reward_corr": 0.6657257095051546, + "objective/train/value_std": 0.0092926025390625, + "objective/train/weight_avg": 1.000710368156433, + "objective/train/weighted_lm_loss": 1.2529733180999756, + "objective/train/weights_max": 1.1873843669891357, + "objective/train/weights_min": 0.393546462059021, + "theoretical_loss": 3.4520977408911766, + "tokens_seen": 2048000000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003831648210560103, + "loss": 0.063, + "theoretical_loss": 3.4520795575983247, + "tokens_seen": 2048131072 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038308457711442787, + "loss": 0.0709, + "theoretical_loss": 3.4520431954806607, + "tokens_seen": 2048393216 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003830043331728454, + "loss": 0.0681, + "theoretical_loss": 3.452006839318935, + "tokens_seen": 2048655360 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038292408923126304, + "loss": 0.0686, + "theoretical_loss": 3.45197048911141, + "tokens_seen": 2048917504 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038284384528968065, + "loss": 0.0693, + "theoretical_loss": 3.451934144856348, + "tokens_seen": 2049179648 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003827636013480982, + "loss": 0.0679, + "theoretical_loss": 3.451897806552014, + "tokens_seen": 2049441792 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003826833574065158, + "loss": 0.0692, + "theoretical_loss": 3.451861474196672, + "tokens_seen": 2049703936 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038260311346493344, + "loss": 0.0687, + "theoretical_loss": 3.4518251477885884, + "tokens_seen": 2049966080 + }, + { + "epoch": 0.62, + "learning_rate": 0.000382522869523351, + "loss": 0.0713, + "theoretical_loss": 3.4517888273260287, + "tokens_seen": 2050228224 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038244262558176855, + "loss": 0.0678, + "theoretical_loss": 3.4517525128072593, + "tokens_seen": 2050490368 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038236238164018616, + "loss": 0.0668, + "theoretical_loss": 3.4517162042305483, + "tokens_seen": 2050752512 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003822821376986038, + "loss": 0.0671, + "theoretical_loss": 3.4516799015941646, + "tokens_seen": 2051014656 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": -0.0004441846103873104, + "objective/train/docs_used": 748156, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.271807312965393, + "objective/train/original_loss": 1.271807312965393, + "objective/train/theoretical_loss": 3.451643604896377, + "objective/train/tokens_used": 2071736800, + "objective/train/value_avg": -0.008514404296875, + "objective/train/value_loss": 0.00032554796780459583, + "objective/train/value_max": -2.86102294921875e-05, + "objective/train/value_min": -0.8369140625, + "objective/train/value_reward_corr": 0.689176385382612, + "objective/train/value_std": 0.0157318115234375, + "objective/train/weight_avg": 0.9996981024742126, + "objective/train/weighted_lm_loss": 1.270479679107666, + "objective/train/weights_max": 1.6649008989334106, + "objective/train/weights_min": 0.36991289258003235, + "theoretical_loss": 3.451643604896377, + "tokens_seen": 2051276800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038220189375702134, + "loss": 0.0683, + "theoretical_loss": 3.451643604896377, + "tokens_seen": 2051276800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038212164981543895, + "loss": 0.0699, + "theoretical_loss": 3.4516073141354546, + "tokens_seen": 2051538944 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038204140587385656, + "loss": 0.069, + "theoretical_loss": 3.451571029309668, + "tokens_seen": 2051801088 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003819611619322742, + "loss": 0.0666, + "theoretical_loss": 3.4515347504172893, + "tokens_seen": 2052063232 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003818809179906917, + "loss": 0.0687, + "theoretical_loss": 3.4514984774565898, + "tokens_seen": 2052325376 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003818006740491093, + "loss": 0.0676, + "theoretical_loss": 3.4514622104258423, + "tokens_seen": 2052587520 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003817204301075269, + "loss": 0.0653, + "theoretical_loss": 3.451425949323321, + "tokens_seen": 2052849664 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038164018616594446, + "loss": 0.0666, + "theoretical_loss": 3.451389694147298, + "tokens_seen": 2053111808 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003815599422243621, + "loss": 0.0679, + "theoretical_loss": 3.45135344489605, + "tokens_seen": 2053373952 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003814796982827797, + "loss": 0.069, + "theoretical_loss": 3.4513172015678526, + "tokens_seen": 2053636096 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038139945434119725, + "loss": 0.0651, + "theoretical_loss": 3.451280964160981, + "tokens_seen": 2053898240 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003813192103996148, + "loss": 0.0677, + "theoretical_loss": 3.451244732673713, + "tokens_seen": 2054160384 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003812389664580324, + "loss": 0.0655, + "theoretical_loss": 3.451208507104326, + "tokens_seen": 2054422528 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.0003811193164438009, + "objective/train/docs_used": 749325, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3119323253631592, + "objective/train/original_loss": 1.3119322061538696, + "objective/train/theoretical_loss": 3.4511903965382995, + "objective/train/tokens_used": 2075013600, + "objective/train/value_avg": -0.006206512451171875, + "objective/train/value_loss": 0.00013376564311329275, + "objective/train/value_max": -3.88026237487793e-05, + "objective/train/value_min": -0.200927734375, + "objective/train/value_reward_corr": 0.6514871179032515, + "objective/train/value_std": 0.00937652587890625, + "objective/train/weight_avg": 1.000443458557129, + "objective/train/weighted_lm_loss": 1.3129256963729858, + "objective/train/weights_max": 1.1168068647384644, + "objective/train/weights_min": 0.39825838804244995, + "theoretical_loss": 3.4511903965382995, + "tokens_seen": 2054553600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038115872251645003, + "loss": 0.0674, + "theoretical_loss": 3.451172287451098, + "tokens_seen": 2054684672 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003810784785748676, + "loss": 0.0697, + "theoretical_loss": 3.45113607371231, + "tokens_seen": 2054946816 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003809982346332852, + "loss": 0.0675, + "theoretical_loss": 3.4510998658862397, + "tokens_seen": 2055208960 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003809179906917028, + "loss": 0.0672, + "theoretical_loss": 3.451063663971169, + "tokens_seen": 2055471104 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038083774675012037, + "loss": 0.0683, + "theoretical_loss": 3.4510274679653787, + "tokens_seen": 2055733248 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038075750280853793, + "loss": 0.0685, + "theoretical_loss": 3.4509912778671517, + "tokens_seen": 2055995392 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038067725886695554, + "loss": 0.0665, + "theoretical_loss": 3.4509550936747697, + "tokens_seen": 2056257536 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038059701492537315, + "loss": 0.0699, + "theoretical_loss": 3.4509189153865165, + "tokens_seen": 2056519680 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003805167709837907, + "loss": 0.0684, + "theoretical_loss": 3.450882743000677, + "tokens_seen": 2056781824 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003804365270422083, + "loss": 0.0706, + "theoretical_loss": 3.450846576515535, + "tokens_seen": 2057043968 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038035628310062594, + "loss": 0.0682, + "theoretical_loss": 3.4508104159293773, + "tokens_seen": 2057306112 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003802760391590435, + "loss": 0.0696, + "theoretical_loss": 3.4507742612404897, + "tokens_seen": 2057568256 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 0.0004177381924819201, + "objective/train/docs_used": 750571, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4458907842636108, + "objective/train/original_loss": 1.4458907842636108, + "objective/train/theoretical_loss": 3.4507381124471594, + "objective/train/tokens_used": 2078290400, + "objective/train/value_avg": -0.006328582763671875, + "objective/train/value_loss": 0.00010109315917361528, + "objective/train/value_max": -4.100799560546875e-05, + "objective/train/value_min": -0.20556640625, + "objective/train/value_reward_corr": 0.7573714555918173, + "objective/train/value_std": 0.01088714599609375, + "objective/train/weight_avg": 1.00046706199646, + "objective/train/weighted_lm_loss": 1.4466280937194824, + "objective/train/weights_max": 1.1097402572631836, + "objective/train/weights_min": 0.7170053720474243, + "theoretical_loss": 3.4507381124471594, + "tokens_seen": 2057830400 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003801957952174611, + "loss": 0.0687, + "theoretical_loss": 3.4507381124471594, + "tokens_seen": 2057830400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00038011555127587867, + "loss": 0.0675, + "theoretical_loss": 3.4507019695476737, + "tokens_seen": 2058092544 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003800353073342963, + "loss": 0.0685, + "theoretical_loss": 3.450665832540322, + "tokens_seen": 2058354688 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037995506339271384, + "loss": 0.0671, + "theoretical_loss": 3.450629701423393, + "tokens_seen": 2058616832 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037987481945113145, + "loss": 0.0705, + "theoretical_loss": 3.4505935761951765, + "tokens_seen": 2058878976 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037979457550954906, + "loss": 0.0681, + "theoretical_loss": 3.4505574568539634, + "tokens_seen": 2059141120 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003797143315679666, + "loss": 0.0667, + "theoretical_loss": 3.4505213433980453, + "tokens_seen": 2059403264 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037963408762638423, + "loss": 0.07, + "theoretical_loss": 3.450485235825714, + "tokens_seen": 2059665408 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003795538436848018, + "loss": 0.0669, + "theoretical_loss": 3.450449134135262, + "tokens_seen": 2059927552 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037947359974321935, + "loss": 0.0672, + "theoretical_loss": 3.4504130383249834, + "tokens_seen": 2060189696 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037939335580163696, + "loss": 0.0676, + "theoretical_loss": 3.4503769483931723, + "tokens_seen": 2060451840 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003793131118600546, + "loss": 0.0667, + "theoretical_loss": 3.4503408643381235, + "tokens_seen": 2060713984 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003792328679184722, + "loss": 0.066, + "theoretical_loss": 3.4503047861581324, + "tokens_seen": 2060976128 + }, + { + "epoch": 0.62, + "objective/train/advantage_avg": 8.919855463318527e-05, + "objective/train/docs_used": 751685, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3986313343048096, + "objective/train/original_loss": 1.39863121509552, + "objective/train/theoretical_loss": 3.450286749270751, + "objective/train/tokens_used": 2081567200, + "objective/train/value_avg": -0.0098876953125, + "objective/train/value_loss": 0.0007862445199862123, + "objective/train/value_max": -4.4345855712890625e-05, + "objective/train/value_min": -0.83544921875, + "objective/train/value_reward_corr": 0.5908050268196616, + "objective/train/value_std": 0.0193023681640625, + "objective/train/weight_avg": 1.0004451274871826, + "objective/train/weighted_lm_loss": 1.4001002311706543, + "objective/train/weights_max": 1.6845263242721558, + "objective/train/weights_min": 0.37146979570388794, + "theoretical_loss": 3.450286749270751, + "tokens_seen": 2061107200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037915262397688975, + "loss": 0.0688, + "theoretical_loss": 3.4502687138514956, + "tokens_seen": 2061238272 + }, + { + "epoch": 0.62, + "learning_rate": 0.00037907238003530736, + "loss": 0.0686, + "theoretical_loss": 3.4502326474165104, + "tokens_seen": 2061500416 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003789921360937249, + "loss": 0.0694, + "theoretical_loss": 3.450196586851474, + "tokens_seen": 2061762560 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003789118921521425, + "loss": 0.069, + "theoretical_loss": 3.450160532154685, + "tokens_seen": 2062024704 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003788316482105601, + "loss": 0.0674, + "theoretical_loss": 3.4501244833244438, + "tokens_seen": 2062286848 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003787514042689777, + "loss": 0.0688, + "theoretical_loss": 3.450088440359049, + "tokens_seen": 2062548992 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003786711603273953, + "loss": 0.0665, + "theoretical_loss": 3.450052403256801, + "tokens_seen": 2062811136 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003785909163858129, + "loss": 0.0673, + "theoretical_loss": 3.450016372016002, + "tokens_seen": 2063073280 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003785106724442305, + "loss": 0.0695, + "theoretical_loss": 3.4499803466349537, + "tokens_seen": 2063335424 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003784304285026481, + "loss": 0.0696, + "theoretical_loss": 3.449944327111959, + "tokens_seen": 2063597568 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003783501845610656, + "loss": 0.0677, + "theoretical_loss": 3.449908313445321, + "tokens_seen": 2063859712 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003782699406194832, + "loss": 0.0677, + "theoretical_loss": 3.4498723056333445, + "tokens_seen": 2064121856 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.001222593942657113, + "objective/train/docs_used": 752866, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3336776494979858, + "objective/train/original_loss": 1.3336775302886963, + "objective/train/theoretical_loss": 3.4498363036743336, + "objective/train/tokens_used": 2084844000, + "objective/train/value_avg": -0.0068817138671875, + "objective/train/value_loss": 0.0002020899555645883, + "objective/train/value_max": -1.996755599975586e-05, + "objective/train/value_min": -0.223388671875, + "objective/train/value_reward_corr": 0.5299720695477663, + "objective/train/value_std": 0.0103607177734375, + "objective/train/weight_avg": 1.0013130903244019, + "objective/train/weighted_lm_loss": 1.3344902992248535, + "objective/train/weights_max": 1.1334251165390015, + "objective/train/weights_min": 0.3700779974460602, + "theoretical_loss": 3.4498363036743336, + "tokens_seen": 2064384000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037818969667790083, + "loss": 0.0716, + "theoretical_loss": 3.4498363036743336, + "tokens_seen": 2064384000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037810945273631844, + "loss": 0.0667, + "theoretical_loss": 3.449800307566594, + "tokens_seen": 2064646144 + }, + { + "epoch": 0.63, + "learning_rate": 0.000378029208794736, + "loss": 0.0679, + "theoretical_loss": 3.449764317308432, + "tokens_seen": 2064908288 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003779489648531536, + "loss": 0.0701, + "theoretical_loss": 3.449728332898155, + "tokens_seen": 2065170432 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003778687209115712, + "loss": 0.0661, + "theoretical_loss": 3.4496923543340703, + "tokens_seen": 2065432576 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037778847696998873, + "loss": 0.0691, + "theoretical_loss": 3.4496563816144867, + "tokens_seen": 2065694720 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037770823302840634, + "loss": 0.0676, + "theoretical_loss": 3.449620414737713, + "tokens_seen": 2065956864 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037762798908682395, + "loss": 0.0661, + "theoretical_loss": 3.449584453702059, + "tokens_seen": 2066219008 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003775477451452415, + "loss": 0.0674, + "theoretical_loss": 3.449548498505834, + "tokens_seen": 2066481152 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003774675012036591, + "loss": 0.0714, + "theoretical_loss": 3.4495125491473515, + "tokens_seen": 2066743296 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037738725726207674, + "loss": 0.0672, + "theoretical_loss": 3.449476605624922, + "tokens_seen": 2067005440 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037730701332049435, + "loss": 0.0682, + "theoretical_loss": 3.4494406679368583, + "tokens_seen": 2067267584 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003772267693789119, + "loss": 0.0696, + "theoretical_loss": 3.449404736081474, + "tokens_seen": 2067529728 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.001186740817502141, + "objective/train/docs_used": 753921, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5087406635284424, + "objective/train/original_loss": 1.5087406635284424, + "objective/train/theoretical_loss": 3.4493867723405094, + "objective/train/tokens_used": 2088120800, + "objective/train/value_avg": -0.01195526123046875, + "objective/train/value_loss": 0.000287308037513867, + "objective/train/value_max": -2.3186206817626953e-05, + "objective/train/value_min": -0.3701171875, + "objective/train/value_reward_corr": 0.7702874414541618, + "objective/train/value_std": 0.023345947265625, + "objective/train/weight_avg": 1.0013281106948853, + "objective/train/weighted_lm_loss": 1.509456753730774, + "objective/train/weights_max": 1.3135128021240234, + "objective/train/weights_min": 0.6228498816490173, + "theoretical_loss": 3.4493867723405094, + "tokens_seen": 2067660800 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037714652543732947, + "loss": 0.0713, + "theoretical_loss": 3.4493688100570825, + "tokens_seen": 2067791872 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003770662814957471, + "loss": 0.0702, + "theoretical_loss": 3.4493328898619993, + "tokens_seen": 2068054016 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037698603755416464, + "loss": 0.0677, + "theoretical_loss": 3.449296975494539, + "tokens_seen": 2068316160 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037690579361258225, + "loss": 0.0666, + "theoretical_loss": 3.449261066953018, + "tokens_seen": 2068578304 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037682554967099986, + "loss": 0.0686, + "theoretical_loss": 3.4492251642357536, + "tokens_seen": 2068840448 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003767453057294175, + "loss": 0.0696, + "theoretical_loss": 3.4491892673410627, + "tokens_seen": 2069102592 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037666506178783503, + "loss": 0.069, + "theoretical_loss": 3.449153376267264, + "tokens_seen": 2069364736 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765848178462526, + "loss": 0.068, + "theoretical_loss": 3.449117491012676, + "tokens_seen": 2069626880 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003765045739046702, + "loss": 0.0673, + "theoretical_loss": 3.449081611575618, + "tokens_seen": 2069889024 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037642432996308776, + "loss": 0.0675, + "theoretical_loss": 3.4490457379544113, + "tokens_seen": 2070151168 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003763440860215054, + "loss": 0.0706, + "theoretical_loss": 3.4490098701473757, + "tokens_seen": 2070413312 + }, + { + "epoch": 0.63, + "learning_rate": 0.000376263842079923, + "loss": 0.0672, + "theoretical_loss": 3.448974008152834, + "tokens_seen": 2070675456 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.0002597297716420144, + "objective/train/docs_used": 755047, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2841310501098633, + "objective/train/original_loss": 1.2841309309005737, + "objective/train/theoretical_loss": 3.4489381519691085, + "objective/train/tokens_used": 2091397600, + "objective/train/value_avg": -0.0073089599609375, + "objective/train/value_loss": 0.00023801308998372406, + "objective/train/value_max": -6.401538848876953e-05, + "objective/train/value_min": -0.2763671875, + "objective/train/value_reward_corr": 0.7057852962488432, + "objective/train/value_std": 0.013275146484375, + "objective/train/weight_avg": 1.0003691911697388, + "objective/train/weighted_lm_loss": 1.2843565940856934, + "objective/train/weights_max": 1.14552640914917, + "objective/train/weights_min": 0.3723408579826355, + "theoretical_loss": 3.4489381519691085, + "tokens_seen": 2070937600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003761835981383406, + "loss": 0.0684, + "theoretical_loss": 3.4489381519691085, + "tokens_seen": 2070937600 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037610335419675816, + "loss": 0.0707, + "theoretical_loss": 3.4489023015945213, + "tokens_seen": 2071199744 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003760231102551757, + "loss": 0.0682, + "theoretical_loss": 3.448866457027397, + "tokens_seen": 2071461888 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037594286631359333, + "loss": 0.0685, + "theoretical_loss": 3.4488306182660597, + "tokens_seen": 2071724032 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003758626223720109, + "loss": 0.0713, + "theoretical_loss": 3.448794785308835, + "tokens_seen": 2071986176 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003757823784304285, + "loss": 0.0687, + "theoretical_loss": 3.4487589581540483, + "tokens_seen": 2072248320 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003757021344888461, + "loss": 0.0677, + "theoretical_loss": 3.4487231368000266, + "tokens_seen": 2072510464 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003756218905472637, + "loss": 0.0674, + "theoretical_loss": 3.448687321245097, + "tokens_seen": 2072772608 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003755416466056813, + "loss": 0.066, + "theoretical_loss": 3.4486515114875873, + "tokens_seen": 2073034752 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003754614026640989, + "loss": 0.0704, + "theoretical_loss": 3.448615707525826, + "tokens_seen": 2073296896 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037538115872251646, + "loss": 0.0686, + "theoretical_loss": 3.448579909358143, + "tokens_seen": 2073559040 + }, + { + "epoch": 0.63, + "learning_rate": 0.000375300914780934, + "loss": 0.0685, + "theoretical_loss": 3.448544116982868, + "tokens_seen": 2073821184 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037522067083935163, + "loss": 0.0696, + "theoretical_loss": 3.448508330398332, + "tokens_seen": 2074083328 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 6.10949209658429e-05, + "objective/train/docs_used": 756110, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3281543254852295, + "objective/train/original_loss": 1.3281540870666504, + "objective/train/theoretical_loss": 3.4484904392770694, + "objective/train/tokens_used": 2094674400, + "objective/train/value_avg": -0.00695037841796875, + "objective/train/value_loss": 0.00031872920226305723, + "objective/train/value_max": -4.100799560546875e-05, + "objective/train/value_min": -0.329345703125, + "objective/train/value_reward_corr": 0.6203797536959921, + "objective/train/value_std": 0.0136566162109375, + "objective/train/weight_avg": 1.0001968145370483, + "objective/train/weighted_lm_loss": 1.328813076019287, + "objective/train/weights_max": 1.3900582790374756, + "objective/train/weights_min": 0.372374951839447, + "theoretical_loss": 3.4484904392770694, + "tokens_seen": 2074214400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037514042689776924, + "loss": 0.0672, + "theoretical_loss": 3.448472549602866, + "tokens_seen": 2074345472 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003750601829561868, + "loss": 0.0652, + "theoretical_loss": 3.4484367745948026, + "tokens_seen": 2074607616 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003749799390146044, + "loss": 0.0697, + "theoretical_loss": 3.4484010053724736, + "tokens_seen": 2074869760 + }, + { + "epoch": 0.63, + "learning_rate": 0.000374899695073022, + "loss": 0.0687, + "theoretical_loss": 3.448365241934214, + "tokens_seen": 2075131904 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003748194511314396, + "loss": 0.0673, + "theoretical_loss": 3.4483294842783563, + "tokens_seen": 2075394048 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037473920718985714, + "loss": 0.0697, + "theoretical_loss": 3.4482937324032368, + "tokens_seen": 2075656192 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037465896324827475, + "loss": 0.0709, + "theoretical_loss": 3.4482579863071905, + "tokens_seen": 2075918336 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037457871930669237, + "loss": 0.0682, + "theoretical_loss": 3.4482222459885534, + "tokens_seen": 2076180480 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003744984753651099, + "loss": 0.0678, + "theoretical_loss": 3.4481865114456625, + "tokens_seen": 2076442624 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037441823142352754, + "loss": 0.0671, + "theoretical_loss": 3.448150782676856, + "tokens_seen": 2076704768 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037433798748194515, + "loss": 0.0641, + "theoretical_loss": 3.4481150596804717, + "tokens_seen": 2076966912 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003742577435403627, + "loss": 0.0668, + "theoretical_loss": 3.4480793424548493, + "tokens_seen": 2077229056 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.0016278502298519015, + "objective/train/docs_used": 757304, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4312312602996826, + "objective/train/original_loss": 1.4312312602996826, + "objective/train/theoretical_loss": 3.4480436309983276, + "objective/train/tokens_used": 2097951200, + "objective/train/value_avg": -0.007061004638671875, + "objective/train/value_loss": 0.00019786780467256904, + "objective/train/value_max": -4.6133995056152344e-05, + "objective/train/value_min": -0.6015625, + "objective/train/value_reward_corr": 0.6384379863088263, + "objective/train/value_std": 0.01360321044921875, + "objective/train/weight_avg": 1.0017173290252686, + "objective/train/weighted_lm_loss": 1.4329873323440552, + "objective/train/weights_max": 1.4410042762756348, + "objective/train/weights_min": 0.3698126971721649, + "theoretical_loss": 3.4480436309983276, + "tokens_seen": 2077491200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037417749959878027, + "loss": 0.0667, + "theoretical_loss": 3.4480436309983276, + "tokens_seen": 2077491200 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003740972556571979, + "loss": 0.0688, + "theoretical_loss": 3.448007925309247, + "tokens_seen": 2077753344 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003740170117156155, + "loss": 0.0694, + "theoretical_loss": 3.447972225385949, + "tokens_seen": 2078015488 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037393676777403305, + "loss": 0.065, + "theoretical_loss": 3.447936531226776, + "tokens_seen": 2078277632 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037385652383245066, + "loss": 0.0651, + "theoretical_loss": 3.4479008428300686, + "tokens_seen": 2078539776 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003737762798908683, + "loss": 0.0677, + "theoretical_loss": 3.447865160194171, + "tokens_seen": 2078801920 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003736960359492859, + "loss": 0.0687, + "theoretical_loss": 3.447829483317428, + "tokens_seen": 2079064064 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003736157920077034, + "loss": 0.0657, + "theoretical_loss": 3.4477938121981824, + "tokens_seen": 2079326208 + }, + { + "epoch": 0.63, + "learning_rate": 0.000373535548066121, + "loss": 0.0644, + "theoretical_loss": 3.44775814683478, + "tokens_seen": 2079588352 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003734553041245386, + "loss": 0.0665, + "theoretical_loss": 3.447722487225567, + "tokens_seen": 2079850496 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003733750601829562, + "loss": 0.064, + "theoretical_loss": 3.4476868333688904, + "tokens_seen": 2080112640 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003732948162413738, + "loss": 0.0685, + "theoretical_loss": 3.447651185263096, + "tokens_seen": 2080374784 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003732145722997914, + "loss": 0.0677, + "theoretical_loss": 3.447615542906532, + "tokens_seen": 2080636928 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.0009759876993484795, + "objective/train/docs_used": 758504, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2792775630950928, + "objective/train/original_loss": 1.2792774438858032, + "objective/train/theoretical_loss": 3.447597723883696, + "objective/train/tokens_used": 2101228000, + "objective/train/value_avg": -0.0057220458984375, + "objective/train/value_loss": 0.00013313665112946182, + "objective/train/value_max": -7.367134094238281e-05, + "objective/train/value_min": -0.2230224609375, + "objective/train/value_reward_corr": 0.5767379735236927, + "objective/train/value_std": 0.00873565673828125, + "objective/train/weight_avg": 1.0010370016098022, + "objective/train/weighted_lm_loss": 1.2802315950393677, + "objective/train/weights_max": 1.1462258100509644, + "objective/train/weights_min": 0.3722386062145233, + "theoretical_loss": 3.447597723883696, + "tokens_seen": 2080768000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037313432835820896, + "loss": 0.0691, + "theoretical_loss": 3.4475799062975483, + "tokens_seen": 2080899072 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003730540844166265, + "loss": 0.0679, + "theoretical_loss": 3.4475442754344927, + "tokens_seen": 2081161216 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037297384047504413, + "loss": 0.0641, + "theoretical_loss": 3.447508650315716, + "tokens_seen": 2081423360 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037289359653346174, + "loss": 0.0663, + "theoretical_loss": 3.4474730309395687, + "tokens_seen": 2081685504 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003728133525918793, + "loss": 0.0708, + "theoretical_loss": 3.4474374173044025, + "tokens_seen": 2081947648 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003727331086502969, + "loss": 0.0684, + "theoretical_loss": 3.4474018094085683, + "tokens_seen": 2082209792 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003726528647087145, + "loss": 0.0687, + "theoretical_loss": 3.4473662072504196, + "tokens_seen": 2082471936 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003725726207671321, + "loss": 0.0675, + "theoretical_loss": 3.4473306108283097, + "tokens_seen": 2082734080 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003724923768255497, + "loss": 0.0677, + "theoretical_loss": 3.4472950201405923, + "tokens_seen": 2082996224 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037241213288396726, + "loss": 0.0655, + "theoretical_loss": 3.4472594351856225, + "tokens_seen": 2083258368 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037233188894238487, + "loss": 0.0686, + "theoretical_loss": 3.447223855961756, + "tokens_seen": 2083520512 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003722516450008024, + "loss": 0.0675, + "theoretical_loss": 3.447188282467348, + "tokens_seen": 2083782656 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.000588573922868818, + "objective/train/docs_used": 759560, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3722100257873535, + "objective/train/original_loss": 1.3722100257873535, + "objective/train/theoretical_loss": 3.4471527147007555, + "objective/train/tokens_used": 2104504800, + "objective/train/value_avg": -0.00565338134765625, + "objective/train/value_loss": 0.00010087020200444385, + "objective/train/value_max": -6.014108657836914e-05, + "objective/train/value_min": -0.7431640625, + "objective/train/value_reward_corr": 0.7463114972528345, + "objective/train/value_std": 0.01171875, + "objective/train/weight_avg": 1.0006376504898071, + "objective/train/weighted_lm_loss": 1.3735826015472412, + "objective/train/weights_max": 1.217918872833252, + "objective/train/weights_min": 0.534896731376648, + "theoretical_loss": 3.4471527147007555, + "tokens_seen": 2084044800 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037217140105922004, + "loss": 0.0711, + "theoretical_loss": 3.4471527147007555, + "tokens_seen": 2084044800 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037209115711763765, + "loss": 0.0671, + "theoretical_loss": 3.447117152660337, + "tokens_seen": 2084306944 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003720109131760552, + "loss": 0.0685, + "theoretical_loss": 3.447081596344449, + "tokens_seen": 2084569088 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003719306692344728, + "loss": 0.0679, + "theoretical_loss": 3.447046045751451, + "tokens_seen": 2084831232 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003718504252928904, + "loss": 0.0659, + "theoretical_loss": 3.447010500879703, + "tokens_seen": 2085093376 + }, + { + "epoch": 0.63, + "learning_rate": 0.000371770181351308, + "loss": 0.065, + "theoretical_loss": 3.4469749617275642, + "tokens_seen": 2085355520 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037168993740972555, + "loss": 0.0661, + "theoretical_loss": 3.4469394282933967, + "tokens_seen": 2085617664 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037160969346814316, + "loss": 0.0699, + "theoretical_loss": 3.4469039005755606, + "tokens_seen": 2085879808 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003715294495265608, + "loss": 0.069, + "theoretical_loss": 3.446868378572419, + "tokens_seen": 2086141952 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037144920558497834, + "loss": 0.0692, + "theoretical_loss": 3.446832862282334, + "tokens_seen": 2086404096 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037136896164339595, + "loss": 0.0657, + "theoretical_loss": 3.44679735170367, + "tokens_seen": 2086666240 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003712887177018135, + "loss": 0.0657, + "theoretical_loss": 3.4467618468347903, + "tokens_seen": 2086928384 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037120847376023107, + "loss": 0.0668, + "theoretical_loss": 3.44672634767406, + "tokens_seen": 2087190528 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.0005934222135692835, + "objective/train/docs_used": 760861, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2901841402053833, + "objective/train/original_loss": 1.2901840209960938, + "objective/train/theoretical_loss": 3.446708600233741, + "objective/train/tokens_used": 2107781600, + "objective/train/value_avg": -0.00887298583984375, + "objective/train/value_loss": 0.0004026810929644853, + "objective/train/value_max": -5.918741226196289e-05, + "objective/train/value_min": -0.87939453125, + "objective/train/value_reward_corr": 0.670880919662618, + "objective/train/value_std": 0.016845703125, + "objective/train/weight_avg": 1.0007684230804443, + "objective/train/weighted_lm_loss": 1.2911221981048584, + "objective/train/weights_max": 1.1675496101379395, + "objective/train/weights_min": 0.3683558702468872, + "theoretical_loss": 3.446708600233741, + "tokens_seen": 2087321600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003711282298186487, + "loss": 0.0669, + "theoretical_loss": 3.4466908542198453, + "tokens_seen": 2087452672 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003710479858770663, + "loss": 0.0679, + "theoretical_loss": 3.4466553664705124, + "tokens_seen": 2087714816 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003709677419354839, + "loss": 0.067, + "theoretical_loss": 3.446619884424427, + "tokens_seen": 2087976960 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037088749799390146, + "loss": 0.0678, + "theoretical_loss": 3.446584408079958, + "tokens_seen": 2088239104 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003708072540523191, + "loss": 0.0691, + "theoretical_loss": 3.4465489374354727, + "tokens_seen": 2088501248 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003707270101107367, + "loss": 0.0689, + "theoretical_loss": 3.4465134724893405, + "tokens_seen": 2088763392 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003706467661691542, + "loss": 0.0681, + "theoretical_loss": 3.446478013239931, + "tokens_seen": 2089025536 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003705665222275718, + "loss": 0.0694, + "theoretical_loss": 3.4464425596856136, + "tokens_seen": 2089287680 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003704862782859894, + "loss": 0.0691, + "theoretical_loss": 3.446407111824761, + "tokens_seen": 2089549824 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037040603434440703, + "loss": 0.0684, + "theoretical_loss": 3.4463716696557425, + "tokens_seen": 2089811968 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003703257904028246, + "loss": 0.0674, + "theoretical_loss": 3.4463362331769325, + "tokens_seen": 2090074112 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003702455464612422, + "loss": 0.0671, + "theoretical_loss": 3.4463008023867028, + "tokens_seen": 2090336256 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.0002297655591974035, + "objective/train/docs_used": 762048, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4020442962646484, + "objective/train/original_loss": 1.4020442962646484, + "objective/train/theoretical_loss": 3.4462653772834266, + "objective/train/tokens_used": 2111058400, + "objective/train/value_avg": -0.00919342041015625, + "objective/train/value_loss": 0.00034913301351480186, + "objective/train/value_max": -3.0219554901123047e-05, + "objective/train/value_min": -0.4814453125, + "objective/train/value_reward_corr": 0.747774063492229, + "objective/train/value_std": 0.01910400390625, + "objective/train/weight_avg": 1.0003926753997803, + "objective/train/weighted_lm_loss": 1.4031243324279785, + "objective/train/weights_max": 1.618411898612976, + "objective/train/weights_min": 0.3690573275089264, + "theoretical_loss": 3.4462653772834266, + "tokens_seen": 2090598400 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003701653025196598, + "loss": 0.0695, + "theoretical_loss": 3.4462653772834266, + "tokens_seen": 2090598400 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003700850585780773, + "loss": 0.0679, + "theoretical_loss": 3.446229957865479, + "tokens_seen": 2090860544 + }, + { + "epoch": 0.63, + "learning_rate": 0.00037000481463649493, + "loss": 0.0691, + "theoretical_loss": 3.4461945441312354, + "tokens_seen": 2091122688 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036992457069491254, + "loss": 0.0671, + "theoretical_loss": 3.4461591360790704, + "tokens_seen": 2091384832 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036984432675333015, + "loss": 0.0657, + "theoretical_loss": 3.44612373370736, + "tokens_seen": 2091646976 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003697640828117477, + "loss": 0.0687, + "theoretical_loss": 3.446088337014482, + "tokens_seen": 2091909120 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003696838388701653, + "loss": 0.0685, + "theoretical_loss": 3.446052945998814, + "tokens_seen": 2092171264 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036960359492858294, + "loss": 0.0681, + "theoretical_loss": 3.446017560658734, + "tokens_seen": 2092433408 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036952335098700044, + "loss": 0.0674, + "theoretical_loss": 3.4459821809926208, + "tokens_seen": 2092695552 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036944310704541805, + "loss": 0.0668, + "theoretical_loss": 3.4459468069988537, + "tokens_seen": 2092957696 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036936286310383567, + "loss": 0.0667, + "theoretical_loss": 3.445911438675814, + "tokens_seen": 2093219840 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003692826191622532, + "loss": 0.0639, + "theoretical_loss": 3.445876076021882, + "tokens_seen": 2093481984 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036920237522067084, + "loss": 0.0684, + "theoretical_loss": 3.4458407190354388, + "tokens_seen": 2093744128 + }, + { + "epoch": 0.63, + "objective/train/advantage_avg": 0.00022437794541474432, + "objective/train/docs_used": 763180, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3575186729431152, + "objective/train/original_loss": 1.3575185537338257, + "objective/train/theoretical_loss": 3.4458230426670204, + "objective/train/tokens_used": 2114335200, + "objective/train/value_avg": -0.007144927978515625, + "objective/train/value_loss": 0.00013656125520356, + "objective/train/value_max": -6.204843521118164e-05, + "objective/train/value_min": -0.31640625, + "objective/train/value_reward_corr": 0.7161289931619385, + "objective/train/value_std": 0.01316070556640625, + "objective/train/weight_avg": 1.0002896785736084, + "objective/train/weighted_lm_loss": 1.3582242727279663, + "objective/train/weights_max": 1.2010502815246582, + "objective/train/weights_min": 0.4233749806880951, + "theoretical_loss": 3.4458230426670204, + "tokens_seen": 2093875200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036912213127908845, + "loss": 0.0666, + "theoretical_loss": 3.445805367714868, + "tokens_seen": 2094006272 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036904188733750606, + "loss": 0.0662, + "theoretical_loss": 3.445770022058551, + "tokens_seen": 2094268416 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003689616433959236, + "loss": 0.0677, + "theoretical_loss": 3.445734682064873, + "tokens_seen": 2094530560 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003688813994543412, + "loss": 0.0663, + "theoretical_loss": 3.4456993477322166, + "tokens_seen": 2094792704 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003688011555127588, + "loss": 0.0668, + "theoretical_loss": 3.4456640190589676, + "tokens_seen": 2095054848 + }, + { + "epoch": 0.63, + "learning_rate": 0.00036872091157117635, + "loss": 0.0686, + "theoretical_loss": 3.445628696043512, + "tokens_seen": 2095316992 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036864066762959396, + "loss": 0.0665, + "theoretical_loss": 3.4455933786842348, + "tokens_seen": 2095579136 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003685604236880116, + "loss": 0.07, + "theoretical_loss": 3.4455580669795243, + "tokens_seen": 2095841280 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003684801797464292, + "loss": 0.0665, + "theoretical_loss": 3.4455227609277674, + "tokens_seen": 2096103424 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036839993580484675, + "loss": 0.071, + "theoretical_loss": 3.445487460527352, + "tokens_seen": 2096365568 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003683196918632643, + "loss": 0.0694, + "theoretical_loss": 3.4454521657766675, + "tokens_seen": 2096627712 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003682394479216819, + "loss": 0.0699, + "theoretical_loss": 3.445416876674104, + "tokens_seen": 2096889856 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.0014173869276419282, + "objective/train/docs_used": 764413, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4790658950805664, + "objective/train/original_loss": 1.4790658950805664, + "objective/train/theoretical_loss": 3.44538159321805, + "objective/train/tokens_used": 2117612000, + "objective/train/value_avg": -0.007617950439453125, + "objective/train/value_loss": 0.00010832334373844787, + "objective/train/value_max": -7.033348083496094e-05, + "objective/train/value_min": -0.2322998046875, + "objective/train/value_reward_corr": 0.7426590004130484, + "objective/train/value_std": 0.013214111328125, + "objective/train/weight_avg": 1.0014710426330566, + "objective/train/weighted_lm_loss": 1.4811238050460815, + "objective/train/weights_max": 1.1480461359024048, + "objective/train/weights_min": 0.8195471167564392, + "theoretical_loss": 3.44538159321805, + "tokens_seen": 2097152000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003681592039800995, + "loss": 0.0677, + "theoretical_loss": 3.44538159321805, + "tokens_seen": 2097152000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003680789600385171, + "loss": 0.0677, + "theoretical_loss": 3.4453463154068977, + "tokens_seen": 2097414144 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003679987160969347, + "loss": 0.0691, + "theoretical_loss": 3.4453110432390384, + "tokens_seen": 2097676288 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003679184721553523, + "loss": 0.0682, + "theoretical_loss": 3.4452757767128643, + "tokens_seen": 2097938432 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003678382282137699, + "loss": 0.0643, + "theoretical_loss": 3.445240515826768, + "tokens_seen": 2098200576 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003677579842721875, + "loss": 0.0696, + "theoretical_loss": 3.4452052605791432, + "tokens_seen": 2098462720 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036767774033060504, + "loss": 0.0671, + "theoretical_loss": 3.4451700109683836, + "tokens_seen": 2098724864 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003675974963890226, + "loss": 0.0684, + "theoretical_loss": 3.4451347669928856, + "tokens_seen": 2098987008 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003675172524474402, + "loss": 0.0692, + "theoretical_loss": 3.4450995286510424, + "tokens_seen": 2099249152 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036743700850585783, + "loss": 0.0688, + "theoretical_loss": 3.445064295941252, + "tokens_seen": 2099511296 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036735676456427544, + "loss": 0.0665, + "theoretical_loss": 3.4450290688619103, + "tokens_seen": 2099773440 + }, + { + "epoch": 0.64, + "learning_rate": 0.000367276520622693, + "loss": 0.0637, + "theoretical_loss": 3.444993847411415, + "tokens_seen": 2100035584 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003671962766811106, + "loss": 0.069, + "theoretical_loss": 3.4449586315881637, + "tokens_seen": 2100297728 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.000818012107629329, + "objective/train/docs_used": 765555, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2233686447143555, + "objective/train/original_loss": 1.2233686447143555, + "objective/train/theoretical_loss": 3.4449410257862545, + "objective/train/tokens_used": 2120888800, + "objective/train/value_avg": -0.006542205810546875, + "objective/train/value_loss": 0.00015375840303022414, + "objective/train/value_max": -5.3048133850097656e-05, + "objective/train/value_min": -0.29150390625, + "objective/train/value_reward_corr": 0.6431444439728494, + "objective/train/value_std": 0.01126861572265625, + "objective/train/weight_avg": 1.000891923904419, + "objective/train/weighted_lm_loss": 1.2243032455444336, + "objective/train/weights_max": 1.3039275407791138, + "objective/train/weights_min": 0.6103137731552124, + "theoretical_loss": 3.4449410257862545, + "tokens_seen": 2100428800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036711603273952817, + "loss": 0.065, + "theoretical_loss": 3.4449234213905564, + "tokens_seen": 2100559872 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036703578879794573, + "loss": 0.0652, + "theoretical_loss": 3.4448882168169908, + "tokens_seen": 2100822016 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036695554485636334, + "loss": 0.0654, + "theoretical_loss": 3.444853017865869, + "tokens_seen": 2101084160 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036687530091478095, + "loss": 0.0646, + "theoretical_loss": 3.4448178245355896, + "tokens_seen": 2101346304 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003667950569731985, + "loss": 0.0658, + "theoretical_loss": 3.4447826368245558, + "tokens_seen": 2101608448 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003667148130316161, + "loss": 0.0659, + "theoretical_loss": 3.4447474547311683, + "tokens_seen": 2101870592 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036663456909003374, + "loss": 0.0669, + "theoretical_loss": 3.4447122782538306, + "tokens_seen": 2102132736 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003665543251484513, + "loss": 0.0656, + "theoretical_loss": 3.444677107390946, + "tokens_seen": 2102394880 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036647408120686885, + "loss": 0.0644, + "theoretical_loss": 3.444641942140918, + "tokens_seen": 2102657024 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036639383726528647, + "loss": 0.0662, + "theoretical_loss": 3.4446067825021514, + "tokens_seen": 2102919168 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003663135933237041, + "loss": 0.0657, + "theoretical_loss": 3.444571628473052, + "tokens_seen": 2103181312 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036623334938212164, + "loss": 0.0662, + "theoretical_loss": 3.4445364800520255, + "tokens_seen": 2103443456 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": -0.0001629337202757597, + "objective/train/docs_used": 766743, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4270471334457397, + "objective/train/original_loss": 1.4270472526550293, + "objective/train/theoretical_loss": 3.444501337237478, + "objective/train/tokens_used": 2124165600, + "objective/train/value_avg": -0.01125335693359375, + "objective/train/value_loss": 0.0007498456398025155, + "objective/train/value_max": -4.988908767700195e-05, + "objective/train/value_min": -0.91162109375, + "objective/train/value_reward_corr": 0.6557396160471719, + "objective/train/value_std": 0.0236358642578125, + "objective/train/weight_avg": 1.000152587890625, + "objective/train/weighted_lm_loss": 1.4258575439453125, + "objective/train/weights_max": 1.6239049434661865, + "objective/train/weights_min": 0.23518386483192444, + "theoretical_loss": 3.444501337237478, + "tokens_seen": 2103705600 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036615310544053925, + "loss": 0.0694, + "theoretical_loss": 3.444501337237478, + "tokens_seen": 2103705600 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036607286149895686, + "loss": 0.0668, + "theoretical_loss": 3.4444662000278177, + "tokens_seen": 2103967744 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003659926175573745, + "loss": 0.0626, + "theoretical_loss": 3.4444310684214514, + "tokens_seen": 2104229888 + }, + { + "epoch": 0.64, + "learning_rate": 0.000365912373615792, + "loss": 0.0638, + "theoretical_loss": 3.444395942416789, + "tokens_seen": 2104492032 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003658321296742096, + "loss": 0.0654, + "theoretical_loss": 3.4443608220122384, + "tokens_seen": 2104754176 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003657518857326272, + "loss": 0.0668, + "theoretical_loss": 3.44432570720621, + "tokens_seen": 2105016320 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036567164179104476, + "loss": 0.0622, + "theoretical_loss": 3.4442905979971146, + "tokens_seen": 2105278464 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003655913978494624, + "loss": 0.0672, + "theoretical_loss": 3.4442554943833628, + "tokens_seen": 2105540608 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036551115390788, + "loss": 0.0673, + "theoretical_loss": 3.444220396363367, + "tokens_seen": 2105802752 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003654309099662976, + "loss": 0.0642, + "theoretical_loss": 3.444185303935539, + "tokens_seen": 2106064896 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003653506660247151, + "loss": 0.0659, + "theoretical_loss": 3.4441502170982927, + "tokens_seen": 2106327040 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003652704220831327, + "loss": 0.0689, + "theoretical_loss": 3.444115135850041, + "tokens_seen": 2106589184 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036519017814155033, + "loss": 0.0685, + "theoretical_loss": 3.444080060189199, + "tokens_seen": 2106851328 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.0003502106701489538, + "objective/train/docs_used": 767939, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3586885929107666, + "objective/train/original_loss": 1.3586885929107666, + "objective/train/theoretical_loss": 3.444062524453561, + "objective/train/tokens_used": 2127442400, + "objective/train/value_avg": -0.00652313232421875, + "objective/train/value_loss": 0.0001283576712012291, + "objective/train/value_max": -3.916025161743164e-05, + "objective/train/value_min": -0.291259765625, + "objective/train/value_reward_corr": 0.7288310765868844, + "objective/train/value_std": 0.01226806640625, + "objective/train/weight_avg": 1.000410556793213, + "objective/train/weighted_lm_loss": 1.3594164848327637, + "objective/train/weights_max": 1.1665211915969849, + "objective/train/weights_min": 0.3985501825809479, + "theoretical_loss": 3.444062524453561, + "tokens_seen": 2106982400 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003651099341999679, + "loss": 0.0692, + "theoretical_loss": 3.444044990114181, + "tokens_seen": 2107113472 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003650296902583855, + "loss": 0.0677, + "theoretical_loss": 3.4440099256234036, + "tokens_seen": 2107375616 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003649494463168031, + "loss": 0.0688, + "theoretical_loss": 3.4439748667152825, + "tokens_seen": 2107637760 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036486920237522067, + "loss": 0.0667, + "theoretical_loss": 3.443939813388235, + "tokens_seen": 2107899904 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036478895843363823, + "loss": 0.0659, + "theoretical_loss": 3.4439047656406783, + "tokens_seen": 2108162048 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036470871449205584, + "loss": 0.0691, + "theoretical_loss": 3.443869723471031, + "tokens_seen": 2108424192 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036462847055047346, + "loss": 0.067, + "theoretical_loss": 3.443834686877712, + "tokens_seen": 2108686336 + }, + { + "epoch": 0.64, + "learning_rate": 0.000364548226608891, + "loss": 0.0674, + "theoretical_loss": 3.443799655859141, + "tokens_seen": 2108948480 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036446798266730863, + "loss": 0.0673, + "theoretical_loss": 3.443764630413738, + "tokens_seen": 2109210624 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036438773872572624, + "loss": 0.0655, + "theoretical_loss": 3.4437296105399238, + "tokens_seen": 2109472768 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003643074947841438, + "loss": 0.0693, + "theoretical_loss": 3.4436945962361203, + "tokens_seen": 2109734912 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003642272508425614, + "loss": 0.0664, + "theoretical_loss": 3.4436595875007487, + "tokens_seen": 2109997056 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 6.14080490777269e-05, + "objective/train/docs_used": 769058, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2044825553894043, + "objective/train/original_loss": 1.2044825553894043, + "objective/train/theoretical_loss": 3.443624584332233, + "objective/train/tokens_used": 2130719200, + "objective/train/value_avg": -0.006946563720703125, + "objective/train/value_loss": 0.00027350004529580474, + "objective/train/value_max": -3.11732292175293e-05, + "objective/train/value_min": -0.69775390625, + "objective/train/value_reward_corr": 0.5902221741881584, + "objective/train/value_std": 0.011749267578125, + "objective/train/weight_avg": 1.0001745223999023, + "objective/train/weighted_lm_loss": 1.2041305303573608, + "objective/train/weights_max": 1.1966127157211304, + "objective/train/weights_min": 0.3685459494590759, + "theoretical_loss": 3.443624584332233, + "tokens_seen": 2110259200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036414700690097897, + "loss": 0.0672, + "theoretical_loss": 3.443624584332233, + "tokens_seen": 2110259200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003640667629593966, + "loss": 0.0677, + "theoretical_loss": 3.4435895867289963, + "tokens_seen": 2110521344 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036398651901781414, + "loss": 0.0666, + "theoretical_loss": 3.443554594689462, + "tokens_seen": 2110783488 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036390627507623175, + "loss": 0.0638, + "theoretical_loss": 3.443519608212055, + "tokens_seen": 2111045632 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036382603113464937, + "loss": 0.064, + "theoretical_loss": 3.4434846272952013, + "tokens_seen": 2111307776 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003637457871930669, + "loss": 0.067, + "theoretical_loss": 3.4434496519373265, + "tokens_seen": 2111569920 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036366554325148454, + "loss": 0.0684, + "theoretical_loss": 3.443414682136857, + "tokens_seen": 2111832064 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003635852993099021, + "loss": 0.0662, + "theoretical_loss": 3.4433797178922205, + "tokens_seen": 2112094208 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003635050553683197, + "loss": 0.0647, + "theoretical_loss": 3.4433447592018447, + "tokens_seen": 2112356352 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036342481142673727, + "loss": 0.0672, + "theoretical_loss": 3.443309806064158, + "tokens_seen": 2112618496 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003633445674851549, + "loss": 0.0664, + "theoretical_loss": 3.44327485847759, + "tokens_seen": 2112880640 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003632643235435725, + "loss": 0.0669, + "theoretical_loss": 3.44323991644057, + "tokens_seen": 2113142784 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036318407960199005, + "loss": 0.0649, + "theoretical_loss": 3.443204979951529, + "tokens_seen": 2113404928 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.0007937829359434545, + "objective/train/docs_used": 770311, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2841191291809082, + "objective/train/original_loss": 1.2841191291809082, + "objective/train/theoretical_loss": 3.4431875137870103, + "objective/train/tokens_used": 2133996000, + "objective/train/value_avg": -0.0069122314453125, + "objective/train/value_loss": 0.00022702872229274362, + "objective/train/value_max": -7.659196853637695e-05, + "objective/train/value_min": -0.361328125, + "objective/train/value_reward_corr": 0.6486080520614381, + "objective/train/value_std": 0.0117950439453125, + "objective/train/weight_avg": 1.0008962154388428, + "objective/train/weighted_lm_loss": 1.2856719493865967, + "objective/train/weights_max": 1.2625762224197388, + "objective/train/weights_min": 0.3683098554611206, + "theoretical_loss": 3.4431875137870103, + "tokens_seen": 2113536000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036310383566040766, + "loss": 0.0642, + "theoretical_loss": 3.4431700490088977, + "tokens_seen": 2113667072 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003630235917188253, + "loss": 0.0675, + "theoretical_loss": 3.443135123611108, + "tokens_seen": 2113929216 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003629433477772428, + "loss": 0.0669, + "theoretical_loss": 3.4431002037565923, + "tokens_seen": 2114191360 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003628631038356604, + "loss": 0.0658, + "theoretical_loss": 3.4430652894437834, + "tokens_seen": 2114453504 + }, + { + "epoch": 0.64, + "learning_rate": 0.000362782859894078, + "loss": 0.0699, + "theoretical_loss": 3.4430303806711158, + "tokens_seen": 2114715648 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003627026159524956, + "loss": 0.0675, + "theoretical_loss": 3.442995477437023, + "tokens_seen": 2114977792 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003626223720109132, + "loss": 0.066, + "theoretical_loss": 3.44296057973994, + "tokens_seen": 2115239936 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003625421280693308, + "loss": 0.0671, + "theoretical_loss": 3.442925687578302, + "tokens_seen": 2115502080 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003624618841277484, + "loss": 0.0672, + "theoretical_loss": 3.442890800950546, + "tokens_seen": 2115764224 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003623816401861659, + "loss": 0.0637, + "theoretical_loss": 3.442855919855109, + "tokens_seen": 2116026368 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003623013962445835, + "loss": 0.0634, + "theoretical_loss": 3.4428210442904277, + "tokens_seen": 2116288512 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036222115230300113, + "loss": 0.0654, + "theoretical_loss": 3.4427861742549406, + "tokens_seen": 2116550656 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.00024320276861544698, + "objective/train/docs_used": 771474, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2714860439300537, + "objective/train/original_loss": 1.2714862823486328, + "objective/train/theoretical_loss": 3.442751309747086, + "objective/train/tokens_used": 2137272800, + "objective/train/value_avg": -0.009307861328125, + "objective/train/value_loss": 0.00032071780879050493, + "objective/train/value_max": -4.988908767700195e-05, + "objective/train/value_min": -0.431396484375, + "objective/train/value_reward_corr": 0.735485788504759, + "objective/train/value_std": 0.0192718505859375, + "objective/train/weight_avg": 1.00038480758667, + "objective/train/weighted_lm_loss": 1.271644115447998, + "objective/train/weights_max": 1.3421837091445923, + "objective/train/weights_min": 0.3815314471721649, + "theoretical_loss": 3.442751309747086, + "tokens_seen": 2116812800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036214090836141874, + "loss": 0.0647, + "theoretical_loss": 3.442751309747086, + "tokens_seen": 2116812800 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003620606644198363, + "loss": 0.0663, + "theoretical_loss": 3.442716450765304, + "tokens_seen": 2117074944 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003619804204782539, + "loss": 0.0667, + "theoretical_loss": 3.4426815973080345, + "tokens_seen": 2117337088 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003619001765366715, + "loss": 0.0685, + "theoretical_loss": 3.4426467493737176, + "tokens_seen": 2117599232 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036181993259508903, + "loss": 0.0663, + "theoretical_loss": 3.4426119069607948, + "tokens_seen": 2117861376 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036173968865350664, + "loss": 0.0675, + "theoretical_loss": 3.4425770700677085, + "tokens_seen": 2118123520 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036165944471192426, + "loss": 0.0644, + "theoretical_loss": 3.4425422386929005, + "tokens_seen": 2118385664 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036157920077034187, + "loss": 0.0657, + "theoretical_loss": 3.4425074128348148, + "tokens_seen": 2118647808 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003614989568287594, + "loss": 0.0664, + "theoretical_loss": 3.442472592491894, + "tokens_seen": 2118909952 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036141871288717704, + "loss": 0.0669, + "theoretical_loss": 3.442437777662584, + "tokens_seen": 2119172096 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036133846894559465, + "loss": 0.0677, + "theoretical_loss": 3.4424029683453288, + "tokens_seen": 2119434240 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003612582250040122, + "loss": 0.0683, + "theoretical_loss": 3.442368164538575, + "tokens_seen": 2119696384 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036117798106242977, + "loss": 0.0655, + "theoretical_loss": 3.4423333662407676, + "tokens_seen": 2119958528 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": -0.0005769357667304575, + "objective/train/docs_used": 772695, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3378933668136597, + "objective/train/original_loss": 1.3378934860229492, + "objective/train/theoretical_loss": 3.442315969157234, + "objective/train/tokens_used": 2140549600, + "objective/train/value_avg": -0.00926971435546875, + "objective/train/value_loss": 0.0003164195513818413, + "objective/train/value_max": -2.110004425048828e-05, + "objective/train/value_min": -0.26513671875, + "objective/train/value_reward_corr": 0.7086236395211777, + "objective/train/value_std": 0.0174713134765625, + "objective/train/weight_avg": 0.999565601348877, + "objective/train/weighted_lm_loss": 1.3366376161575317, + "objective/train/weights_max": 1.3036091327667236, + "objective/train/weights_min": 0.368646502494812, + "theoretical_loss": 3.442315969157234, + "tokens_seen": 2120089600 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003610977371208474, + "loss": 0.0673, + "theoretical_loss": 3.442298573450355, + "tokens_seen": 2120220672 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036101749317926494, + "loss": 0.0659, + "theoretical_loss": 3.4422637861657837, + "tokens_seen": 2120482816 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036093724923768255, + "loss": 0.0657, + "theoretical_loss": 3.442229004385502, + "tokens_seen": 2120744960 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036085700529610017, + "loss": 0.0661, + "theoretical_loss": 3.44219422810796, + "tokens_seen": 2121007104 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003607767613545178, + "loss": 0.0644, + "theoretical_loss": 3.4421594573316057, + "tokens_seen": 2121269248 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036069651741293534, + "loss": 0.0669, + "theoretical_loss": 3.44212469205489, + "tokens_seen": 2121531392 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003606162734713529, + "loss": 0.0693, + "theoretical_loss": 3.4420899322762635, + "tokens_seen": 2121793536 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003605360295297705, + "loss": 0.0681, + "theoretical_loss": 3.4420551779941775, + "tokens_seen": 2122055680 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036045578558818807, + "loss": 0.0676, + "theoretical_loss": 3.4420204292070844, + "tokens_seen": 2122317824 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003603755416466057, + "loss": 0.0661, + "theoretical_loss": 3.4419856859134357, + "tokens_seen": 2122579968 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003602952977050233, + "loss": 0.0665, + "theoretical_loss": 3.4419509481116854, + "tokens_seen": 2122842112 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003602150537634409, + "loss": 0.0688, + "theoretical_loss": 3.441916215800288, + "tokens_seen": 2123104256 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": -0.00034259771928191185, + "objective/train/docs_used": 773937, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2587370872497559, + "objective/train/original_loss": 1.2587368488311768, + "objective/train/theoretical_loss": 3.441881488977697, + "objective/train/tokens_used": 2143826400, + "objective/train/value_avg": -0.01271820068359375, + "objective/train/value_loss": 0.00030873133800923824, + "objective/train/value_max": -4.363059997558594e-05, + "objective/train/value_min": -0.39013671875, + "objective/train/value_reward_corr": 0.75689359226375, + "objective/train/value_std": 0.0196685791015625, + "objective/train/weight_avg": 0.9998003840446472, + "objective/train/weighted_lm_loss": 1.2579768896102905, + "objective/train/weights_max": 1.375349760055542, + "objective/train/weights_min": 0.3696674406528473, + "theoretical_loss": 3.441881488977697, + "tokens_seen": 2123366400 + }, + { + "epoch": 0.64, + "learning_rate": 0.00036013480982185846, + "loss": 0.0635, + "theoretical_loss": 3.441881488977697, + "tokens_seen": 2123366400 + }, + { + "epoch": 0.64, + "learning_rate": 0.000360054565880276, + "loss": 0.0633, + "theoretical_loss": 3.441846767642368, + "tokens_seen": 2123628544 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035997432193869363, + "loss": 0.0663, + "theoretical_loss": 3.4418120517927564, + "tokens_seen": 2123890688 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003598940779971112, + "loss": 0.0686, + "theoretical_loss": 3.441777341427319, + "tokens_seen": 2124152832 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003598138340555288, + "loss": 0.0702, + "theoretical_loss": 3.4417426365445127, + "tokens_seen": 2124414976 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003597335901139464, + "loss": 0.0644, + "theoretical_loss": 3.4417079371427945, + "tokens_seen": 2124677120 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035965334617236403, + "loss": 0.0665, + "theoretical_loss": 3.441673243220624, + "tokens_seen": 2124939264 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003595731022307816, + "loss": 0.0656, + "theoretical_loss": 3.4416385547764583, + "tokens_seen": 2125201408 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003594928582891992, + "loss": 0.0698, + "theoretical_loss": 3.4416038718087583, + "tokens_seen": 2125463552 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035941261434761676, + "loss": 0.067, + "theoretical_loss": 3.4415691943159836, + "tokens_seen": 2125725696 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003593323704060343, + "loss": 0.0672, + "theoretical_loss": 3.441534522296595, + "tokens_seen": 2125987840 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035925212646445193, + "loss": 0.0643, + "theoretical_loss": 3.441499855749054, + "tokens_seen": 2126249984 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035917188252286954, + "loss": 0.0648, + "theoretical_loss": 3.441465194671822, + "tokens_seen": 2126512128 + }, + { + "epoch": 0.64, + "objective/train/advantage_avg": 0.0008241595351137221, + "objective/train/docs_used": 774962, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2446796894073486, + "objective/train/original_loss": 1.244679570198059, + "objective/train/theoretical_loss": 3.441447866184092, + "objective/train/tokens_used": 2147103200, + "objective/train/value_avg": -0.0080718994140625, + "objective/train/value_loss": 0.0002672381524462253, + "objective/train/value_max": -6.556510925292969e-05, + "objective/train/value_min": -0.96728515625, + "objective/train/value_reward_corr": 0.7998461563914081, + "objective/train/value_std": 0.0204925537109375, + "objective/train/weight_avg": 1.0009392499923706, + "objective/train/weighted_lm_loss": 1.2461644411087036, + "objective/train/weights_max": 1.2684307098388672, + "objective/train/weights_min": 0.3747805058956146, + "theoretical_loss": 3.441447866184092, + "tokens_seen": 2126643200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035909163858128715, + "loss": 0.0664, + "theoretical_loss": 3.4414305390633624, + "tokens_seen": 2126774272 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003590113946397047, + "loss": 0.0648, + "theoretical_loss": 3.4413958889221385, + "tokens_seen": 2127036416 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003589311506981223, + "loss": 0.0652, + "theoretical_loss": 3.4413612442466133, + "tokens_seen": 2127298560 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003588509067565399, + "loss": 0.0668, + "theoretical_loss": 3.441326605035252, + "tokens_seen": 2127560704 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035877066281495744, + "loss": 0.067, + "theoretical_loss": 3.441291971286519, + "tokens_seen": 2127822848 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035869041887337506, + "loss": 0.0644, + "theoretical_loss": 3.44125734299888, + "tokens_seen": 2128084992 + }, + { + "epoch": 0.64, + "learning_rate": 0.00035861017493179267, + "loss": 0.0677, + "theoretical_loss": 3.4412227201708028, + "tokens_seen": 2128347136 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003585299309902102, + "loss": 0.0679, + "theoretical_loss": 3.4411881028007527, + "tokens_seen": 2128609280 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035844968704862784, + "loss": 0.0646, + "theoretical_loss": 3.4411534908871984, + "tokens_seen": 2128871424 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035836944310704545, + "loss": 0.068, + "theoretical_loss": 3.4411188844286067, + "tokens_seen": 2129133568 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035828919916546306, + "loss": 0.0676, + "theoretical_loss": 3.441084283423448, + "tokens_seen": 2129395712 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035820895522388057, + "loss": 0.0691, + "theoretical_loss": 3.4410496878701906, + "tokens_seen": 2129657856 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.0012429844355210662, + "objective/train/docs_used": 775953, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.301253318786621, + "objective/train/original_loss": 1.301253080368042, + "objective/train/theoretical_loss": 3.441015097767305, + "objective/train/tokens_used": 2150380000, + "objective/train/value_avg": -0.0076141357421875, + "objective/train/value_loss": 0.00014564950834028423, + "objective/train/value_max": -8.285045623779297e-05, + "objective/train/value_min": -0.197021484375, + "objective/train/value_reward_corr": 0.7439541772547014, + "objective/train/value_std": 0.01273345947265625, + "objective/train/weight_avg": 1.001312017440796, + "objective/train/weighted_lm_loss": 1.3027527332305908, + "objective/train/weights_max": 1.1942163705825806, + "objective/train/weights_min": 0.3970448672771454, + "theoretical_loss": 3.441015097767305, + "tokens_seen": 2129920000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003581287112822982, + "loss": 0.065, + "theoretical_loss": 3.441015097767305, + "tokens_seen": 2129920000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003580484673407158, + "loss": 0.0689, + "theoretical_loss": 3.440980513113262, + "tokens_seen": 2130182144 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035796822339913335, + "loss": 0.0678, + "theoretical_loss": 3.4409459339065327, + "tokens_seen": 2130444288 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035788797945755096, + "loss": 0.0704, + "theoretical_loss": 3.4409113601455887, + "tokens_seen": 2130706432 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003578077355159686, + "loss": 0.0663, + "theoretical_loss": 3.4408767918289027, + "tokens_seen": 2130968576 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003577274915743862, + "loss": 0.0664, + "theoretical_loss": 3.4408422289549483, + "tokens_seen": 2131230720 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003576472476328037, + "loss": 0.0654, + "theoretical_loss": 3.4408076715221982, + "tokens_seen": 2131492864 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003575670036912213, + "loss": 0.0658, + "theoretical_loss": 3.440773119529128, + "tokens_seen": 2131755008 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003574867597496389, + "loss": 0.0676, + "theoretical_loss": 3.440738572974212, + "tokens_seen": 2132017152 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003574065158080565, + "loss": 0.0688, + "theoretical_loss": 3.440704031855926, + "tokens_seen": 2132279296 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003573262718664741, + "loss": 0.0694, + "theoretical_loss": 3.440669496172746, + "tokens_seen": 2132541440 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003572460279248917, + "loss": 0.0683, + "theoretical_loss": 3.4406349659231488, + "tokens_seen": 2132803584 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003571657839833093, + "loss": 0.0638, + "theoretical_loss": 3.4406004411056124, + "tokens_seen": 2133065728 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.0007250725175254047, + "objective/train/docs_used": 777226, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2577089071273804, + "objective/train/original_loss": 1.2577087879180908, + "objective/train/theoretical_loss": 3.440583180733391, + "objective/train/tokens_used": 2153656800, + "objective/train/value_avg": -0.0079498291015625, + "objective/train/value_loss": 0.00016377944848500192, + "objective/train/value_max": -7.033348083496094e-05, + "objective/train/value_min": -0.2166748046875, + "objective/train/value_reward_corr": 0.6768520456626519, + "objective/train/value_std": 0.01261138916015625, + "objective/train/weight_avg": 1.0007988214492798, + "objective/train/weighted_lm_loss": 1.2580339908599854, + "objective/train/weights_max": 1.2066720724105835, + "objective/train/weights_min": 0.3683168888092041, + "theoretical_loss": 3.440583180733391, + "tokens_seen": 2133196800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003570855400417268, + "loss": 0.0642, + "theoretical_loss": 3.4405659217186138, + "tokens_seen": 2133327872 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035700529610014443, + "loss": 0.0641, + "theoretical_loss": 3.440531407760633, + "tokens_seen": 2133590016 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035692505215856204, + "loss": 0.0672, + "theoretical_loss": 3.4404968992301477, + "tokens_seen": 2133852160 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003568448082169796, + "loss": 0.0639, + "theoretical_loss": 3.440462396125639, + "tokens_seen": 2134114304 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003567645642753972, + "loss": 0.0676, + "theoretical_loss": 3.4404278984455874, + "tokens_seen": 2134376448 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035668432033381483, + "loss": 0.0686, + "theoretical_loss": 3.4403934061884733, + "tokens_seen": 2134638592 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003566040763922324, + "loss": 0.068, + "theoretical_loss": 3.4403589193527786, + "tokens_seen": 2134900736 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035652383245065, + "loss": 0.0676, + "theoretical_loss": 3.440324437936986, + "tokens_seen": 2135162880 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035644358850906756, + "loss": 0.0661, + "theoretical_loss": 3.4402899619395786, + "tokens_seen": 2135425024 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035636334456748517, + "loss": 0.063, + "theoretical_loss": 3.440255491359039, + "tokens_seen": 2135687168 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035628310062590273, + "loss": 0.0671, + "theoretical_loss": 3.440221026193852, + "tokens_seen": 2135949312 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035620285668432034, + "loss": 0.0644, + "theoretical_loss": 3.4401865664425024, + "tokens_seen": 2136211456 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.0004655689117498696, + "objective/train/docs_used": 778288, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3205798864364624, + "objective/train/original_loss": 1.3205798864364624, + "objective/train/theoretical_loss": 3.440152112103476, + "objective/train/tokens_used": 2156933600, + "objective/train/value_avg": -0.00875091552734375, + "objective/train/value_loss": 0.00023888830037321895, + "objective/train/value_max": -5.918741226196289e-05, + "objective/train/value_min": -0.34912109375, + "objective/train/value_reward_corr": 0.666328584912689, + "objective/train/value_std": 0.01355743408203125, + "objective/train/weight_avg": 1.0005714893341064, + "objective/train/weighted_lm_loss": 1.3204231262207031, + "objective/train/weights_max": 1.0932023525238037, + "objective/train/weights_min": 0.37003564834594727, + "theoretical_loss": 3.440152112103476, + "tokens_seen": 2136473600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035612261274273795, + "loss": 0.066, + "theoretical_loss": 3.440152112103476, + "tokens_seen": 2136473600 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003560423688011555, + "loss": 0.0668, + "theoretical_loss": 3.4401176631752577, + "tokens_seen": 2136735744 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003559621248595731, + "loss": 0.0657, + "theoretical_loss": 3.4400832196563353, + "tokens_seen": 2136997888 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003558818809179907, + "loss": 0.0667, + "theoretical_loss": 3.440048781545195, + "tokens_seen": 2137260032 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003558016369764083, + "loss": 0.0691, + "theoretical_loss": 3.440014348840325, + "tokens_seen": 2137522176 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035572139303482585, + "loss": 0.0674, + "theoretical_loss": 3.439979921540214, + "tokens_seen": 2137784320 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035564114909324347, + "loss": 0.0666, + "theoretical_loss": 3.4399454996433514, + "tokens_seen": 2138046464 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003555609051516611, + "loss": 0.0684, + "theoretical_loss": 3.4399110831482256, + "tokens_seen": 2138308608 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035548066121007864, + "loss": 0.0672, + "theoretical_loss": 3.4398766720533276, + "tokens_seen": 2138570752 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035540041726849625, + "loss": 0.0691, + "theoretical_loss": 3.4398422663571484, + "tokens_seen": 2138832896 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003553201733269138, + "loss": 0.0662, + "theoretical_loss": 3.439807866058179, + "tokens_seen": 2139095040 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003552399293853314, + "loss": 0.0697, + "theoretical_loss": 3.439773471154912, + "tokens_seen": 2139357184 + }, + { + "epoch": 0.65, + "learning_rate": 0.000355159685443749, + "loss": 0.0675, + "theoretical_loss": 3.4397390816458397, + "tokens_seen": 2139619328 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.0012974967248737812, + "objective/train/docs_used": 779442, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3170843124389648, + "objective/train/original_loss": 1.3170843124389648, + "objective/train/theoretical_loss": 3.439721888913656, + "objective/train/tokens_used": 2160210400, + "objective/train/value_avg": -0.00569915771484375, + "objective/train/value_loss": 0.00020559185941237956, + "objective/train/value_max": -3.1948089599609375e-05, + "objective/train/value_min": -0.98291015625, + "objective/train/value_reward_corr": 0.6640440608056907, + "objective/train/value_std": 0.011474609375, + "objective/train/weight_avg": 1.0013810396194458, + "objective/train/weighted_lm_loss": 1.3188480138778687, + "objective/train/weights_max": 1.2265723943710327, + "objective/train/weights_min": 0.25205379724502563, + "theoretical_loss": 3.439721888913656, + "tokens_seen": 2139750400 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003550794415021666, + "loss": 0.0673, + "theoretical_loss": 3.4397046975294554, + "tokens_seen": 2139881472 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003549991975605842, + "loss": 0.069, + "theoretical_loss": 3.4396703188042537, + "tokens_seen": 2140143616 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035491895361900176, + "loss": 0.0653, + "theoretical_loss": 3.4396359454687278, + "tokens_seen": 2140405760 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003548387096774194, + "loss": 0.0649, + "theoretical_loss": 3.4396015775213735, + "tokens_seen": 2140667904 + }, + { + "epoch": 0.65, + "learning_rate": 0.000354758465735837, + "loss": 0.0663, + "theoretical_loss": 3.439567214960687, + "tokens_seen": 2140930048 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003546782217942545, + "loss": 0.0646, + "theoretical_loss": 3.439532857785164, + "tokens_seen": 2141192192 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003545979778526721, + "loss": 0.0658, + "theoretical_loss": 3.4394985059933014, + "tokens_seen": 2141454336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003545177339110897, + "loss": 0.0668, + "theoretical_loss": 3.439464159583597, + "tokens_seen": 2141716480 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035443748996950733, + "loss": 0.0654, + "theoretical_loss": 3.4394298185545487, + "tokens_seen": 2141978624 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003543572460279249, + "loss": 0.0682, + "theoretical_loss": 3.439395482904655, + "tokens_seen": 2142240768 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003542770020863425, + "loss": 0.0648, + "theoretical_loss": 3.439361152632416, + "tokens_seen": 2142502912 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003541967581447601, + "loss": 0.0661, + "theoretical_loss": 3.4393268277363305, + "tokens_seen": 2142765056 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.001232887152582407, + "objective/train/docs_used": 780500, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1809592247009277, + "objective/train/original_loss": 1.1809592247009277, + "objective/train/theoretical_loss": 3.4392925082149004, + "objective/train/tokens_used": 2163487200, + "objective/train/value_avg": -0.006099700927734375, + "objective/train/value_loss": 0.00020563544239848852, + "objective/train/value_max": -4.684925079345703e-05, + "objective/train/value_min": -0.39013671875, + "objective/train/value_reward_corr": 0.6734060620060285, + "objective/train/value_std": 0.01251983642578125, + "objective/train/weight_avg": 1.0013197660446167, + "objective/train/weighted_lm_loss": 1.1824616193771362, + "objective/train/weights_max": 1.163423776626587, + "objective/train/weights_min": 0.36951375007629395, + "theoretical_loss": 3.4392925082149004, + "tokens_seen": 2143027200 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003541165142031776, + "loss": 0.0684, + "theoretical_loss": 3.4392925082149004, + "tokens_seen": 2143027200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035403627026159523, + "loss": 0.0679, + "theoretical_loss": 3.4392581940666256, + "tokens_seen": 2143289344 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035395602632001284, + "loss": 0.0647, + "theoretical_loss": 3.4392238852900086, + "tokens_seen": 2143551488 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035387578237843046, + "loss": 0.0665, + "theoretical_loss": 3.4391895818835514, + "tokens_seen": 2143813632 + }, + { + "epoch": 0.65, + "learning_rate": 0.000353795538436848, + "loss": 0.0638, + "theoretical_loss": 3.439155283845757, + "tokens_seen": 2144075776 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035371529449526563, + "loss": 0.0688, + "theoretical_loss": 3.4391209911751286, + "tokens_seen": 2144337920 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035363505055368324, + "loss": 0.0653, + "theoretical_loss": 3.4390867038701716, + "tokens_seen": 2144600064 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003535548066121008, + "loss": 0.0669, + "theoretical_loss": 3.439052421929389, + "tokens_seen": 2144862208 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035347456267051836, + "loss": 0.0643, + "theoretical_loss": 3.439018145351287, + "tokens_seen": 2145124352 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035339431872893597, + "loss": 0.0681, + "theoretical_loss": 3.4389838741343715, + "tokens_seen": 2145386496 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003533140747873536, + "loss": 0.066, + "theoretical_loss": 3.438949608277149, + "tokens_seen": 2145648640 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035323383084577114, + "loss": 0.0649, + "theoretical_loss": 3.438915347778127, + "tokens_seen": 2145910784 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035315358690418875, + "loss": 0.0686, + "theoretical_loss": 3.4388810926358127, + "tokens_seen": 2146172928 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.0006946653011254966, + "objective/train/docs_used": 781712, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.253171682357788, + "objective/train/original_loss": 1.253171443939209, + "objective/train/theoretical_loss": 3.4388639670729546, + "objective/train/tokens_used": 2166764000, + "objective/train/value_avg": -0.008758544921875, + "objective/train/value_loss": 0.0003154955920763314, + "objective/train/value_max": -6.157159805297852e-05, + "objective/train/value_min": -0.465087890625, + "objective/train/value_reward_corr": 0.6826854809281973, + "objective/train/value_std": 0.017791748046875, + "objective/train/weight_avg": 1.0008307695388794, + "objective/train/weighted_lm_loss": 1.254067063331604, + "objective/train/weights_max": 1.5921541452407837, + "objective/train/weights_min": 0.3686527907848358, + "theoretical_loss": 3.4388639670729546, + "tokens_seen": 2146304000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035307334296260637, + "loss": 0.0651, + "theoretical_loss": 3.4388468428487142, + "tokens_seen": 2146435072 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003529930990210239, + "loss": 0.0658, + "theoretical_loss": 3.4388125984153413, + "tokens_seen": 2146697216 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003529128550794415, + "loss": 0.0665, + "theoretical_loss": 3.4387783593342025, + "tokens_seen": 2146959360 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003528326111378591, + "loss": 0.07, + "theoretical_loss": 3.438744125603809, + "tokens_seen": 2147221504 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035275236719627665, + "loss": 0.0652, + "theoretical_loss": 3.4387098972226706, + "tokens_seen": 2147483648 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035267212325469427, + "loss": 0.0623, + "theoretical_loss": 3.4386756741892994, + "tokens_seen": 2147745792 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003525918793131119, + "loss": 0.0694, + "theoretical_loss": 3.4386414565022063, + "tokens_seen": 2148007936 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003525116353715295, + "loss": 0.0668, + "theoretical_loss": 3.438607244159905, + "tokens_seen": 2148270080 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035243139142994705, + "loss": 0.0655, + "theoretical_loss": 3.4385730371609076, + "tokens_seen": 2148532224 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003523511474883646, + "loss": 0.0673, + "theoretical_loss": 3.438538835503728, + "tokens_seen": 2148794368 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003522709035467822, + "loss": 0.0678, + "theoretical_loss": 3.438504639186881, + "tokens_seen": 2149056512 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003521906596051998, + "loss": 0.0655, + "theoretical_loss": 3.4384704482088813, + "tokens_seen": 2149318656 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.00044662863365374506, + "objective/train/docs_used": 782928, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1585477590560913, + "objective/train/original_loss": 1.1585477590560913, + "objective/train/theoretical_loss": 3.4384362625682434, + "objective/train/tokens_used": 2170040800, + "objective/train/value_avg": -0.005054473876953125, + "objective/train/value_loss": 9.015284013003111e-05, + "objective/train/value_max": -3.11732292175293e-05, + "objective/train/value_min": -0.219970703125, + "objective/train/value_reward_corr": 0.7191277656844346, + "objective/train/value_std": 0.00902557373046875, + "objective/train/weight_avg": 1.0004909038543701, + "objective/train/weighted_lm_loss": 1.1594600677490234, + "objective/train/weights_max": 1.1269409656524658, + "objective/train/weights_min": 0.8207743763923645, + "theoretical_loss": 3.4384362625682434, + "tokens_seen": 2149580800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003521104156636174, + "loss": 0.0636, + "theoretical_loss": 3.4384362625682434, + "tokens_seen": 2149580800 + }, + { + "epoch": 0.65, + "learning_rate": 0.000352030171722035, + "loss": 0.0657, + "theoretical_loss": 3.438402082263485, + "tokens_seen": 2149842944 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003519499277804526, + "loss": 0.0681, + "theoretical_loss": 3.4383679072931215, + "tokens_seen": 2150105088 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003518696838388702, + "loss": 0.0655, + "theoretical_loss": 3.438333737655671, + "tokens_seen": 2150367232 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003517894398972878, + "loss": 0.0687, + "theoretical_loss": 3.43829957334965, + "tokens_seen": 2150629376 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035170919595570535, + "loss": 0.0655, + "theoretical_loss": 3.4382654143735785, + "tokens_seen": 2150891520 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003516289520141229, + "loss": 0.0678, + "theoretical_loss": 3.438231260725975, + "tokens_seen": 2151153664 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003515487080725405, + "loss": 0.0661, + "theoretical_loss": 3.4381971124053594, + "tokens_seen": 2151415808 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035146846413095813, + "loss": 0.0682, + "theoretical_loss": 3.438162969410251, + "tokens_seen": 2151677952 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035138822018937574, + "loss": 0.0675, + "theoretical_loss": 3.438128831739171, + "tokens_seen": 2151940096 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003513079762477933, + "loss": 0.0686, + "theoretical_loss": 3.4380946993906414, + "tokens_seen": 2152202240 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003512277323062109, + "loss": 0.0664, + "theoretical_loss": 3.4380605723631836, + "tokens_seen": 2152464384 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035114748836462847, + "loss": 0.0677, + "theoretical_loss": 3.4380264506553204, + "tokens_seen": 2152726528 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.000863985565956682, + "objective/train/docs_used": 784071, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3599311113357544, + "objective/train/original_loss": 1.3599311113357544, + "objective/train/theoretical_loss": 3.4380093917957755, + "objective/train/tokens_used": 2173317600, + "objective/train/value_avg": -0.0035495758056640625, + "objective/train/value_loss": 7.935243775136769e-05, + "objective/train/value_max": -5.7816505432128906e-05, + "objective/train/value_min": -0.1246337890625, + "objective/train/value_reward_corr": 0.49532153914812793, + "objective/train/value_std": 0.0050201416015625, + "objective/train/weight_avg": 1.0008996725082397, + "objective/train/weighted_lm_loss": 1.3618212938308716, + "objective/train/weights_max": 1.1140156984329224, + "objective/train/weights_min": 0.37567365169525146, + "theoretical_loss": 3.4380093917957755, + "tokens_seen": 2152857600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035106724442304603, + "loss": 0.0669, + "theoretical_loss": 3.4379923342655747, + "tokens_seen": 2152988672 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035098700048146364, + "loss": 0.0671, + "theoretical_loss": 3.437958223192471, + "tokens_seen": 2153250816 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035090675653988126, + "loss": 0.0693, + "theoretical_loss": 3.4379241174345325, + "tokens_seen": 2153512960 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035082651259829887, + "loss": 0.0635, + "theoretical_loss": 3.437890016990285, + "tokens_seen": 2153775104 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003507462686567164, + "loss": 0.0668, + "theoretical_loss": 3.4378559218582536, + "tokens_seen": 2154037248 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035066602471513404, + "loss": 0.065, + "theoretical_loss": 3.4378218320369647, + "tokens_seen": 2154299392 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003505857807735516, + "loss": 0.0653, + "theoretical_loss": 3.437787747524945, + "tokens_seen": 2154561536 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035050553683196916, + "loss": 0.0666, + "theoretical_loss": 3.4377536683207217, + "tokens_seen": 2154823680 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035042529289038677, + "loss": 0.0672, + "theoretical_loss": 3.4377195944228225, + "tokens_seen": 2155085824 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003503450489488044, + "loss": 0.0654, + "theoretical_loss": 3.4376855258297763, + "tokens_seen": 2155347968 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035026480500722194, + "loss": 0.0663, + "theoretical_loss": 3.4376514625401113, + "tokens_seen": 2155610112 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035018456106563955, + "loss": 0.0665, + "theoretical_loss": 3.437617404552358, + "tokens_seen": 2155872256 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.0005617592250928283, + "objective/train/docs_used": 785270, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3288520574569702, + "objective/train/original_loss": 1.3288519382476807, + "objective/train/theoretical_loss": 3.4375833518650465, + "objective/train/tokens_used": 2176594400, + "objective/train/value_avg": -0.007755279541015625, + "objective/train/value_loss": 0.0002667588123586029, + "objective/train/value_max": -4.07099723815918e-05, + "objective/train/value_min": -0.849609375, + "objective/train/value_reward_corr": 0.736639202225347, + "objective/train/value_std": 0.018310546875, + "objective/train/weight_avg": 1.0006906986236572, + "objective/train/weighted_lm_loss": 1.3300721645355225, + "objective/train/weights_max": 2.143956422805786, + "objective/train/weights_min": 0.5176886916160583, + "theoretical_loss": 3.4375833518650465, + "tokens_seen": 2156134400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00035010431712405717, + "loss": 0.0678, + "theoretical_loss": 3.4375833518650465, + "tokens_seen": 2156134400 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003500240731824748, + "loss": 0.0661, + "theoretical_loss": 3.4375493044767076, + "tokens_seen": 2156396544 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003499438292408923, + "loss": 0.0649, + "theoretical_loss": 3.4375152623858725, + "tokens_seen": 2156658688 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003498635852993099, + "loss": 0.0664, + "theoretical_loss": 3.437481225591073, + "tokens_seen": 2156920832 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003497833413577275, + "loss": 0.0665, + "theoretical_loss": 3.4374471940908418, + "tokens_seen": 2157182976 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034970309741614507, + "loss": 0.0647, + "theoretical_loss": 3.4374131678837125, + "tokens_seen": 2157445120 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003496228534745627, + "loss": 0.0661, + "theoretical_loss": 3.4373791469682184, + "tokens_seen": 2157707264 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003495426095329803, + "loss": 0.0662, + "theoretical_loss": 3.437345131342894, + "tokens_seen": 2157969408 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003494623655913979, + "loss": 0.0664, + "theoretical_loss": 3.437311121006274, + "tokens_seen": 2158231552 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003493821216498154, + "loss": 0.0669, + "theoretical_loss": 3.4372771159568942, + "tokens_seen": 2158493696 + }, + { + "epoch": 0.65, + "learning_rate": 0.000349301877708233, + "loss": 0.0678, + "theoretical_loss": 3.43724311619329, + "tokens_seen": 2158755840 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034922163376665063, + "loss": 0.066, + "theoretical_loss": 3.437209121713999, + "tokens_seen": 2159017984 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003491413898250682, + "loss": 0.0669, + "theoretical_loss": 3.4371751325175586, + "tokens_seen": 2159280128 + }, + { + "epoch": 0.65, + "objective/train/advantage_avg": 0.00039554800605401397, + "objective/train/docs_used": 786428, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3268866539001465, + "objective/train/original_loss": 1.3268866539001465, + "objective/train/theoretical_loss": 3.4371581398999496, + "objective/train/tokens_used": 2179871200, + "objective/train/value_avg": -0.011016845703125, + "objective/train/value_loss": 0.000277909217402339, + "objective/train/value_max": -4.13060188293457e-05, + "objective/train/value_min": -0.7626953125, + "objective/train/value_reward_corr": 0.8255372104578527, + "objective/train/value_std": 0.0236968994140625, + "objective/train/weight_avg": 1.0005244016647339, + "objective/train/weighted_lm_loss": 1.3273314237594604, + "objective/train/weights_max": 1.3975777626037598, + "objective/train/weights_min": 0.40282997488975525, + "theoretical_loss": 3.4371581398999496, + "tokens_seen": 2159411200 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003490611458834858, + "loss": 0.0656, + "theoretical_loss": 3.437141148602505, + "tokens_seen": 2159542272 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003489809019419034, + "loss": 0.0643, + "theoretical_loss": 3.437107169967378, + "tokens_seen": 2159804416 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034890065800032103, + "loss": 0.0671, + "theoretical_loss": 3.437073196610716, + "tokens_seen": 2160066560 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003488204140587386, + "loss": 0.0643, + "theoretical_loss": 3.4370392285310594, + "tokens_seen": 2160328704 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034874017011715615, + "loss": 0.0641, + "theoretical_loss": 3.437005265726947, + "tokens_seen": 2160590848 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034865992617557376, + "loss": 0.0653, + "theoretical_loss": 3.4369713081969206, + "tokens_seen": 2160852992 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003485796822339913, + "loss": 0.068, + "theoretical_loss": 3.436937355939521, + "tokens_seen": 2161115136 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034849943829240893, + "loss": 0.0682, + "theoretical_loss": 3.4369034089532904, + "tokens_seen": 2161377280 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034841919435082654, + "loss": 0.0654, + "theoretical_loss": 3.436869467236771, + "tokens_seen": 2161639424 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003483389504092441, + "loss": 0.0644, + "theoretical_loss": 3.4368355307885063, + "tokens_seen": 2161901568 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003482587064676617, + "loss": 0.0667, + "theoretical_loss": 3.436801599607039, + "tokens_seen": 2162163712 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034817846252607927, + "loss": 0.0681, + "theoretical_loss": 3.4367676736909143, + "tokens_seen": 2162425856 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.001019138377159834, + "objective/train/docs_used": 787745, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4617555141448975, + "objective/train/original_loss": 1.461755633354187, + "objective/train/theoretical_loss": 3.4367337530386766, + "objective/train/tokens_used": 2183148000, + "objective/train/value_avg": -0.00838470458984375, + "objective/train/value_loss": 0.00023759658506605774, + "objective/train/value_max": -3.4809112548828125e-05, + "objective/train/value_min": -0.732421875, + "objective/train/value_reward_corr": 0.7778528006169423, + "objective/train/value_std": 0.0183563232421875, + "objective/train/weight_avg": 1.0011314153671265, + "objective/train/weighted_lm_loss": 1.4620612859725952, + "objective/train/weights_max": 1.4613994359970093, + "objective/train/weights_min": 0.5150108933448792, + "theoretical_loss": 3.4367337530386766, + "tokens_seen": 2162688000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003480982185844969, + "loss": 0.0669, + "theoretical_loss": 3.4367337530386766, + "tokens_seen": 2162688000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034801797464291444, + "loss": 0.068, + "theoretical_loss": 3.436699837648871, + "tokens_seen": 2162950144 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034793773070133206, + "loss": 0.0692, + "theoretical_loss": 3.4366659275200444, + "tokens_seen": 2163212288 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034785748675974967, + "loss": 0.0677, + "theoretical_loss": 3.436632022650742, + "tokens_seen": 2163474432 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003477772428181672, + "loss": 0.0682, + "theoretical_loss": 3.4365981230395115, + "tokens_seen": 2163736576 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034769699887658484, + "loss": 0.0652, + "theoretical_loss": 3.436564228684901, + "tokens_seen": 2163998720 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003476167549350024, + "loss": 0.066, + "theoretical_loss": 3.436530339585458, + "tokens_seen": 2164260864 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034753651099342, + "loss": 0.0668, + "theoretical_loss": 3.4364964557397317, + "tokens_seen": 2164523008 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034745626705183757, + "loss": 0.0663, + "theoretical_loss": 3.436462577146272, + "tokens_seen": 2164785152 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003473760231102552, + "loss": 0.066, + "theoretical_loss": 3.4364287038036276, + "tokens_seen": 2165047296 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003472957791686728, + "loss": 0.067, + "theoretical_loss": 3.4363948357103506, + "tokens_seen": 2165309440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034721553522709035, + "loss": 0.0668, + "theoretical_loss": 3.436360972864991, + "tokens_seen": 2165571584 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034713529128550796, + "loss": 0.0668, + "theoretical_loss": 3.4363271152661006, + "tokens_seen": 2165833728 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.0009586879750713706, + "objective/train/docs_used": 788845, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3089131116867065, + "objective/train/original_loss": 1.308912992477417, + "objective/train/theoretical_loss": 3.4363101884336293, + "objective/train/tokens_used": 2186424800, + "objective/train/value_avg": -0.0082855224609375, + "objective/train/value_loss": 0.0004079457721672952, + "objective/train/value_max": -1.9252300262451172e-05, + "objective/train/value_min": -0.744140625, + "objective/train/value_reward_corr": 0.7048424748964199, + "objective/train/value_std": 0.0200958251953125, + "objective/train/weight_avg": 1.0011377334594727, + "objective/train/weighted_lm_loss": 1.3098411560058594, + "objective/train/weights_max": 1.4547736644744873, + "objective/train/weights_min": 0.3825574517250061, + "theoretical_loss": 3.4363101884336293, + "tokens_seen": 2165964800 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003470550473439256, + "loss": 0.0683, + "theoretical_loss": 3.4362932629122325, + "tokens_seen": 2166095872 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034697480340234314, + "loss": 0.0728, + "theoretical_loss": 3.436259415801939, + "tokens_seen": 2166358016 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003468945594607607, + "loss": 0.0655, + "theoretical_loss": 3.436225573933773, + "tokens_seen": 2166620160 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003468143155191783, + "loss": 0.0668, + "theoretical_loss": 3.43619173730629, + "tokens_seen": 2166882304 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003467340715775959, + "loss": 0.0676, + "theoretical_loss": 3.4361579059180425, + "tokens_seen": 2167144448 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003466538276360135, + "loss": 0.0677, + "theoretical_loss": 3.4361240797675876, + "tokens_seen": 2167406592 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003465735836944311, + "loss": 0.0691, + "theoretical_loss": 3.43609025885348, + "tokens_seen": 2167668736 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003464933397528487, + "loss": 0.0663, + "theoretical_loss": 3.436056443174276, + "tokens_seen": 2167930880 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003464130958112662, + "loss": 0.0685, + "theoretical_loss": 3.436022632728533, + "tokens_seen": 2168193024 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003463328518696838, + "loss": 0.07, + "theoretical_loss": 3.4359888275148083, + "tokens_seen": 2168455168 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034625260792810143, + "loss": 0.0653, + "theoretical_loss": 3.4359550275316595, + "tokens_seen": 2168717312 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034617236398651904, + "loss": 0.0661, + "theoretical_loss": 3.4359212327776456, + "tokens_seen": 2168979456 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": -0.00022465267102234066, + "objective/train/docs_used": 790125, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5220752954483032, + "objective/train/original_loss": 1.5220751762390137, + "objective/train/theoretical_loss": 3.4358874432513256, + "objective/train/tokens_used": 2189701600, + "objective/train/value_avg": -0.01103973388671875, + "objective/train/value_loss": 0.0004339893057476729, + "objective/train/value_max": -4.649162292480469e-05, + "objective/train/value_min": -0.76220703125, + "objective/train/value_reward_corr": 0.8229982443682127, + "objective/train/value_std": 0.0262908935546875, + "objective/train/weight_avg": 0.9999734163284302, + "objective/train/weighted_lm_loss": 1.522541880607605, + "objective/train/weights_max": 1.6124958992004395, + "objective/train/weights_min": 0.39407527446746826, + "theoretical_loss": 3.4358874432513256, + "tokens_seen": 2169241600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003460921200449366, + "loss": 0.067, + "theoretical_loss": 3.4358874432513256, + "tokens_seen": 2169241600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003460118761033542, + "loss": 0.0658, + "theoretical_loss": 3.435853658951259, + "tokens_seen": 2169503744 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034593163216177183, + "loss": 0.064, + "theoretical_loss": 3.4358198798760067, + "tokens_seen": 2169765888 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034585138822018933, + "loss": 0.066, + "theoretical_loss": 3.4357861060241293, + "tokens_seen": 2170028032 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034577114427860695, + "loss": 0.0668, + "theoretical_loss": 3.4357523373941876, + "tokens_seen": 2170290176 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034569090033702456, + "loss": 0.0676, + "theoretical_loss": 3.4357185739847447, + "tokens_seen": 2170552320 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034561065639544217, + "loss": 0.0659, + "theoretical_loss": 3.4356848157943625, + "tokens_seen": 2170814464 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034553041245385973, + "loss": 0.0684, + "theoretical_loss": 3.4356510628216044, + "tokens_seen": 2171076608 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034545016851227734, + "loss": 0.0696, + "theoretical_loss": 3.435617315065034, + "tokens_seen": 2171338752 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034536992457069495, + "loss": 0.0678, + "theoretical_loss": 3.435583572523216, + "tokens_seen": 2171600896 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003452896806291125, + "loss": 0.0672, + "theoretical_loss": 3.4355498351947142, + "tokens_seen": 2171863040 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034520943668753007, + "loss": 0.0668, + "theoretical_loss": 3.435516103078095, + "tokens_seen": 2172125184 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003451291927459477, + "loss": 0.0688, + "theoretical_loss": 3.435482376171924, + "tokens_seen": 2172387328 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 4.255705061950721e-05, + "objective/train/docs_used": 791273, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3144559860229492, + "objective/train/original_loss": 1.3144559860229492, + "objective/train/theoretical_loss": 3.4354655146723085, + "objective/train/tokens_used": 2192978400, + "objective/train/value_avg": -0.007049560546875, + "objective/train/value_loss": 0.00017210084479302168, + "objective/train/value_max": -2.777576446533203e-05, + "objective/train/value_min": -0.5546875, + "objective/train/value_reward_corr": 0.7298873457361592, + "objective/train/value_std": 0.01337432861328125, + "objective/train/weight_avg": 1.0001221895217896, + "objective/train/weighted_lm_loss": 1.313899278640747, + "objective/train/weights_max": 1.1279730796813965, + "objective/train/weights_min": 0.36996087431907654, + "theoretical_loss": 3.4354655146723085, + "tokens_seen": 2172518400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003450489488043653, + "loss": 0.0654, + "theoretical_loss": 3.435448654474768, + "tokens_seen": 2172649472 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034496870486278285, + "loss": 0.0674, + "theoretical_loss": 3.435414937985194, + "tokens_seen": 2172911616 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034488846092120047, + "loss": 0.0688, + "theoretical_loss": 3.43538122670177, + "tokens_seen": 2173173760 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003448082169796181, + "loss": 0.0691, + "theoretical_loss": 3.435347520623063, + "tokens_seen": 2173435904 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034472797303803564, + "loss": 0.0677, + "theoretical_loss": 3.435313819747644, + "tokens_seen": 2173698048 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003446477290964532, + "loss": 0.0663, + "theoretical_loss": 3.4352801240740805, + "tokens_seen": 2173960192 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003445674851548708, + "loss": 0.0643, + "theoretical_loss": 3.435246433600943, + "tokens_seen": 2174222336 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034448724121328837, + "loss": 0.0645, + "theoretical_loss": 3.4352127483268022, + "tokens_seen": 2174484480 + }, + { + "epoch": 0.66, + "learning_rate": 0.000344406997271706, + "loss": 0.0644, + "theoretical_loss": 3.4351790682502297, + "tokens_seen": 2174746624 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003443267533301236, + "loss": 0.0675, + "theoretical_loss": 3.435145393369796, + "tokens_seen": 2175008768 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003442465093885412, + "loss": 0.0671, + "theoretical_loss": 3.4351117236840745, + "tokens_seen": 2175270912 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034416626544695876, + "loss": 0.0643, + "theoretical_loss": 3.4350780591916372, + "tokens_seen": 2175533056 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.0008052947814576328, + "objective/train/docs_used": 792571, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2874644994735718, + "objective/train/original_loss": 1.2874643802642822, + "objective/train/theoretical_loss": 3.4350443998910576, + "objective/train/tokens_used": 2196255200, + "objective/train/value_avg": -0.009521484375, + "objective/train/value_loss": 0.0003565671213436872, + "objective/train/value_max": -6.812810897827148e-05, + "objective/train/value_min": -0.54638671875, + "objective/train/value_reward_corr": 0.6494977411090334, + "objective/train/value_std": 0.018707275390625, + "objective/train/weight_avg": 1.0009691715240479, + "objective/train/weighted_lm_loss": 1.288053035736084, + "objective/train/weights_max": 1.560215950012207, + "objective/train/weights_min": 0.3739407956600189, + "theoretical_loss": 3.4350443998910576, + "tokens_seen": 2175795200 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003440860215053764, + "loss": 0.065, + "theoretical_loss": 3.4350443998910576, + "tokens_seen": 2175795200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034400577756379393, + "loss": 0.0674, + "theoretical_loss": 3.435010745780909, + "tokens_seen": 2176057344 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003439255336222115, + "loss": 0.0652, + "theoretical_loss": 3.4349770968597677, + "tokens_seen": 2176319488 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003438452896806291, + "loss": 0.065, + "theoretical_loss": 3.4349434531262073, + "tokens_seen": 2176581632 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003437650457390467, + "loss": 0.0667, + "theoretical_loss": 3.4349098145788033, + "tokens_seen": 2176843776 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034368480179746433, + "loss": 0.0673, + "theoretical_loss": 3.434876181216133, + "tokens_seen": 2177105920 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003436045578558819, + "loss": 0.0649, + "theoretical_loss": 3.4348425530367717, + "tokens_seen": 2177368064 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003435243139142995, + "loss": 0.0671, + "theoretical_loss": 3.4348089300392974, + "tokens_seen": 2177630208 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034344406997271706, + "loss": 0.066, + "theoretical_loss": 3.434775312222288, + "tokens_seen": 2177892352 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003433638260311346, + "loss": 0.0676, + "theoretical_loss": 3.434741699584322, + "tokens_seen": 2178154496 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034328358208955223, + "loss": 0.0677, + "theoretical_loss": 3.434708092123978, + "tokens_seen": 2178416640 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034320333814796984, + "loss": 0.0674, + "theoretical_loss": 3.434674489839836, + "tokens_seen": 2178678784 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034312309420638746, + "loss": 0.0649, + "theoretical_loss": 3.4346408927304757, + "tokens_seen": 2178940928 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": -0.001850332017056644, + "objective/train/docs_used": 793725, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1691232919692993, + "objective/train/original_loss": 1.1691234111785889, + "objective/train/theoretical_loss": 3.434624096115895, + "objective/train/tokens_used": 2199532000, + "objective/train/value_avg": -0.00922393798828125, + "objective/train/value_loss": 0.00025595849729143083, + "objective/train/value_max": -7.426738739013672e-05, + "objective/train/value_min": -0.34521484375, + "objective/train/value_reward_corr": 0.850702016525787, + "objective/train/value_std": 0.0190887451171875, + "objective/train/weight_avg": 0.9982719421386719, + "objective/train/weighted_lm_loss": 1.1677327156066895, + "objective/train/weights_max": 1.2134528160095215, + "objective/train/weights_min": 0.41253384947776794, + "theoretical_loss": 3.434624096115895, + "tokens_seen": 2179072000 + }, + { + "epoch": 0.66, + "learning_rate": 0.000343042850264805, + "loss": 0.067, + "theoretical_loss": 3.434607300794478, + "tokens_seen": 2179203072 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034296260632322263, + "loss": 0.0645, + "theoretical_loss": 3.434573714030424, + "tokens_seen": 2179465216 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003428823623816402, + "loss": 0.0648, + "theoretical_loss": 3.4345401324368954, + "tokens_seen": 2179727360 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034280211844005774, + "loss": 0.0671, + "theoretical_loss": 3.4345065560124746, + "tokens_seen": 2179989504 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034272187449847536, + "loss": 0.0658, + "theoretical_loss": 3.434472984755745, + "tokens_seen": 2180251648 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034264163055689297, + "loss": 0.0674, + "theoretical_loss": 3.434439418665289, + "tokens_seen": 2180513792 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034256138661531053, + "loss": 0.064, + "theoretical_loss": 3.434405857739691, + "tokens_seen": 2180775936 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034248114267372814, + "loss": 0.067, + "theoretical_loss": 3.434372301977536, + "tokens_seen": 2181038080 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034240089873214575, + "loss": 0.067, + "theoretical_loss": 3.4343387513774095, + "tokens_seen": 2181300224 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034232065479056337, + "loss": 0.0696, + "theoretical_loss": 3.434305205937896, + "tokens_seen": 2181562368 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034224041084898087, + "loss": 0.0679, + "theoretical_loss": 3.4342716656575822, + "tokens_seen": 2181824512 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003421601669073985, + "loss": 0.0671, + "theoretical_loss": 3.4342381305350553, + "tokens_seen": 2182086656 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.0005700654583051801, + "objective/train/docs_used": 794520, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4715244770050049, + "objective/train/original_loss": 1.4715242385864258, + "objective/train/theoretical_loss": 3.434204600568902, + "objective/train/tokens_used": 2202808800, + "objective/train/value_avg": -0.006561279296875, + "objective/train/value_loss": 0.00040286124567501247, + "objective/train/value_max": -3.3974647521972656e-05, + "objective/train/value_min": -0.49853515625, + "objective/train/value_reward_corr": 0.6038308390405315, + "objective/train/value_std": 0.0157318115234375, + "objective/train/weight_avg": 1.000749945640564, + "objective/train/weighted_lm_loss": 1.47190523147583, + "objective/train/weights_max": 1.613708734512329, + "objective/train/weights_min": 0.39268267154693604, + "theoretical_loss": 3.434204600568902, + "tokens_seen": 2182348800 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003420799229658161, + "loss": 0.0697, + "theoretical_loss": 3.434204600568902, + "tokens_seen": 2182348800 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034199967902423365, + "loss": 0.0699, + "theoretical_loss": 3.434171075757711, + "tokens_seen": 2182610944 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034191943508265127, + "loss": 0.0654, + "theoretical_loss": 3.4341375561000698, + "tokens_seen": 2182873088 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003418391911410689, + "loss": 0.0656, + "theoretical_loss": 3.4341040415945683, + "tokens_seen": 2183135232 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003417589471994865, + "loss": 0.0672, + "theoretical_loss": 3.434070532239796, + "tokens_seen": 2183397376 + }, + { + "epoch": 0.66, + "learning_rate": 0.000341678703257904, + "loss": 0.066, + "theoretical_loss": 3.4340370280343424, + "tokens_seen": 2183659520 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003415984593163216, + "loss": 0.0651, + "theoretical_loss": 3.434003528976799, + "tokens_seen": 2183921664 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003415182153747392, + "loss": 0.0663, + "theoretical_loss": 3.433970035065756, + "tokens_seen": 2184183808 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003414379714331568, + "loss": 0.0674, + "theoretical_loss": 3.4339365462998064, + "tokens_seen": 2184445952 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003413577274915744, + "loss": 0.065, + "theoretical_loss": 3.4339030626775413, + "tokens_seen": 2184708096 + }, + { + "epoch": 0.66, + "learning_rate": 0.000341277483549992, + "loss": 0.0653, + "theoretical_loss": 3.433869584197555, + "tokens_seen": 2184970240 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003411972396084096, + "loss": 0.0665, + "theoretical_loss": 3.43383611085844, + "tokens_seen": 2185232384 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003411169956668271, + "loss": 0.0661, + "theoretical_loss": 3.43380264265879, + "tokens_seen": 2185494528 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.0009123857016675174, + "objective/train/docs_used": 795634, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2502954006195068, + "objective/train/original_loss": 1.2502954006195068, + "objective/train/theoretical_loss": 3.4337859104858257, + "objective/train/tokens_used": 2206085600, + "objective/train/value_avg": -0.006984710693359375, + "objective/train/value_loss": 8.377248741453514e-05, + "objective/train/value_max": -4.649162292480469e-05, + "objective/train/value_min": -0.2447509765625, + "objective/train/value_reward_corr": 0.7441979043018272, + "objective/train/value_std": 0.01093292236328125, + "objective/train/weight_avg": 1.0009539127349854, + "objective/train/weighted_lm_loss": 1.2514960765838623, + "objective/train/weights_max": 1.122109055519104, + "objective/train/weights_min": 0.8294188380241394, + "theoretical_loss": 3.4337859104858257, + "tokens_seen": 2185625600 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034103675172524473, + "loss": 0.0652, + "theoretical_loss": 3.433769179597201, + "tokens_seen": 2185756672 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034095650778366235, + "loss": 0.0669, + "theoretical_loss": 3.433735721672267, + "tokens_seen": 2186018816 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003408762638420799, + "loss": 0.0664, + "theoretical_loss": 3.4337022688825836, + "tokens_seen": 2186280960 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003407960199004975, + "loss": 0.0686, + "theoretical_loss": 3.4336688212267474, + "tokens_seen": 2186543104 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034071577595891513, + "loss": 0.0694, + "theoretical_loss": 3.4336353787033556, + "tokens_seen": 2186805248 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034063553201733274, + "loss": 0.0662, + "theoretical_loss": 3.4336019413110046, + "tokens_seen": 2187067392 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003405552880757503, + "loss": 0.0654, + "theoretical_loss": 3.4335685090482926, + "tokens_seen": 2187329536 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034047504413416786, + "loss": 0.0702, + "theoretical_loss": 3.4335350819138184, + "tokens_seen": 2187591680 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034039480019258547, + "loss": 0.0663, + "theoretical_loss": 3.433501659906181, + "tokens_seen": 2187853824 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034031455625100303, + "loss": 0.0655, + "theoretical_loss": 3.433468243023979, + "tokens_seen": 2188115968 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034023431230942064, + "loss": 0.0658, + "theoretical_loss": 3.433434831265814, + "tokens_seen": 2188378112 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034015406836783826, + "loss": 0.0674, + "theoretical_loss": 3.4334014246302855, + "tokens_seen": 2188640256 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.0003115667204838246, + "objective/train/docs_used": 796788, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.321669340133667, + "objective/train/original_loss": 1.321669340133667, + "objective/train/theoretical_loss": 3.4333680231159946, + "objective/train/tokens_used": 2209362400, + "objective/train/value_avg": -0.0115966796875, + "objective/train/value_loss": 0.0005159495631232858, + "objective/train/value_max": -4.947185516357422e-05, + "objective/train/value_min": -0.75146484375, + "objective/train/value_reward_corr": 0.7070658662793439, + "objective/train/value_std": 0.0208740234375, + "objective/train/weight_avg": 1.0005357265472412, + "objective/train/weighted_lm_loss": 1.3218821287155151, + "objective/train/weights_max": 1.463372826576233, + "objective/train/weights_min": 0.370111882686615, + "theoretical_loss": 3.4333680231159946, + "tokens_seen": 2188902400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003400738244262558, + "loss": 0.0668, + "theoretical_loss": 3.4333680231159946, + "tokens_seen": 2188902400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003399935804846734, + "loss": 0.066, + "theoretical_loss": 3.433334626721544, + "tokens_seen": 2189164544 + }, + { + "epoch": 0.66, + "learning_rate": 0.000339913336543091, + "loss": 0.0669, + "theoretical_loss": 3.4333012354455352, + "tokens_seen": 2189426688 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003398330926015086, + "loss": 0.067, + "theoretical_loss": 3.433267849286571, + "tokens_seen": 2189688832 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033975284865992616, + "loss": 0.072, + "theoretical_loss": 3.4332344682432554, + "tokens_seen": 2189950976 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033967260471834377, + "loss": 0.0673, + "theoretical_loss": 3.4332010923141913, + "tokens_seen": 2190213120 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003395923607767614, + "loss": 0.0646, + "theoretical_loss": 3.4331677214979845, + "tokens_seen": 2190475264 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033951211683517894, + "loss": 0.069, + "theoretical_loss": 3.4331343557932392, + "tokens_seen": 2190737408 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033943187289359655, + "loss": 0.0655, + "theoretical_loss": 3.433100995198561, + "tokens_seen": 2190999552 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033935162895201417, + "loss": 0.0672, + "theoretical_loss": 3.433067639712556, + "tokens_seen": 2191261696 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003392713850104317, + "loss": 0.0652, + "theoretical_loss": 3.4330342893338313, + "tokens_seen": 2191523840 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003391911410688493, + "loss": 0.0637, + "theoretical_loss": 3.4330009440609937, + "tokens_seen": 2191785984 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003391108971272669, + "loss": 0.066, + "theoretical_loss": 3.432967603892651, + "tokens_seen": 2192048128 + }, + { + "epoch": 0.66, + "objective/train/advantage_avg": 0.00028000472229905427, + "objective/train/docs_used": 798413, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3710542917251587, + "objective/train/original_loss": 1.3710544109344482, + "objective/train/theoretical_loss": 3.4329509357222303, + "objective/train/tokens_used": 2212639200, + "objective/train/value_avg": -0.00600433349609375, + "objective/train/value_loss": 0.0005311193526722491, + "objective/train/value_max": -5.02467155456543e-05, + "objective/train/value_min": -0.87890625, + "objective/train/value_reward_corr": 0.6472117724278922, + "objective/train/value_std": 0.01491546630859375, + "objective/train/weight_avg": 1.0005016326904297, + "objective/train/weighted_lm_loss": 1.3713548183441162, + "objective/train/weights_max": 2.0175466537475586, + "objective/train/weights_min": 0.3728155791759491, + "theoretical_loss": 3.4329509357222303, + "tokens_seen": 2192179200 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003390306531856845, + "loss": 0.0695, + "theoretical_loss": 3.432934268827412, + "tokens_seen": 2192310272 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033895040924410207, + "loss": 0.0663, + "theoretical_loss": 3.4329009388638845, + "tokens_seen": 2192572416 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003388701653025197, + "loss": 0.0678, + "theoretical_loss": 3.4328676140006786, + "tokens_seen": 2192834560 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003387899213609373, + "loss": 0.068, + "theoretical_loss": 3.4328342942364043, + "tokens_seen": 2193096704 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033870967741935485, + "loss": 0.0648, + "theoretical_loss": 3.432800979569672, + "tokens_seen": 2193358848 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003386294334777724, + "loss": 0.0667, + "theoretical_loss": 3.4327676699990928, + "tokens_seen": 2193620992 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033854918953619, + "loss": 0.0674, + "theoretical_loss": 3.432734365523278, + "tokens_seen": 2193883136 + }, + { + "epoch": 0.66, + "learning_rate": 0.00033846894559460763, + "loss": 0.0694, + "theoretical_loss": 3.4327010661408397, + "tokens_seen": 2194145280 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003383887016530252, + "loss": 0.067, + "theoretical_loss": 3.432667771850391, + "tokens_seen": 2194407424 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003383084577114428, + "loss": 0.0668, + "theoretical_loss": 3.4326344826505446, + "tokens_seen": 2194669568 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003382282137698604, + "loss": 0.0668, + "theoretical_loss": 3.4326011985399147, + "tokens_seen": 2194931712 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003381479698282779, + "loss": 0.068, + "theoretical_loss": 3.4325679195171155, + "tokens_seen": 2195193856 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": -0.00021135738643351942, + "objective/train/docs_used": 799213, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.386327862739563, + "objective/train/original_loss": 1.3863276243209839, + "objective/train/theoretical_loss": 3.4325346455807613, + "objective/train/tokens_used": 2215916000, + "objective/train/value_avg": -0.006687164306640625, + "objective/train/value_loss": 0.00016143263201229274, + "objective/train/value_max": -3.3736228942871094e-05, + "objective/train/value_min": -0.63623046875, + "objective/train/value_reward_corr": 0.798931972290373, + "objective/train/value_std": 0.0166168212890625, + "objective/train/weight_avg": 0.9998688697814941, + "objective/train/weighted_lm_loss": 1.3864178657531738, + "objective/train/weights_max": 1.5625030994415283, + "objective/train/weights_min": 0.6712589859962463, + "theoretical_loss": 3.4325346455807613, + "tokens_seen": 2195456000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033806772588669553, + "loss": 0.0644, + "theoretical_loss": 3.4325346455807613, + "tokens_seen": 2195456000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033798748194511315, + "loss": 0.0676, + "theoretical_loss": 3.4325013767294683, + "tokens_seen": 2195718144 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033790723800353076, + "loss": 0.0687, + "theoretical_loss": 3.4324681129618515, + "tokens_seen": 2195980288 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003378269940619483, + "loss": 0.067, + "theoretical_loss": 3.4324348542765284, + "tokens_seen": 2196242432 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033774675012036593, + "loss": 0.07, + "theoretical_loss": 3.432401600672115, + "tokens_seen": 2196504576 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033766650617878354, + "loss": 0.067, + "theoretical_loss": 3.43236835214723, + "tokens_seen": 2196766720 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003375862622372011, + "loss": 0.0664, + "theoretical_loss": 3.4323351087004905, + "tokens_seen": 2197028864 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033750601829561866, + "loss": 0.0646, + "theoretical_loss": 3.4323018703305155, + "tokens_seen": 2197291008 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033742577435403627, + "loss": 0.0655, + "theoretical_loss": 3.432268637035924, + "tokens_seen": 2197553152 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003373455304124539, + "loss": 0.0685, + "theoretical_loss": 3.4322354088153357, + "tokens_seen": 2197815296 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033726528647087144, + "loss": 0.068, + "theoretical_loss": 3.432202185667371, + "tokens_seen": 2198077440 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033718504252928906, + "loss": 0.0666, + "theoretical_loss": 3.432168967590651, + "tokens_seen": 2198339584 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033710479858770667, + "loss": 0.0673, + "theoretical_loss": 3.432135754583796, + "tokens_seen": 2198601728 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.001131325727328658, + "objective/train/docs_used": 800708, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2420262098312378, + "objective/train/original_loss": 1.2420260906219482, + "objective/train/theoretical_loss": 3.432119149981138, + "objective/train/tokens_used": 2219192800, + "objective/train/value_avg": -0.01074981689453125, + "objective/train/value_loss": 0.0003463710017967969, + "objective/train/value_max": -6.300210952758789e-05, + "objective/train/value_min": -0.396728515625, + "objective/train/value_reward_corr": 0.6319513812312934, + "objective/train/value_std": 0.01788330078125, + "objective/train/weight_avg": 1.001285433769226, + "objective/train/weighted_lm_loss": 1.242701530456543, + "objective/train/weights_max": 1.4869521856307983, + "objective/train/weights_min": 0.3684363067150116, + "theoretical_loss": 3.432119149981138, + "tokens_seen": 2198732800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003370245546461242, + "loss": 0.0672, + "theoretical_loss": 3.432102546645429, + "tokens_seen": 2198863872 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003369443107045418, + "loss": 0.0678, + "theoretical_loss": 3.432069343774172, + "tokens_seen": 2199126016 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003368640667629594, + "loss": 0.0678, + "theoretical_loss": 3.432036145968648, + "tokens_seen": 2199388160 + }, + { + "epoch": 0.67, + "learning_rate": 0.000336783822821377, + "loss": 0.0683, + "theoretical_loss": 3.43200295322748, + "tokens_seen": 2199650304 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033670357887979457, + "loss": 0.0648, + "theoretical_loss": 3.431969765549293, + "tokens_seen": 2199912448 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003366233349382122, + "loss": 0.0661, + "theoretical_loss": 3.43193658293271, + "tokens_seen": 2200174592 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003365430909966298, + "loss": 0.0681, + "theoretical_loss": 3.4319034053763575, + "tokens_seen": 2200436736 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033646284705504735, + "loss": 0.0656, + "theoretical_loss": 3.431870232878861, + "tokens_seen": 2200698880 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003363826031134649, + "loss": 0.0672, + "theoretical_loss": 3.431837065438846, + "tokens_seen": 2200961024 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003363023591718825, + "loss": 0.0681, + "theoretical_loss": 3.4318039030549397, + "tokens_seen": 2201223168 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003362221152303001, + "loss": 0.0663, + "theoretical_loss": 3.431770745725769, + "tokens_seen": 2201485312 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003361418712887177, + "loss": 0.0643, + "theoretical_loss": 3.431737593449962, + "tokens_seen": 2201747456 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.00045791416778229177, + "objective/train/docs_used": 801578, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3965429067611694, + "objective/train/original_loss": 1.396543025970459, + "objective/train/theoretical_loss": 3.4317044462261466, + "objective/train/tokens_used": 2222469600, + "objective/train/value_avg": -0.00762939453125, + "objective/train/value_loss": 0.00016113599122036248, + "objective/train/value_max": -5.608797073364258e-05, + "objective/train/value_min": -0.5087890625, + "objective/train/value_reward_corr": 0.7312199023293408, + "objective/train/value_std": 0.01407623291015625, + "objective/train/weight_avg": 1.0005367994308472, + "objective/train/weighted_lm_loss": 1.3970656394958496, + "objective/train/weights_max": 1.3251081705093384, + "objective/train/weights_min": 0.5159518122673035, + "theoretical_loss": 3.4317044462261466, + "tokens_seen": 2202009600 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003360616273471353, + "loss": 0.0653, + "theoretical_loss": 3.4317044462261466, + "tokens_seen": 2202009600 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003359813834055529, + "loss": 0.0664, + "theoretical_loss": 3.4316713040529523, + "tokens_seen": 2202271744 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003359011394639705, + "loss": 0.0673, + "theoretical_loss": 3.4316381669290084, + "tokens_seen": 2202533888 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003358208955223881, + "loss": 0.0655, + "theoretical_loss": 3.431605034852944, + "tokens_seen": 2202796032 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033574065158080565, + "loss": 0.0701, + "theoretical_loss": 3.43157190782339, + "tokens_seen": 2203058176 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003356604076392232, + "loss": 0.065, + "theoretical_loss": 3.431538785838978, + "tokens_seen": 2203320320 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003355801636976408, + "loss": 0.0676, + "theoretical_loss": 3.4315056688983385, + "tokens_seen": 2203582464 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033549991975605843, + "loss": 0.0686, + "theoretical_loss": 3.4314725570001046, + "tokens_seen": 2203844608 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033541967581447604, + "loss": 0.0661, + "theoretical_loss": 3.431439450142908, + "tokens_seen": 2204106752 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003353394318728936, + "loss": 0.0691, + "theoretical_loss": 3.4314063483253823, + "tokens_seen": 2204368896 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003352591879313112, + "loss": 0.0656, + "theoretical_loss": 3.431373251546161, + "tokens_seen": 2204631040 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003351789439897288, + "loss": 0.0691, + "theoretical_loss": 3.431340159803878, + "tokens_seen": 2204893184 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033509870004814633, + "loss": 0.0667, + "theoretical_loss": 3.4313070730971686, + "tokens_seen": 2205155328 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.00044071170850656927, + "objective/train/docs_used": 802757, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3551077842712402, + "objective/train/original_loss": 1.3551077842712402, + "objective/train/theoretical_loss": 3.4312905316317277, + "objective/train/tokens_used": 2225746400, + "objective/train/value_avg": -0.006072998046875, + "objective/train/value_loss": 0.00028490403201431036, + "objective/train/value_max": -2.4497509002685547e-05, + "objective/train/value_min": -0.94873046875, + "objective/train/value_reward_corr": 0.7518883224176258, + "objective/train/value_std": 0.01715087890625, + "objective/train/weight_avg": 1.000563144683838, + "objective/train/weighted_lm_loss": 1.3555856943130493, + "objective/train/weights_max": 1.4964208602905273, + "objective/train/weights_min": 0.3752783536911011, + "theoretical_loss": 3.4312905316317277, + "tokens_seen": 2205286400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033501845610656395, + "loss": 0.069, + "theoretical_loss": 3.431273991424668, + "tokens_seen": 2205417472 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033493821216498156, + "loss": 0.068, + "theoretical_loss": 3.431240914785012, + "tokens_seen": 2205679616 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033485796822339917, + "loss": 0.0654, + "theoretical_loss": 3.431207843176836, + "tokens_seen": 2205941760 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033477772428181673, + "loss": 0.0678, + "theoretical_loss": 3.431174776598778, + "tokens_seen": 2206203904 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033469748034023434, + "loss": 0.0676, + "theoretical_loss": 3.431141715049475, + "tokens_seen": 2206466048 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033461723639865195, + "loss": 0.0672, + "theoretical_loss": 3.4311086585275645, + "tokens_seen": 2206728192 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033453699245706946, + "loss": 0.0656, + "theoretical_loss": 3.4310756070316857, + "tokens_seen": 2206990336 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033445674851548707, + "loss": 0.0674, + "theoretical_loss": 3.431042560560477, + "tokens_seen": 2207252480 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003343765045739047, + "loss": 0.069, + "theoretical_loss": 3.431009519112578, + "tokens_seen": 2207514624 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033429626063232224, + "loss": 0.0683, + "theoretical_loss": 3.4309764826866287, + "tokens_seen": 2207776768 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033421601669073985, + "loss": 0.0675, + "theoretical_loss": 3.43094345128127, + "tokens_seen": 2208038912 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033413577274915747, + "loss": 0.0699, + "theoretical_loss": 3.430910424895143, + "tokens_seen": 2208301056 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.0003184771630913019, + "objective/train/docs_used": 804023, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4095585346221924, + "objective/train/original_loss": 1.4095585346221924, + "objective/train/theoretical_loss": 3.4308774035268885, + "objective/train/tokens_used": 2229023200, + "objective/train/value_avg": -0.00682830810546875, + "objective/train/value_loss": 0.0001328888174612075, + "objective/train/value_max": -4.3332576751708984e-05, + "objective/train/value_min": -0.2161865234375, + "objective/train/value_reward_corr": 0.7068750377978834, + "objective/train/value_std": 0.01126861572265625, + "objective/train/weight_avg": 1.0003799200057983, + "objective/train/weighted_lm_loss": 1.4102829694747925, + "objective/train/weights_max": 1.1803033351898193, + "objective/train/weights_min": 0.36821746826171875, + "theoretical_loss": 3.4308774035268885, + "tokens_seen": 2208563200 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003340555288075751, + "loss": 0.0686, + "theoretical_loss": 3.4308774035268885, + "tokens_seen": 2208563200 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003339752848659926, + "loss": 0.0675, + "theoretical_loss": 3.4308443871751497, + "tokens_seen": 2208825344 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003338950409244102, + "loss": 0.0644, + "theoretical_loss": 3.4308113758385685, + "tokens_seen": 2209087488 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003338147969828278, + "loss": 0.0686, + "theoretical_loss": 3.4307783695157887, + "tokens_seen": 2209349632 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033373455304124537, + "loss": 0.0676, + "theoretical_loss": 3.4307453682054536, + "tokens_seen": 2209611776 + }, + { + "epoch": 0.67, + "learning_rate": 0.000333654309099663, + "loss": 0.0674, + "theoretical_loss": 3.430712371906208, + "tokens_seen": 2209873920 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003335740651580806, + "loss": 0.0674, + "theoretical_loss": 3.4306793806166955, + "tokens_seen": 2210136064 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003334938212164982, + "loss": 0.0662, + "theoretical_loss": 3.4306463943355627, + "tokens_seen": 2210398208 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003334135772749157, + "loss": 0.0672, + "theoretical_loss": 3.430613413061455, + "tokens_seen": 2210660352 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003333333333333333, + "loss": 0.0672, + "theoretical_loss": 3.4305804367930186, + "tokens_seen": 2210922496 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033325308939175093, + "loss": 0.0639, + "theoretical_loss": 3.4305474655289006, + "tokens_seen": 2211184640 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003331728454501685, + "loss": 0.0678, + "theoretical_loss": 3.4305144992677485, + "tokens_seen": 2211446784 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003330926015085861, + "loss": 0.066, + "theoretical_loss": 3.4304815380082103, + "tokens_seen": 2211708928 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": -0.00014401759835891426, + "objective/train/docs_used": 805208, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3896081447601318, + "objective/train/original_loss": 1.3896081447601318, + "objective/train/theoretical_loss": 3.4304650592536237, + "objective/train/tokens_used": 2232300000, + "objective/train/value_avg": -0.00646209716796875, + "objective/train/value_loss": 0.00011668778461171314, + "objective/train/value_max": -2.7120113372802734e-05, + "objective/train/value_min": -0.60205078125, + "objective/train/value_reward_corr": 0.7311364752772134, + "objective/train/value_std": 0.01180267333984375, + "objective/train/weight_avg": 0.999912440776825, + "objective/train/weighted_lm_loss": 1.389186978340149, + "objective/train/weights_max": 1.1917989253997803, + "objective/train/weights_min": 0.534700870513916, + "theoretical_loss": 3.4304650592536237, + "tokens_seen": 2211840000 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003330123575670037, + "loss": 0.065, + "theoretical_loss": 3.430448581748934, + "tokens_seen": 2211971072 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033293211362542133, + "loss": 0.0692, + "theoretical_loss": 3.430415630488569, + "tokens_seen": 2212233216 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003328518696838389, + "loss": 0.0687, + "theoretical_loss": 3.4303826842257648, + "tokens_seen": 2212495360 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033277162574225645, + "loss": 0.0677, + "theoretical_loss": 3.430349742959171, + "tokens_seen": 2212757504 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033269138180067406, + "loss": 0.0656, + "theoretical_loss": 3.4303168066874385, + "tokens_seen": 2213019648 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003326111378590916, + "loss": 0.0671, + "theoretical_loss": 3.430283875409219, + "tokens_seen": 2213281792 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033253089391750923, + "loss": 0.065, + "theoretical_loss": 3.430250949123163, + "tokens_seen": 2213543936 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033245064997592684, + "loss": 0.0669, + "theoretical_loss": 3.430218027827924, + "tokens_seen": 2213806080 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033237040603434446, + "loss": 0.068, + "theoretical_loss": 3.430185111522153, + "tokens_seen": 2214068224 + }, + { + "epoch": 0.67, + "learning_rate": 0.000332290162092762, + "loss": 0.0642, + "theoretical_loss": 3.4301522002045046, + "tokens_seen": 2214330368 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003322099181511796, + "loss": 0.0692, + "theoretical_loss": 3.430119293873632, + "tokens_seen": 2214592512 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003321296742095972, + "loss": 0.0671, + "theoretical_loss": 3.4300863925281893, + "tokens_seen": 2214854656 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.000626943598035723, + "objective/train/docs_used": 806281, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2398408651351929, + "objective/train/original_loss": 1.2398409843444824, + "objective/train/theoretical_loss": 3.430053496166831, + "objective/train/tokens_used": 2235576800, + "objective/train/value_avg": -0.0085296630859375, + "objective/train/value_loss": 0.00041652534855529666, + "objective/train/value_max": -4.297494888305664e-05, + "objective/train/value_min": -0.98779296875, + "objective/train/value_reward_corr": 0.6897977803093197, + "objective/train/value_std": 0.021728515625, + "objective/train/weight_avg": 1.0008251667022705, + "objective/train/weighted_lm_loss": 1.240440845489502, + "objective/train/weights_max": 1.96941077709198, + "objective/train/weights_min": 0.3877526521682739, + "theoretical_loss": 3.430053496166831, + "tokens_seen": 2215116800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033204943026801474, + "loss": 0.0661, + "theoretical_loss": 3.430053496166831, + "tokens_seen": 2215116800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033196918632643236, + "loss": 0.0668, + "theoretical_loss": 3.4300206047882136, + "tokens_seen": 2215378944 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033188894238484997, + "loss": 0.0679, + "theoretical_loss": 3.4299877183909917, + "tokens_seen": 2215641088 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033180869844326753, + "loss": 0.0664, + "theoretical_loss": 3.4299548369738218, + "tokens_seen": 2215903232 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033172845450168514, + "loss": 0.0647, + "theoretical_loss": 3.429921960535361, + "tokens_seen": 2216165376 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003316482105601027, + "loss": 0.0666, + "theoretical_loss": 3.429889089074267, + "tokens_seen": 2216427520 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003315679666185203, + "loss": 0.0636, + "theoretical_loss": 3.429856222589197, + "tokens_seen": 2216689664 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033148772267693787, + "loss": 0.0671, + "theoretical_loss": 3.4298233610788094, + "tokens_seen": 2216951808 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003314074787353555, + "loss": 0.067, + "theoretical_loss": 3.429790504541764, + "tokens_seen": 2217213952 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003313272347937731, + "loss": 0.0659, + "theoretical_loss": 3.4297576529767193, + "tokens_seen": 2217476096 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033124699085219065, + "loss": 0.0671, + "theoretical_loss": 3.4297248063823362, + "tokens_seen": 2217738240 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033116674691060827, + "loss": 0.0661, + "theoretical_loss": 3.4296919647572746, + "tokens_seen": 2218000384 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003310865029690259, + "loss": 0.0669, + "theoretical_loss": 3.4296591281001954, + "tokens_seen": 2218262528 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": -0.001073150895535946, + "objective/train/docs_used": 807316, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.303073525428772, + "objective/train/original_loss": 1.303073525428772, + "objective/train/theoretical_loss": 3.429642711634231, + "objective/train/tokens_used": 2238853600, + "objective/train/value_avg": -0.0086517333984375, + "objective/train/value_loss": 0.0002828251163009554, + "objective/train/value_max": -2.2113323211669922e-05, + "objective/train/value_min": -0.83544921875, + "objective/train/value_reward_corr": 0.7588367060954335, + "objective/train/value_std": 0.0181427001953125, + "objective/train/weight_avg": 0.99905925989151, + "objective/train/weighted_lm_loss": 1.3010692596435547, + "objective/train/weights_max": 1.2279506921768188, + "objective/train/weights_min": 0.37165406346321106, + "theoretical_loss": 3.429642711634231, + "tokens_seen": 2218393600 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033100625902744344, + "loss": 0.0644, + "theoretical_loss": 3.42962629640976, + "tokens_seen": 2218524672 + }, + { + "epoch": 0.67, + "learning_rate": 0.000330926015085861, + "loss": 0.0681, + "theoretical_loss": 3.4295934696846313, + "tokens_seen": 2218786816 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003308457711442786, + "loss": 0.0643, + "theoretical_loss": 3.4295606479234713, + "tokens_seen": 2219048960 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003307655272026962, + "loss": 0.066, + "theoretical_loss": 3.429527831124943, + "tokens_seen": 2219311104 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003306852832611138, + "loss": 0.0673, + "theoretical_loss": 3.42949501928771, + "tokens_seen": 2219573248 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003306050393195314, + "loss": 0.065, + "theoretical_loss": 3.4294622124104364, + "tokens_seen": 2219835392 + }, + { + "epoch": 0.67, + "learning_rate": 0.000330524795377949, + "loss": 0.0639, + "theoretical_loss": 3.429429410491787, + "tokens_seen": 2220097536 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033044455143636656, + "loss": 0.0658, + "theoretical_loss": 3.429396613530427, + "tokens_seen": 2220359680 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003303643074947841, + "loss": 0.0667, + "theoretical_loss": 3.429363821525022, + "tokens_seen": 2220621824 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033028406355320173, + "loss": 0.0667, + "theoretical_loss": 3.4293310344742385, + "tokens_seen": 2220883968 + }, + { + "epoch": 0.67, + "learning_rate": 0.00033020381961161935, + "loss": 0.0649, + "theoretical_loss": 3.429298252376743, + "tokens_seen": 2221146112 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003301235756700369, + "loss": 0.0638, + "theoretical_loss": 3.429265475231202, + "tokens_seen": 2221408256 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.001115342602133751, + "objective/train/docs_used": 808517, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3400652408599854, + "objective/train/original_loss": 1.3400652408599854, + "objective/train/theoretical_loss": 3.429232703036284, + "objective/train/tokens_used": 2242130400, + "objective/train/value_avg": -0.010284423828125, + "objective/train/value_loss": 0.00031161116203293204, + "objective/train/value_max": -3.6776065826416016e-05, + "objective/train/value_min": -0.7216796875, + "objective/train/value_reward_corr": 0.7999857711414065, + "objective/train/value_std": 0.0249176025390625, + "objective/train/weight_avg": 1.0012611150741577, + "objective/train/weighted_lm_loss": 1.3421872854232788, + "objective/train/weights_max": 1.4046753644943237, + "objective/train/weights_min": 0.4194653332233429, + "theoretical_loss": 3.429232703036284, + "tokens_seen": 2221670400 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003300433317284545, + "loss": 0.065, + "theoretical_loss": 3.429232703036284, + "tokens_seen": 2221670400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032996308778687213, + "loss": 0.067, + "theoretical_loss": 3.429199935790657, + "tokens_seen": 2221932544 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003298828438452897, + "loss": 0.069, + "theoretical_loss": 3.4291671734929903, + "tokens_seen": 2222194688 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032980259990370725, + "loss": 0.0682, + "theoretical_loss": 3.4291344161419524, + "tokens_seen": 2222456832 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032972235596212486, + "loss": 0.0659, + "theoretical_loss": 3.4291016637362137, + "tokens_seen": 2222718976 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032964211202054247, + "loss": 0.0657, + "theoretical_loss": 3.429068916274444, + "tokens_seen": 2222981120 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032956186807896003, + "loss": 0.0653, + "theoretical_loss": 3.429036173755314, + "tokens_seen": 2223243264 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032948162413737764, + "loss": 0.0692, + "theoretical_loss": 3.429003436177496, + "tokens_seen": 2223505408 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032940138019579526, + "loss": 0.067, + "theoretical_loss": 3.4289707035396613, + "tokens_seen": 2223767552 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003293211362542128, + "loss": 0.0698, + "theoretical_loss": 3.428937975840482, + "tokens_seen": 2224029696 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032924089231263037, + "loss": 0.0685, + "theoretical_loss": 3.4289052530786313, + "tokens_seen": 2224291840 + }, + { + "epoch": 0.67, + "learning_rate": 0.000329160648371048, + "loss": 0.068, + "theoretical_loss": 3.4288725352527827, + "tokens_seen": 2224553984 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003290804044294656, + "loss": 0.0662, + "theoretical_loss": 3.42883982236161, + "tokens_seen": 2224816128 + }, + { + "epoch": 0.67, + "objective/train/advantage_avg": 0.00034239990054629743, + "objective/train/docs_used": 809850, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4501973390579224, + "objective/train/original_loss": 1.450197458267212, + "objective/train/theoretical_loss": 3.4288234677661125, + "objective/train/tokens_used": 2245407200, + "objective/train/value_avg": -0.005359649658203125, + "objective/train/value_loss": 0.00012629618868231773, + "objective/train/value_max": -3.8504600524902344e-05, + "objective/train/value_min": -0.23193359375, + "objective/train/value_reward_corr": 0.6468167905238793, + "objective/train/value_std": 0.00925445556640625, + "objective/train/weight_avg": 1.0004007816314697, + "objective/train/weighted_lm_loss": 1.4504789113998413, + "objective/train/weights_max": 1.1305923461914062, + "objective/train/weights_min": 0.3682044744491577, + "theoretical_loss": 3.4288234677661125, + "tokens_seen": 2224947200 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032900016048788316, + "loss": 0.0648, + "theoretical_loss": 3.428807114403787, + "tokens_seen": 2225078272 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032891991654630077, + "loss": 0.0626, + "theoretical_loss": 3.428774411377989, + "tokens_seen": 2225340416 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003288396726047184, + "loss": 0.0677, + "theoretical_loss": 3.4287417132828923, + "tokens_seen": 2225602560 + }, + { + "epoch": 0.67, + "learning_rate": 0.00032875942866313594, + "loss": 0.0656, + "theoretical_loss": 3.428709020117172, + "tokens_seen": 2225864704 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003286791847215535, + "loss": 0.0646, + "theoretical_loss": 3.4286763318795046, + "tokens_seen": 2226126848 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003285989407799711, + "loss": 0.0639, + "theoretical_loss": 3.428643648568567, + "tokens_seen": 2226388992 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003285186968383887, + "loss": 0.0685, + "theoretical_loss": 3.4286109701830374, + "tokens_seen": 2226651136 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003284384528968063, + "loss": 0.0677, + "theoretical_loss": 3.428578296721593, + "tokens_seen": 2226913280 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003283582089552239, + "loss": 0.0669, + "theoretical_loss": 3.4285456281829125, + "tokens_seen": 2227175424 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003282779650136415, + "loss": 0.0694, + "theoretical_loss": 3.428512964565675, + "tokens_seen": 2227437568 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032819772107205907, + "loss": 0.0663, + "theoretical_loss": 3.4284803058685602, + "tokens_seen": 2227699712 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003281174771304767, + "loss": 0.0657, + "theoretical_loss": 3.428447652090248, + "tokens_seen": 2227961856 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.0006519161979667842, + "objective/train/docs_used": 811140, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.225870966911316, + "objective/train/original_loss": 1.2258708477020264, + "objective/train/theoretical_loss": 3.4284150032294187, + "objective/train/tokens_used": 2248684000, + "objective/train/value_avg": -0.00849151611328125, + "objective/train/value_loss": 0.0002140124561265111, + "objective/train/value_max": -2.9087066650390625e-05, + "objective/train/value_min": -0.2666015625, + "objective/train/value_reward_corr": 0.770223641316452, + "objective/train/value_std": 0.01739501953125, + "objective/train/weight_avg": 1.0007503032684326, + "objective/train/weighted_lm_loss": 1.2265666723251343, + "objective/train/weights_max": 1.1595954895019531, + "objective/train/weights_min": 0.3700695335865021, + "theoretical_loss": 3.4284150032294187, + "tokens_seen": 2228224000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032803723318889424, + "loss": 0.0673, + "theoretical_loss": 3.4284150032294187, + "tokens_seen": 2228224000 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003279569892473118, + "loss": 0.0666, + "theoretical_loss": 3.428382359284754, + "tokens_seen": 2228486144 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003278767453057294, + "loss": 0.0674, + "theoretical_loss": 3.4283497202549347, + "tokens_seen": 2228748288 + }, + { + "epoch": 0.68, + "learning_rate": 0.000327796501364147, + "loss": 0.069, + "theoretical_loss": 3.4283170861386436, + "tokens_seen": 2229010432 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032771625742256463, + "loss": 0.0683, + "theoretical_loss": 3.4282844569345623, + "tokens_seen": 2229272576 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003276360134809822, + "loss": 0.0675, + "theoretical_loss": 3.428251832641375, + "tokens_seen": 2229534720 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003275557695393998, + "loss": 0.0686, + "theoretical_loss": 3.4282192132577647, + "tokens_seen": 2229796864 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032747552559781736, + "loss": 0.0666, + "theoretical_loss": 3.4281865987824154, + "tokens_seen": 2230059008 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003273952816562349, + "loss": 0.0661, + "theoretical_loss": 3.428153989214012, + "tokens_seen": 2230321152 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032731503771465253, + "loss": 0.0656, + "theoretical_loss": 3.4281213845512397, + "tokens_seen": 2230583296 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032723479377307015, + "loss": 0.0642, + "theoretical_loss": 3.428088784792784, + "tokens_seen": 2230845440 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032715454983148776, + "loss": 0.0697, + "theoretical_loss": 3.4280561899373305, + "tokens_seen": 2231107584 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003270743058899053, + "loss": 0.0673, + "theoretical_loss": 3.428023599983567, + "tokens_seen": 2231369728 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.00044635971426032484, + "objective/train/docs_used": 812344, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1689112186431885, + "objective/train/original_loss": 1.1689109802246094, + "objective/train/theoretical_loss": 3.4280073068444086, + "objective/train/tokens_used": 2251960800, + "objective/train/value_avg": -0.007732391357421875, + "objective/train/value_loss": 0.0002456700021866709, + "objective/train/value_max": -5.435943603515625e-05, + "objective/train/value_min": -0.2529296875, + "objective/train/value_reward_corr": 0.6849964703725564, + "objective/train/value_std": 0.01465606689453125, + "objective/train/weight_avg": 1.0005552768707275, + "objective/train/weighted_lm_loss": 1.1684818267822266, + "objective/train/weights_max": 1.287792682647705, + "objective/train/weights_min": 0.36833932995796204, + "theoretical_loss": 3.4280073068444086, + "tokens_seen": 2231500800 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032699406194832293, + "loss": 0.0673, + "theoretical_loss": 3.4279910149301798, + "tokens_seen": 2231631872 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003269138180067405, + "loss": 0.0676, + "theoretical_loss": 3.4279584347758565, + "tokens_seen": 2231894016 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032683357406515805, + "loss": 0.0651, + "theoretical_loss": 3.4279258595192856, + "tokens_seen": 2232156160 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032675333012357566, + "loss": 0.0648, + "theoretical_loss": 3.427893289159156, + "tokens_seen": 2232418304 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032667308618199327, + "loss": 0.0671, + "theoretical_loss": 3.427860723694156, + "tokens_seen": 2232680448 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003265928422404109, + "loss": 0.0677, + "theoretical_loss": 3.427828163122976, + "tokens_seen": 2232942592 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032651259829882844, + "loss": 0.0671, + "theoretical_loss": 3.427795607444306, + "tokens_seen": 2233204736 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032643235435724606, + "loss": 0.069, + "theoretical_loss": 3.4277630566568367, + "tokens_seen": 2233466880 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032635211041566367, + "loss": 0.0659, + "theoretical_loss": 3.4277305107592593, + "tokens_seen": 2233729024 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032627186647408117, + "loss": 0.0682, + "theoretical_loss": 3.427697969750265, + "tokens_seen": 2233991168 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003261916225324988, + "loss": 0.0684, + "theoretical_loss": 3.427665433628547, + "tokens_seen": 2234253312 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003261113785909164, + "loss": 0.0678, + "theoretical_loss": 3.427632902392797, + "tokens_seen": 2234515456 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.0008528545149601996, + "objective/train/docs_used": 813661, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3385423421859741, + "objective/train/original_loss": 1.3385424613952637, + "objective/train/theoretical_loss": 3.427600376041709, + "objective/train/tokens_used": 2255237600, + "objective/train/value_avg": -0.0046844482421875, + "objective/train/value_loss": 7.787495269440114e-05, + "objective/train/value_max": -3.218650817871094e-05, + "objective/train/value_min": -0.242919921875, + "objective/train/value_reward_corr": 0.7000210827951145, + "objective/train/value_std": 0.00942230224609375, + "objective/train/weight_avg": 1.0008913278579712, + "objective/train/weighted_lm_loss": 1.3399440050125122, + "objective/train/weights_max": 1.1798712015151978, + "objective/train/weights_min": 0.8197346925735474, + "theoretical_loss": 3.427600376041709, + "tokens_seen": 2234777600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032603113464933396, + "loss": 0.0671, + "theoretical_loss": 3.427600376041709, + "tokens_seen": 2234777600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032595089070775157, + "loss": 0.0647, + "theoretical_loss": 3.4275678545739763, + "tokens_seen": 2235039744 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003258706467661692, + "loss": 0.0671, + "theoretical_loss": 3.4275353379882927, + "tokens_seen": 2235301888 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003257904028245868, + "loss": 0.0669, + "theoretical_loss": 3.427502826283354, + "tokens_seen": 2235564032 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003257101588830043, + "loss": 0.0668, + "theoretical_loss": 3.427470319457854, + "tokens_seen": 2235826176 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003256299149414219, + "loss": 0.0685, + "theoretical_loss": 3.4274378175104894, + "tokens_seen": 2236088320 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003255496709998395, + "loss": 0.0659, + "theoretical_loss": 3.427405320439956, + "tokens_seen": 2236350464 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003254694270582571, + "loss": 0.0657, + "theoretical_loss": 3.4273728282449514, + "tokens_seen": 2236612608 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003253891831166747, + "loss": 0.066, + "theoretical_loss": 3.4273403409241716, + "tokens_seen": 2236874752 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003253089391750923, + "loss": 0.0667, + "theoretical_loss": 3.427307858476315, + "tokens_seen": 2237136896 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003252286952335099, + "loss": 0.069, + "theoretical_loss": 3.4272753809000793, + "tokens_seen": 2237399040 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003251484512919275, + "loss": 0.0675, + "theoretical_loss": 3.4272429081941636, + "tokens_seen": 2237661184 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032506820735034504, + "loss": 0.0692, + "theoretical_loss": 3.4272104403572667, + "tokens_seen": 2237923328 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.0004680883139371872, + "objective/train/docs_used": 814894, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2324265241622925, + "objective/train/original_loss": 1.232426404953003, + "objective/train/theoretical_loss": 3.4271942082642948, + "objective/train/tokens_used": 2258514400, + "objective/train/value_avg": -0.0095062255859375, + "objective/train/value_loss": 0.0003552815178409219, + "objective/train/value_max": -3.6776065826416016e-05, + "objective/train/value_min": -0.6796875, + "objective/train/value_reward_corr": 0.7258998078479855, + "objective/train/value_std": 0.0178985595703125, + "objective/train/weight_avg": 1.0006299018859863, + "objective/train/weighted_lm_loss": 1.2327933311462402, + "objective/train/weights_max": 1.376213550567627, + "objective/train/weights_min": 0.3958108723163605, + "theoretical_loss": 3.4271942082642948, + "tokens_seen": 2238054400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032498796340876265, + "loss": 0.0668, + "theoretical_loss": 3.4271779773880895, + "tokens_seen": 2238185472 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003249077194671802, + "loss": 0.0657, + "theoretical_loss": 3.427145519285331, + "tokens_seen": 2238447616 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003248274755255978, + "loss": 0.0694, + "theoretical_loss": 3.427113066047692, + "tokens_seen": 2238709760 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032474723158401543, + "loss": 0.0688, + "theoretical_loss": 3.4270806176738744, + "tokens_seen": 2238971904 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032466698764243304, + "loss": 0.067, + "theoretical_loss": 3.4270481741625796, + "tokens_seen": 2239234048 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003245867437008506, + "loss": 0.0678, + "theoretical_loss": 3.4270157355125095, + "tokens_seen": 2239496192 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032450649975926816, + "loss": 0.0687, + "theoretical_loss": 3.426983301722367, + "tokens_seen": 2239758336 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003244262558176858, + "loss": 0.0668, + "theoretical_loss": 3.4269508727908553, + "tokens_seen": 2240020480 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032434601187610333, + "loss": 0.0668, + "theoretical_loss": 3.426918448716678, + "tokens_seen": 2240282624 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032426576793452095, + "loss": 0.0681, + "theoretical_loss": 3.42688602949854, + "tokens_seen": 2240544768 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032418552399293856, + "loss": 0.064, + "theoretical_loss": 3.426853615135145, + "tokens_seen": 2240806912 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032410528005135617, + "loss": 0.0663, + "theoretical_loss": 3.426821205625199, + "tokens_seen": 2241069056 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.0005579056451097131, + "objective/train/docs_used": 816099, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3084162473678589, + "objective/train/original_loss": 1.3084161281585693, + "objective/train/theoretical_loss": 3.4267888009674072, + "objective/train/tokens_used": 2261791200, + "objective/train/value_avg": -0.0101470947265625, + "objective/train/value_loss": 0.0003198507765773684, + "objective/train/value_max": -2.6881694793701172e-05, + "objective/train/value_min": -0.7275390625, + "objective/train/value_reward_corr": 0.7792313074359762, + "objective/train/value_std": 0.0230712890625, + "objective/train/weight_avg": 1.0007097721099854, + "objective/train/weighted_lm_loss": 1.3082178831100464, + "objective/train/weights_max": 1.6977381706237793, + "objective/train/weights_min": 0.39976832270622253, + "theoretical_loss": 3.4267888009674072, + "tokens_seen": 2241331200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032402503610977373, + "loss": 0.0652, + "theoretical_loss": 3.4267888009674072, + "tokens_seen": 2241331200 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003239447921681913, + "loss": 0.0663, + "theoretical_loss": 3.4267564011604756, + "tokens_seen": 2241593344 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003238645482266089, + "loss": 0.0652, + "theoretical_loss": 3.426724006203112, + "tokens_seen": 2241855488 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032378430428502646, + "loss": 0.0648, + "theoretical_loss": 3.426691616094022, + "tokens_seen": 2242117632 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032370406034344407, + "loss": 0.0677, + "theoretical_loss": 3.4266592308319144, + "tokens_seen": 2242379776 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003236238164018617, + "loss": 0.067, + "theoretical_loss": 3.426626850415497, + "tokens_seen": 2242641920 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032354357246027924, + "loss": 0.0662, + "theoretical_loss": 3.4265944748434785, + "tokens_seen": 2242904064 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032346332851869685, + "loss": 0.0662, + "theoretical_loss": 3.4265621041145677, + "tokens_seen": 2243166208 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032338308457711447, + "loss": 0.0641, + "theoretical_loss": 3.426529738227475, + "tokens_seen": 2243428352 + }, + { + "epoch": 0.68, + "learning_rate": 0.000323302840635532, + "loss": 0.0643, + "theoretical_loss": 3.4264973771809104, + "tokens_seen": 2243690496 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003232225966939496, + "loss": 0.0666, + "theoretical_loss": 3.426465020973584, + "tokens_seen": 2243952640 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003231423527523672, + "loss": 0.0677, + "theoretical_loss": 3.4264326696042073, + "tokens_seen": 2244214784 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003230621088107848, + "loss": 0.0691, + "theoretical_loss": 3.4264003230714923, + "tokens_seen": 2244476928 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": -2.073908763122745e-05, + "objective/train/docs_used": 817136, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3620100021362305, + "objective/train/original_loss": 1.3620097637176514, + "objective/train/theoretical_loss": 3.42638415161848, + "objective/train/tokens_used": 2265068000, + "objective/train/value_avg": -0.00905609130859375, + "objective/train/value_loss": 0.0004213712236378342, + "objective/train/value_max": -7.253885269165039e-05, + "objective/train/value_min": -0.34912109375, + "objective/train/value_reward_corr": 0.6160935511149752, + "objective/train/value_std": 0.01435089111328125, + "objective/train/weight_avg": 1.0001745223999023, + "objective/train/weighted_lm_loss": 1.3625872135162354, + "objective/train/weights_max": 1.1662676334381104, + "objective/train/weights_min": 0.37542152404785156, + "theoretical_loss": 3.42638415161848, + "tokens_seen": 2244608000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032298186486920237, + "loss": 0.0651, + "theoretical_loss": 3.4263679813741503, + "tokens_seen": 2244739072 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032290162092762, + "loss": 0.0679, + "theoretical_loss": 3.4263356445108943, + "tokens_seen": 2245001216 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003228213769860376, + "loss": 0.0649, + "theoretical_loss": 3.426303312480438, + "tokens_seen": 2245263360 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032274113304445515, + "loss": 0.0688, + "theoretical_loss": 3.426270985281494, + "tokens_seen": 2245525504 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003226608891028727, + "loss": 0.0668, + "theoretical_loss": 3.426238662912777, + "tokens_seen": 2245787648 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003225806451612903, + "loss": 0.0679, + "theoretical_loss": 3.4262063453730014, + "tokens_seen": 2246049792 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032250040121970793, + "loss": 0.0677, + "theoretical_loss": 3.4261740326608825, + "tokens_seen": 2246311936 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003224201572781255, + "loss": 0.0647, + "theoretical_loss": 3.4261417247751353, + "tokens_seen": 2246574080 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003223399133365431, + "loss": 0.0664, + "theoretical_loss": 3.4261094217144765, + "tokens_seen": 2246836224 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003222596693949607, + "loss": 0.0682, + "theoretical_loss": 3.4260771234776226, + "tokens_seen": 2247098368 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003221794254533782, + "loss": 0.0677, + "theoretical_loss": 3.4260448300632906, + "tokens_seen": 2247360512 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032209918151179584, + "loss": 0.0679, + "theoretical_loss": 3.4260125414701976, + "tokens_seen": 2247622656 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.0007315482362173498, + "objective/train/docs_used": 818241, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2269222736358643, + "objective/train/original_loss": 1.2269222736358643, + "objective/train/theoretical_loss": 3.4259802576970624, + "objective/train/tokens_used": 2268344800, + "objective/train/value_avg": -0.0052032470703125, + "objective/train/value_loss": 9.3780858151149e-05, + "objective/train/value_max": -4.3332576751708984e-05, + "objective/train/value_min": -0.3203125, + "objective/train/value_reward_corr": 0.6977430695561531, + "objective/train/value_std": 0.01030731201171875, + "objective/train/weight_avg": 1.000777244567871, + "objective/train/weighted_lm_loss": 1.2275296449661255, + "objective/train/weights_max": 1.1834774017333984, + "objective/train/weights_min": 0.6207625865936279, + "theoretical_loss": 3.4259802576970624, + "tokens_seen": 2247884800 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032201893757021345, + "loss": 0.0647, + "theoretical_loss": 3.4259802576970624, + "tokens_seen": 2247884800 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032193869362863106, + "loss": 0.0672, + "theoretical_loss": 3.425947978742603, + "tokens_seen": 2248146944 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003218584496870486, + "loss": 0.0666, + "theoretical_loss": 3.425915704605538, + "tokens_seen": 2248409088 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032177820574546623, + "loss": 0.0662, + "theoretical_loss": 3.4258834352845877, + "tokens_seen": 2248671232 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032169796180388384, + "loss": 0.0664, + "theoretical_loss": 3.425851170778472, + "tokens_seen": 2248933376 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003216177178623014, + "loss": 0.0667, + "theoretical_loss": 3.4258189110859107, + "tokens_seen": 2249195520 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032153747392071896, + "loss": 0.0658, + "theoretical_loss": 3.425786656205626, + "tokens_seen": 2249457664 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003214572299791366, + "loss": 0.0672, + "theoretical_loss": 3.425754406136338, + "tokens_seen": 2249719808 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003213769860375542, + "loss": 0.0648, + "theoretical_loss": 3.425722160876769, + "tokens_seen": 2249981952 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032129674209597174, + "loss": 0.0689, + "theoretical_loss": 3.425689920425642, + "tokens_seen": 2250244096 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032121649815438936, + "loss": 0.0667, + "theoretical_loss": 3.42565768478168, + "tokens_seen": 2250506240 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032113625421280697, + "loss": 0.0678, + "theoretical_loss": 3.4256254539436055, + "tokens_seen": 2250768384 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032105601027122453, + "loss": 0.067, + "theoretical_loss": 3.4255932279101433, + "tokens_seen": 2251030528 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.0012125978246331215, + "objective/train/docs_used": 819526, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4415205717086792, + "objective/train/original_loss": 1.4415202140808105, + "objective/train/theoretical_loss": 3.4255771166947424, + "objective/train/tokens_used": 2271621600, + "objective/train/value_avg": -0.01105499267578125, + "objective/train/value_loss": 0.00021423044381663203, + "objective/train/value_max": -5.3882598876953125e-05, + "objective/train/value_min": -0.497314453125, + "objective/train/value_reward_corr": 0.826165006112678, + "objective/train/value_std": 0.02288818359375, + "objective/train/weight_avg": 1.0013140439987183, + "objective/train/weighted_lm_loss": 1.4430190324783325, + "objective/train/weights_max": 1.2328771352767944, + "objective/train/weights_min": 0.37406063079833984, + "theoretical_loss": 3.4255771166947424, + "tokens_seen": 2251161600 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003209757663296421, + "loss": 0.0679, + "theoretical_loss": 3.4255610066800166, + "tokens_seen": 2251292672 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003208955223880597, + "loss": 0.0672, + "theoretical_loss": 3.425528790251952, + "tokens_seen": 2251554816 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003208152784464773, + "loss": 0.0673, + "theoretical_loss": 3.4254965786246734, + "tokens_seen": 2251816960 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032073503450489487, + "loss": 0.0677, + "theoretical_loss": 3.4254643717969073, + "tokens_seen": 2252079104 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003206547905633125, + "loss": 0.0661, + "theoretical_loss": 3.42543216976738, + "tokens_seen": 2252341248 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003205745466217301, + "loss": 0.0645, + "theoretical_loss": 3.425399972534818, + "tokens_seen": 2252603392 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032049430268014765, + "loss": 0.0642, + "theoretical_loss": 3.4253677800979494, + "tokens_seen": 2252865536 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003204140587385652, + "loss": 0.0677, + "theoretical_loss": 3.425335592455501, + "tokens_seen": 2253127680 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003203338147969828, + "loss": 0.0668, + "theoretical_loss": 3.4253034096062014, + "tokens_seen": 2253389824 + }, + { + "epoch": 0.68, + "learning_rate": 0.00032025357085540044, + "loss": 0.0706, + "theoretical_loss": 3.4252712315487797, + "tokens_seen": 2253651968 + }, + { + "epoch": 0.68, + "learning_rate": 0.000320173326913818, + "loss": 0.069, + "theoretical_loss": 3.4252390582819645, + "tokens_seen": 2253914112 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003200930829722356, + "loss": 0.0667, + "theoretical_loss": 3.4252068898044863, + "tokens_seen": 2254176256 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": -6.209603725437773e-06, + "objective/train/docs_used": 820753, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2472572326660156, + "objective/train/original_loss": 1.2472572326660156, + "objective/train/theoretical_loss": 3.425174726115075, + "objective/train/tokens_used": 2274898400, + "objective/train/value_avg": -0.007427215576171875, + "objective/train/value_loss": 9.021081496030092e-05, + "objective/train/value_max": -4.1961669921875e-05, + "objective/train/value_min": -0.2496337890625, + "objective/train/value_reward_corr": 0.7706789534692144, + "objective/train/value_std": 0.01107025146484375, + "objective/train/weight_avg": 1.0000383853912354, + "objective/train/weighted_lm_loss": 1.247596263885498, + "objective/train/weights_max": 1.1017274856567383, + "objective/train/weights_min": 0.736079752445221, + "theoretical_loss": 3.425174726115075, + "tokens_seen": 2254438400 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003200128390306532, + "loss": 0.0646, + "theoretical_loss": 3.425174726115075, + "tokens_seen": 2254438400 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003199325950890708, + "loss": 0.0659, + "theoretical_loss": 3.425142567212461, + "tokens_seen": 2254700544 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003198523511474884, + "loss": 0.0656, + "theoretical_loss": 3.4251104130953762, + "tokens_seen": 2254962688 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031977210720590595, + "loss": 0.0693, + "theoretical_loss": 3.4250782637625514, + "tokens_seen": 2255224832 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003196918632643235, + "loss": 0.0673, + "theoretical_loss": 3.4250461192127193, + "tokens_seen": 2255486976 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003196116193227411, + "loss": 0.067, + "theoretical_loss": 3.4250139794446124, + "tokens_seen": 2255749120 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031953137538115873, + "loss": 0.0634, + "theoretical_loss": 3.4249818444569637, + "tokens_seen": 2256011264 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031945113143957635, + "loss": 0.065, + "theoretical_loss": 3.424949714248507, + "tokens_seen": 2256273408 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003193708874979939, + "loss": 0.0664, + "theoretical_loss": 3.4249175888179764, + "tokens_seen": 2256535552 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003192906435564115, + "loss": 0.0644, + "theoretical_loss": 3.424885468164106, + "tokens_seen": 2256797696 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003192103996148291, + "loss": 0.0673, + "theoretical_loss": 3.4248533522856315, + "tokens_seen": 2257059840 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031913015567324663, + "loss": 0.0704, + "theoretical_loss": 3.424821241181288, + "tokens_seen": 2257321984 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031904991173166425, + "loss": 0.0676, + "theoretical_loss": 3.4247891348498114, + "tokens_seen": 2257584128 + }, + { + "epoch": 0.68, + "objective/train/advantage_avg": 0.001051482162438333, + "objective/train/docs_used": 822060, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2332792282104492, + "objective/train/original_loss": 1.2332792282104492, + "objective/train/theoretical_loss": 3.4247730834735037, + "objective/train/tokens_used": 2278175200, + "objective/train/value_avg": -0.00476837158203125, + "objective/train/value_loss": 7.368699880316854e-05, + "objective/train/value_max": -3.218650817871094e-05, + "objective/train/value_min": -0.31494140625, + "objective/train/value_reward_corr": 0.7164398341766667, + "objective/train/value_std": 0.0090789794921875, + "objective/train/weight_avg": 1.0010877847671509, + "objective/train/weighted_lm_loss": 1.2348461151123047, + "objective/train/weights_max": 1.1168068647384644, + "objective/train/weights_min": 0.8260654807090759, + "theoretical_loss": 3.4247730834735037, + "tokens_seen": 2257715200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031896966779008186, + "loss": 0.0651, + "theoretical_loss": 3.424757033289939, + "tokens_seen": 2257846272 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031888942384849947, + "loss": 0.0678, + "theoretical_loss": 3.424724936500407, + "tokens_seen": 2258108416 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031880917990691703, + "loss": 0.0657, + "theoretical_loss": 3.424692844479953, + "tokens_seen": 2258370560 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031872893596533464, + "loss": 0.0689, + "theoretical_loss": 3.424660757227315, + "tokens_seen": 2258632704 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031864869202375226, + "loss": 0.0696, + "theoretical_loss": 3.4246286747412316, + "tokens_seen": 2258894848 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031856844808216976, + "loss": 0.0691, + "theoretical_loss": 3.4245965970204413, + "tokens_seen": 2259156992 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003184882041405874, + "loss": 0.0661, + "theoretical_loss": 3.4245645240636833, + "tokens_seen": 2259419136 + }, + { + "epoch": 0.68, + "learning_rate": 0.000318407960199005, + "loss": 0.0696, + "theoretical_loss": 3.4245324558696986, + "tokens_seen": 2259681280 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003183277162574226, + "loss": 0.0673, + "theoretical_loss": 3.4245003924372264, + "tokens_seen": 2259943424 + }, + { + "epoch": 0.68, + "learning_rate": 0.00031824747231584016, + "loss": 0.0637, + "theoretical_loss": 3.424468333765008, + "tokens_seen": 2260205568 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031816722837425777, + "loss": 0.0672, + "theoretical_loss": 3.4244362798517844, + "tokens_seen": 2260467712 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003180869844326754, + "loss": 0.0651, + "theoretical_loss": 3.4244042306962976, + "tokens_seen": 2260729856 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 4.142043326282874e-05, + "objective/train/docs_used": 823330, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.305637240409851, + "objective/train/original_loss": 1.3056371212005615, + "objective/train/theoretical_loss": 3.42437218629729, + "objective/train/tokens_used": 2281452000, + "objective/train/value_avg": -0.00960540771484375, + "objective/train/value_loss": 0.0003837741387542337, + "objective/train/value_max": -5.227327346801758e-05, + "objective/train/value_min": -0.70068359375, + "objective/train/value_reward_corr": 0.7206865724791649, + "objective/train/value_std": 0.0199737548828125, + "objective/train/weight_avg": 1.000213861465454, + "objective/train/weighted_lm_loss": 1.3052417039871216, + "objective/train/weights_max": 1.5599620342254639, + "objective/train/weights_min": 0.37613826990127563, + "theoretical_loss": 3.42437218629729, + "tokens_seen": 2260992000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003180067404910929, + "loss": 0.0677, + "theoretical_loss": 3.42437218629729, + "tokens_seen": 2260992000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003179264965495105, + "loss": 0.0686, + "theoretical_loss": 3.4243401466535044, + "tokens_seen": 2261254144 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003178462526079281, + "loss": 0.0697, + "theoretical_loss": 3.424308111763683, + "tokens_seen": 2261516288 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031776600866634567, + "loss": 0.0664, + "theoretical_loss": 3.4242760816265707, + "tokens_seen": 2261778432 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003176857647247633, + "loss": 0.0697, + "theoretical_loss": 3.4242440562409113, + "tokens_seen": 2262040576 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003176055207831809, + "loss": 0.0686, + "theoretical_loss": 3.424212035605449, + "tokens_seen": 2262302720 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003175252768415985, + "loss": 0.0694, + "theoretical_loss": 3.4241800197189294, + "tokens_seen": 2262564864 + }, + { + "epoch": 0.69, + "learning_rate": 0.000317445032900016, + "loss": 0.0673, + "theoretical_loss": 3.4241480085800977, + "tokens_seen": 2262827008 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003173647889584336, + "loss": 0.0685, + "theoretical_loss": 3.4241160021877004, + "tokens_seen": 2263089152 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031728454501685124, + "loss": 0.0694, + "theoretical_loss": 3.424084000540484, + "tokens_seen": 2263351296 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003172043010752688, + "loss": 0.0662, + "theoretical_loss": 3.424052003637195, + "tokens_seen": 2263613440 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003171240571336864, + "loss": 0.0681, + "theoretical_loss": 3.4240200114765815, + "tokens_seen": 2263875584 + }, + { + "epoch": 0.69, + "learning_rate": 0.000317043813192104, + "loss": 0.0653, + "theoretical_loss": 3.423988024057391, + "tokens_seen": 2264137728 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.0005332150030881166, + "objective/train/docs_used": 824641, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.458601951599121, + "objective/train/original_loss": 1.4586018323898315, + "objective/train/theoretical_loss": 3.4239720321254388, + "objective/train/tokens_used": 2284728800, + "objective/train/value_avg": -0.005550384521484375, + "objective/train/value_loss": 0.00015455740503966808, + "objective/train/value_max": -2.2470951080322266e-05, + "objective/train/value_min": -0.2269287109375, + "objective/train/value_reward_corr": 0.6511970689407572, + "objective/train/value_std": 0.01013946533203125, + "objective/train/weight_avg": 1.0006014108657837, + "objective/train/weighted_lm_loss": 1.4605647325515747, + "objective/train/weights_max": 1.1240557432174683, + "objective/train/weights_min": 0.36819323897361755, + "theoretical_loss": 3.4239720321254388, + "tokens_seen": 2264268800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031696356925052163, + "loss": 0.0692, + "theoretical_loss": 3.423956041378373, + "tokens_seen": 2264399872 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003168833253089392, + "loss": 0.0659, + "theoretical_loss": 3.423924063438275, + "tokens_seen": 2264662016 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031680308136735675, + "loss": 0.0656, + "theoretical_loss": 3.4238920902358467, + "tokens_seen": 2264924160 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031672283742577436, + "loss": 0.0666, + "theoretical_loss": 3.423860121769839, + "tokens_seen": 2265186304 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003166425934841919, + "loss": 0.0662, + "theoretical_loss": 3.423828158039001, + "tokens_seen": 2265448448 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031656234954260953, + "loss": 0.0696, + "theoretical_loss": 3.4237961990420844, + "tokens_seen": 2265710592 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031648210560102715, + "loss": 0.0668, + "theoretical_loss": 3.42376424477784, + "tokens_seen": 2265972736 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031640186165944476, + "loss": 0.0675, + "theoretical_loss": 3.4237322952450198, + "tokens_seen": 2266234880 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003163216177178623, + "loss": 0.0677, + "theoretical_loss": 3.423700350442376, + "tokens_seen": 2266497024 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003162413737762799, + "loss": 0.0659, + "theoretical_loss": 3.4236684103686614, + "tokens_seen": 2266759168 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003161611298346975, + "loss": 0.0681, + "theoretical_loss": 3.423636475022629, + "tokens_seen": 2267021312 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031608088589311505, + "loss": 0.0671, + "theoretical_loss": 3.423604544403032, + "tokens_seen": 2267283456 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.00025811055093072355, + "objective/train/docs_used": 825734, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2673412561416626, + "objective/train/original_loss": 1.2673412561416626, + "objective/train/theoretical_loss": 3.4235726185086257, + "objective/train/tokens_used": 2288005600, + "objective/train/value_avg": -0.006771087646484375, + "objective/train/value_loss": 0.00013087765546515584, + "objective/train/value_max": -4.83393669128418e-05, + "objective/train/value_min": -0.45654296875, + "objective/train/value_reward_corr": 0.6977213837669967, + "objective/train/value_std": 0.01183319091796875, + "objective/train/weight_avg": 1.0003224611282349, + "objective/train/weighted_lm_loss": 1.2671499252319336, + "objective/train/weights_max": 1.501617670059204, + "objective/train/weights_min": 0.6095831990242004, + "theoretical_loss": 3.4235726185086257, + "tokens_seen": 2267545600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031600064195153266, + "loss": 0.0663, + "theoretical_loss": 3.4235726185086257, + "tokens_seen": 2267545600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031592039800995027, + "loss": 0.0665, + "theoretical_loss": 3.423540697338164, + "tokens_seen": 2267807744 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003158401540683679, + "loss": 0.0672, + "theoretical_loss": 3.423508780890402, + "tokens_seen": 2268069888 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031575991012678544, + "loss": 0.0678, + "theoretical_loss": 3.4234768691640953, + "tokens_seen": 2268332032 + }, + { + "epoch": 0.69, + "learning_rate": 0.000315679666185203, + "loss": 0.0661, + "theoretical_loss": 3.4234449621580003, + "tokens_seen": 2268594176 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003155994222436206, + "loss": 0.067, + "theoretical_loss": 3.4234130598708727, + "tokens_seen": 2268856320 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031551917830203817, + "loss": 0.0667, + "theoretical_loss": 3.42338116230147, + "tokens_seen": 2269118464 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003154389343604558, + "loss": 0.0661, + "theoretical_loss": 3.4233492694485497, + "tokens_seen": 2269380608 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003153586904188734, + "loss": 0.0641, + "theoretical_loss": 3.4233173813108695, + "tokens_seen": 2269642752 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031527844647729096, + "loss": 0.0663, + "theoretical_loss": 3.423285497887188, + "tokens_seen": 2269904896 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031519820253570857, + "loss": 0.0673, + "theoretical_loss": 3.4232536191762635, + "tokens_seen": 2270167040 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003151179585941262, + "loss": 0.0682, + "theoretical_loss": 3.423221745176856, + "tokens_seen": 2270429184 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031503771465254374, + "loss": 0.0648, + "theoretical_loss": 3.423189875887725, + "tokens_seen": 2270691328 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.0004905235837213695, + "objective/train/docs_used": 826907, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.274688720703125, + "objective/train/original_loss": 1.274688720703125, + "objective/train/theoretical_loss": 3.4231739430091253, + "objective/train/tokens_used": 2291282400, + "objective/train/value_avg": -0.005580902099609375, + "objective/train/value_loss": 9.647780825616792e-05, + "objective/train/value_max": -3.1948089599609375e-05, + "objective/train/value_min": -0.318603515625, + "objective/train/value_reward_corr": 0.7587791405220843, + "objective/train/value_std": 0.0113525390625, + "objective/train/weight_avg": 1.0005372762680054, + "objective/train/weighted_lm_loss": 1.2754013538360596, + "objective/train/weights_max": 1.1139694452285767, + "objective/train/weights_min": 0.7064666748046875, + "theoretical_loss": 3.4231739430091253, + "tokens_seen": 2270822400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003149574707109613, + "loss": 0.0647, + "theoretical_loss": 3.4231580113076303, + "tokens_seen": 2270953472 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003148772267693789, + "loss": 0.0669, + "theoretical_loss": 3.4231261514353335, + "tokens_seen": 2271215616 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003147969828277965, + "loss": 0.0649, + "theoretical_loss": 3.4230942962695954, + "tokens_seen": 2271477760 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003147167388862141, + "loss": 0.0707, + "theoretical_loss": 3.4230624458091774, + "tokens_seen": 2271739904 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003146364949446317, + "loss": 0.0684, + "theoretical_loss": 3.423030600052842, + "tokens_seen": 2272002048 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003145562510030493, + "loss": 0.0646, + "theoretical_loss": 3.422998758999352, + "tokens_seen": 2272264192 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031447600706146687, + "loss": 0.0629, + "theoretical_loss": 3.4229669226474697, + "tokens_seen": 2272526336 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003143957631198844, + "loss": 0.067, + "theoretical_loss": 3.422935090995959, + "tokens_seen": 2272788480 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031431551917830204, + "loss": 0.0675, + "theoretical_loss": 3.4229032640435846, + "tokens_seen": 2273050624 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031423527523671965, + "loss": 0.0655, + "theoretical_loss": 3.42287144178911, + "tokens_seen": 2273312768 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003141550312951372, + "loss": 0.0715, + "theoretical_loss": 3.4228396242313006, + "tokens_seen": 2273574912 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003140747873535548, + "loss": 0.0646, + "theoretical_loss": 3.422807811368922, + "tokens_seen": 2273837056 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.0011075290385633707, + "objective/train/docs_used": 828122, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2920111417770386, + "objective/train/original_loss": 1.292011022567749, + "objective/train/theoretical_loss": 3.422776003200739, + "objective/train/tokens_used": 2294559200, + "objective/train/value_avg": -0.007415771484375, + "objective/train/value_loss": 0.00022583450481761247, + "objective/train/value_max": -2.193450927734375e-05, + "objective/train/value_min": -0.55029296875, + "objective/train/value_reward_corr": 0.7401537018891523, + "objective/train/value_std": 0.0174713134765625, + "objective/train/weight_avg": 1.001213788986206, + "objective/train/weighted_lm_loss": 1.2926075458526611, + "objective/train/weights_max": 1.284966230392456, + "objective/train/weights_min": 0.3930663764476776, + "theoretical_loss": 3.422776003200739, + "tokens_seen": 2274099200 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031399454341197243, + "loss": 0.0658, + "theoretical_loss": 3.422776003200739, + "tokens_seen": 2274099200 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031391429947039004, + "loss": 0.0709, + "theoretical_loss": 3.4227441997255195, + "tokens_seen": 2274361344 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031383405552880755, + "loss": 0.0694, + "theoretical_loss": 3.4227124009420296, + "tokens_seen": 2274623488 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031375381158722516, + "loss": 0.0654, + "theoretical_loss": 3.422680606849036, + "tokens_seen": 2274885632 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003136735676456428, + "loss": 0.0657, + "theoretical_loss": 3.4226488174453076, + "tokens_seen": 2275147776 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031359332370406033, + "loss": 0.0678, + "theoretical_loss": 3.4226170327296113, + "tokens_seen": 2275409920 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031351307976247795, + "loss": 0.0699, + "theoretical_loss": 3.422585252700717, + "tokens_seen": 2275672064 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031343283582089556, + "loss": 0.0677, + "theoretical_loss": 3.422553477357393, + "tokens_seen": 2275934208 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003133525918793131, + "loss": 0.0669, + "theoretical_loss": 3.4225217066984093, + "tokens_seen": 2276196352 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003132723479377307, + "loss": 0.0646, + "theoretical_loss": 3.422489940722536, + "tokens_seen": 2276458496 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003131921039961483, + "loss": 0.0664, + "theoretical_loss": 3.422458179428543, + "tokens_seen": 2276720640 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003131118600545659, + "loss": 0.0651, + "theoretical_loss": 3.422426422815202, + "tokens_seen": 2276982784 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031303161611298346, + "loss": 0.0669, + "theoretical_loss": 3.4223946708812845, + "tokens_seen": 2277244928 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": -0.00016944411618169397, + "objective/train/docs_used": 829293, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4860248565673828, + "objective/train/original_loss": 1.4860246181488037, + "objective/train/theoretical_loss": 3.422378796668726, + "objective/train/tokens_used": 2297836000, + "objective/train/value_avg": -0.0090484619140625, + "objective/train/value_loss": 0.00036971012013964355, + "objective/train/value_max": -3.349781036376953e-05, + "objective/train/value_min": -0.95263671875, + "objective/train/value_reward_corr": 0.7871249688210622, + "objective/train/value_std": 0.0220489501953125, + "objective/train/weight_avg": 0.9999957084655762, + "objective/train/weighted_lm_loss": 1.485609769821167, + "objective/train/weights_max": 1.4590624570846558, + "objective/train/weights_min": 0.37661492824554443, + "theoretical_loss": 3.422378796668726, + "tokens_seen": 2277376000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031295137217140107, + "loss": 0.068, + "theoretical_loss": 3.422362923625562, + "tokens_seen": 2277507072 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003128711282298187, + "loss": 0.0669, + "theoretical_loss": 3.422331181046807, + "tokens_seen": 2277769216 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031279088428823624, + "loss": 0.0667, + "theoretical_loss": 3.4222994431437925, + "tokens_seen": 2278031360 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003127106403466538, + "loss": 0.0657, + "theoretical_loss": 3.4222677099152916, + "tokens_seen": 2278293504 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003126303964050714, + "loss": 0.0666, + "theoretical_loss": 3.4222359813600782, + "tokens_seen": 2278555648 + }, + { + "epoch": 0.69, + "learning_rate": 0.000312550152463489, + "loss": 0.0646, + "theoretical_loss": 3.422204257476927, + "tokens_seen": 2278817792 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003124699085219066, + "loss": 0.0645, + "theoretical_loss": 3.4221725382646118, + "tokens_seen": 2279079936 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003123896645803242, + "loss": 0.0663, + "theoretical_loss": 3.422140823721908, + "tokens_seen": 2279342080 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003123094206387418, + "loss": 0.0684, + "theoretical_loss": 3.422109113847592, + "tokens_seen": 2279604224 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031222917669715937, + "loss": 0.0643, + "theoretical_loss": 3.422077408640439, + "tokens_seen": 2279866368 + }, + { + "epoch": 0.69, + "learning_rate": 0.000312148932755577, + "loss": 0.0671, + "theoretical_loss": 3.422045708099226, + "tokens_seen": 2280128512 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031206868881399454, + "loss": 0.0687, + "theoretical_loss": 3.42201401222273, + "tokens_seen": 2280390656 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.000552852638065815, + "objective/train/docs_used": 830439, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.256763219833374, + "objective/train/original_loss": 1.256762981414795, + "objective/train/theoretical_loss": 3.421982321009728, + "objective/train/tokens_used": 2301112800, + "objective/train/value_avg": -0.007610321044921875, + "objective/train/value_loss": 0.0004640833067242056, + "objective/train/value_max": -4.297494888305664e-05, + "objective/train/value_min": -0.60205078125, + "objective/train/value_reward_corr": 0.6248717318173257, + "objective/train/value_std": 0.0151824951171875, + "objective/train/weight_avg": 1.000746250152588, + "objective/train/weighted_lm_loss": 1.257375717163086, + "objective/train/weights_max": 1.4147088527679443, + "objective/train/weights_min": 0.3931143283843994, + "theoretical_loss": 3.421982321009728, + "tokens_seen": 2280652800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031198844487241215, + "loss": 0.0666, + "theoretical_loss": 3.421982321009728, + "tokens_seen": 2280652800 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003119082009308297, + "loss": 0.0676, + "theoretical_loss": 3.4219506344589985, + "tokens_seen": 2280914944 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003118279569892473, + "loss": 0.0658, + "theoretical_loss": 3.421918952569319, + "tokens_seen": 2281177088 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031174771304766494, + "loss": 0.0643, + "theoretical_loss": 3.4218872753394702, + "tokens_seen": 2281439232 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003116674691060825, + "loss": 0.0663, + "theoretical_loss": 3.4218556027682294, + "tokens_seen": 2281701376 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003115872251645001, + "loss": 0.0645, + "theoretical_loss": 3.4218239348543777, + "tokens_seen": 2281963520 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031150698122291766, + "loss": 0.0678, + "theoretical_loss": 3.4217922715966944, + "tokens_seen": 2282225664 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003114267372813352, + "loss": 0.0662, + "theoretical_loss": 3.421760612993961, + "tokens_seen": 2282487808 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031134649333975284, + "loss": 0.0663, + "theoretical_loss": 3.421728959044958, + "tokens_seen": 2282749952 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031126624939817045, + "loss": 0.0659, + "theoretical_loss": 3.4216973097484678, + "tokens_seen": 2283012096 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031118600545658806, + "loss": 0.0666, + "theoretical_loss": 3.4216656651032715, + "tokens_seen": 2283274240 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003111057615150056, + "loss": 0.0654, + "theoretical_loss": 3.421634025108152, + "tokens_seen": 2283536384 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031102551757342323, + "loss": 0.0659, + "theoretical_loss": 3.421602389761893, + "tokens_seen": 2283798528 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.0004953542375005782, + "objective/train/docs_used": 831582, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3190346956253052, + "objective/train/original_loss": 1.3190345764160156, + "objective/train/theoretical_loss": 3.4215865738317057, + "objective/train/tokens_used": 2304389600, + "objective/train/value_avg": -0.005458831787109375, + "objective/train/value_loss": 0.00012545128993224353, + "objective/train/value_max": -2.467632293701172e-05, + "objective/train/value_min": -0.343017578125, + "objective/train/value_reward_corr": 0.6331208949384891, + "objective/train/value_std": 0.0095672607421875, + "objective/train/weight_avg": 1.0005526542663574, + "objective/train/weighted_lm_loss": 1.3202760219573975, + "objective/train/weights_max": 1.2151522636413574, + "objective/train/weights_min": 0.3685888350009918, + "theoretical_loss": 3.4215865738317057, + "tokens_seen": 2283929600 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003109452736318408, + "loss": 0.0664, + "theoretical_loss": 3.421570759063277, + "tokens_seen": 2284060672 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031086502969025835, + "loss": 0.0667, + "theoretical_loss": 3.4215391330110885, + "tokens_seen": 2284322816 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031078478574867596, + "loss": 0.0677, + "theoretical_loss": 3.421507511604111, + "tokens_seen": 2284584960 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003107045418070936, + "loss": 0.0648, + "theoretical_loss": 3.42147589484113, + "tokens_seen": 2284847104 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003106242978655112, + "loss": 0.0681, + "theoretical_loss": 3.4214442827209313, + "tokens_seen": 2285109248 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031054405392392874, + "loss": 0.0632, + "theoretical_loss": 3.4214126752423, + "tokens_seen": 2285371392 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031046380998234636, + "loss": 0.0652, + "theoretical_loss": 3.4213810724040217, + "tokens_seen": 2285633536 + }, + { + "epoch": 0.69, + "learning_rate": 0.00031038356604076397, + "loss": 0.0649, + "theoretical_loss": 3.421349474204884, + "tokens_seen": 2285895680 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003103033220991815, + "loss": 0.0661, + "theoretical_loss": 3.4213178806436737, + "tokens_seen": 2286157824 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003102230781575991, + "loss": 0.0637, + "theoretical_loss": 3.4212862917191784, + "tokens_seen": 2286419968 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003101428342160167, + "loss": 0.0658, + "theoretical_loss": 3.4212547074301862, + "tokens_seen": 2286682112 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003100625902744343, + "loss": 0.0644, + "theoretical_loss": 3.4212231277754848, + "tokens_seen": 2286944256 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": -0.0007914595771580935, + "objective/train/docs_used": 832763, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3256409168243408, + "objective/train/original_loss": 1.3256409168243408, + "objective/train/theoretical_loss": 3.4211915527538643, + "objective/train/tokens_used": 2307666400, + "objective/train/value_avg": -0.007171630859375, + "objective/train/value_loss": 0.00014895742060616612, + "objective/train/value_max": -2.9981136322021484e-05, + "objective/train/value_min": -0.1917724609375, + "objective/train/value_reward_corr": 0.8010973247790141, + "objective/train/value_std": 0.0142364501953125, + "objective/train/weight_avg": 0.9992815256118774, + "objective/train/weighted_lm_loss": 1.3248015642166138, + "objective/train/weights_max": 1.183910846710205, + "objective/train/weights_min": 0.6507918834686279, + "theoretical_loss": 3.4211915527538643, + "tokens_seen": 2287206400 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030998234633285187, + "loss": 0.0651, + "theoretical_loss": 3.4211915527538643, + "tokens_seen": 2287206400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003099021023912695, + "loss": 0.0649, + "theoretical_loss": 3.4211599823641134, + "tokens_seen": 2287468544 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003098218584496871, + "loss": 0.0668, + "theoretical_loss": 3.421128416605022, + "tokens_seen": 2287730688 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003097416145081046, + "loss": 0.0661, + "theoretical_loss": 3.4210968554753807, + "tokens_seen": 2287992832 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003096613705665222, + "loss": 0.0651, + "theoretical_loss": 3.42106529897398, + "tokens_seen": 2288254976 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003095811266249398, + "loss": 0.0669, + "theoretical_loss": 3.4210337470996106, + "tokens_seen": 2288517120 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003095008826833574, + "loss": 0.0663, + "theoretical_loss": 3.4210021998510647, + "tokens_seen": 2288779264 + }, + { + "epoch": 0.69, + "learning_rate": 0.000309420638741775, + "loss": 0.0664, + "theoretical_loss": 3.4209706572271346, + "tokens_seen": 2289041408 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003093403948001926, + "loss": 0.0667, + "theoretical_loss": 3.4209391192266128, + "tokens_seen": 2289303552 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003092601508586102, + "loss": 0.0672, + "theoretical_loss": 3.420907585848292, + "tokens_seen": 2289565696 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003091799069170278, + "loss": 0.0669, + "theoretical_loss": 3.4208760570909655, + "tokens_seen": 2289827840 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030909966297544534, + "loss": 0.0648, + "theoretical_loss": 3.420844532953428, + "tokens_seen": 2290089984 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030901941903386295, + "loss": 0.0677, + "theoretical_loss": 3.420813013434473, + "tokens_seen": 2290352128 + }, + { + "epoch": 0.69, + "objective/train/advantage_avg": 0.0008832691819407046, + "objective/train/docs_used": 833966, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3830344676971436, + "objective/train/original_loss": 1.3830342292785645, + "objective/train/theoretical_loss": 3.4207972554065877, + "objective/train/tokens_used": 2310943200, + "objective/train/value_avg": -0.00794219970703125, + "objective/train/value_loss": 0.0003235115436837077, + "objective/train/value_max": -1.2636184692382812e-05, + "objective/train/value_min": -0.5380859375, + "objective/train/value_reward_corr": 0.663530368934879, + "objective/train/value_std": 0.0168304443359375, + "objective/train/weight_avg": 1.001022219657898, + "objective/train/weighted_lm_loss": 1.3845484256744385, + "objective/train/weights_max": 1.5663223266601562, + "objective/train/weights_min": 0.3698508143424988, + "theoretical_loss": 3.4207972554065877, + "tokens_seen": 2290483200 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003089391750922805, + "loss": 0.0655, + "theoretical_loss": 3.4207814985328957, + "tokens_seen": 2290614272 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003088589311506981, + "loss": 0.0656, + "theoretical_loss": 3.4207499882474917, + "tokens_seen": 2290876416 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030877868720911573, + "loss": 0.0633, + "theoretical_loss": 3.4207184825770565, + "tokens_seen": 2291138560 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030869844326753335, + "loss": 0.0644, + "theoretical_loss": 3.420686981520386, + "tokens_seen": 2291400704 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003086181993259509, + "loss": 0.0664, + "theoretical_loss": 3.420655485076277, + "tokens_seen": 2291662848 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030853795538436846, + "loss": 0.0652, + "theoretical_loss": 3.420623993243527, + "tokens_seen": 2291924992 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003084577114427861, + "loss": 0.0661, + "theoretical_loss": 3.420592506020933, + "tokens_seen": 2292187136 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030837746750120363, + "loss": 0.0646, + "theoretical_loss": 3.420561023407293, + "tokens_seen": 2292449280 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030829722355962125, + "loss": 0.0674, + "theoretical_loss": 3.420529545401406, + "tokens_seen": 2292711424 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030821697961803886, + "loss": 0.067, + "theoretical_loss": 3.42049807200207, + "tokens_seen": 2292973568 + }, + { + "epoch": 0.69, + "learning_rate": 0.00030813673567645647, + "loss": 0.066, + "theoretical_loss": 3.420466603208085, + "tokens_seen": 2293235712 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030805649173487403, + "loss": 0.0634, + "theoretical_loss": 3.4204351390182506, + "tokens_seen": 2293497856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7361949066418735, + "debugging/entropy-1-grams": 5.412433223050153, + "debugging/length": 455.6666666666667, + "debugging/num_segments": 15, + "debugging/raw_token_scores_avg": 0.004264768213033676, + "debugging/raw_token_scores_std": 0.017437797039747238, + "debugging/score": 0.0066926720884253575, + "debugging/score_std": 0.006256546529562537, + "epoch": 0.7, + "objective/train/advantage_avg": 0.0012232189765200019, + "objective/train/docs_used": 835219, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2494778633117676, + "objective/train/original_loss": 1.249477744102478, + "objective/train/theoretical_loss": 3.4204036794313675, + "objective/train/tokens_used": 2314220000, + "objective/train/value_avg": -0.005489349365234375, + "objective/train/value_loss": 0.00015417773101944476, + "objective/train/value_max": -4.5418739318847656e-05, + "objective/train/value_min": -0.390380859375, + "objective/train/value_reward_corr": 0.7059713839052515, + "objective/train/value_std": 0.01250457763671875, + "objective/train/weight_avg": 1.0012928247451782, + "objective/train/weighted_lm_loss": 1.251007080078125, + "objective/train/weights_max": 1.470705509185791, + "objective/train/weights_min": 0.3033490478992462, + "theoretical_loss": 3.4204036794313675, + "tokens_seen": 2293760000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003079762477932916, + "loss": 0.0675, + "theoretical_loss": 3.4204036794313675, + "tokens_seen": 2293760000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003078960038517092, + "loss": 0.0671, + "theoretical_loss": 3.4203722244462353, + "tokens_seen": 2294022144 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030781575991012676, + "loss": 0.0693, + "theoretical_loss": 3.4203407740616565, + "tokens_seen": 2294284288 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003077355159685444, + "loss": 0.0697, + "theoretical_loss": 3.4203093282764314, + "tokens_seen": 2294546432 + }, + { + "epoch": 0.7, + "learning_rate": 0.000307655272026962, + "loss": 0.0653, + "theoretical_loss": 3.420277887089363, + "tokens_seen": 2294808576 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003075750280853796, + "loss": 0.0639, + "theoretical_loss": 3.420246450499253, + "tokens_seen": 2295070720 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030749478414379716, + "loss": 0.0641, + "theoretical_loss": 3.420215018504905, + "tokens_seen": 2295332864 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030741454020221477, + "loss": 0.063, + "theoretical_loss": 3.4201835911051224, + "tokens_seen": 2295595008 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030733429626063233, + "loss": 0.0684, + "theoretical_loss": 3.4201521682987086, + "tokens_seen": 2295857152 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003072540523190499, + "loss": 0.068, + "theoretical_loss": 3.420120750084468, + "tokens_seen": 2296119296 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003071738083774675, + "loss": 0.0672, + "theoretical_loss": 3.4200893364612055, + "tokens_seen": 2296381440 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003070935644358851, + "loss": 0.0688, + "theoretical_loss": 3.4200579274277256, + "tokens_seen": 2296643584 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030701332049430267, + "loss": 0.067, + "theoretical_loss": 3.420026522982835, + "tokens_seen": 2296905728 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.001160003594122827, + "objective/train/docs_used": 836410, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2669320106506348, + "objective/train/original_loss": 1.2669320106506348, + "objective/train/theoretical_loss": 3.4200108224807373, + "objective/train/tokens_used": 2317496800, + "objective/train/value_avg": -0.00878143310546875, + "objective/train/value_loss": 0.00025325262686237693, + "objective/train/value_max": -1.9252300262451172e-05, + "objective/train/value_min": -0.50830078125, + "objective/train/value_reward_corr": 0.6975027171589683, + "objective/train/value_std": 0.01409149169921875, + "objective/train/weight_avg": 1.0012704133987427, + "objective/train/weighted_lm_loss": 1.268456220626831, + "objective/train/weights_max": 1.1904091835021973, + "objective/train/weights_min": 0.3858168125152588, + "theoretical_loss": 3.4200108224807373, + "tokens_seen": 2297036800 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003069330765527203, + "loss": 0.0657, + "theoretical_loss": 3.4199951231253394, + "tokens_seen": 2297167872 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003068528326111379, + "loss": 0.0666, + "theoretical_loss": 3.4199637278540447, + "tokens_seen": 2297430016 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030677258866955545, + "loss": 0.0646, + "theoretical_loss": 3.4199323371677584, + "tokens_seen": 2297692160 + }, + { + "epoch": 0.7, + "learning_rate": 0.000306692344727973, + "loss": 0.0659, + "theoretical_loss": 3.419900951065288, + "tokens_seen": 2297954304 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003066121007863906, + "loss": 0.0662, + "theoretical_loss": 3.419869569545441, + "tokens_seen": 2298216448 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030653185684480824, + "loss": 0.0654, + "theoretical_loss": 3.419838192607026, + "tokens_seen": 2298478592 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003064516129032258, + "loss": 0.0629, + "theoretical_loss": 3.4198068202488514, + "tokens_seen": 2298740736 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003063713689616434, + "loss": 0.067, + "theoretical_loss": 3.419775452469727, + "tokens_seen": 2299002880 + }, + { + "epoch": 0.7, + "learning_rate": 0.000306291125020061, + "loss": 0.0658, + "theoretical_loss": 3.4197440892684616, + "tokens_seen": 2299265024 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003062108810784786, + "loss": 0.066, + "theoretical_loss": 3.4197127306438664, + "tokens_seen": 2299527168 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030613063713689614, + "loss": 0.0685, + "theoretical_loss": 3.4196813765947507, + "tokens_seen": 2299789312 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030605039319531375, + "loss": 0.0681, + "theoretical_loss": 3.4196500271199266, + "tokens_seen": 2300051456 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": -8.435761628788896e-06, + "objective/train/docs_used": 837515, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3882620334625244, + "objective/train/original_loss": 1.3882619142532349, + "objective/train/theoretical_loss": 3.4196186822182044, + "objective/train/tokens_used": 2320773600, + "objective/train/value_avg": -0.0076446533203125, + "objective/train/value_loss": 0.0006114847492426634, + "objective/train/value_max": -7.486343383789062e-05, + "objective/train/value_min": -0.92431640625, + "objective/train/value_reward_corr": 0.6498722767181959, + "objective/train/value_std": 0.02294921875, + "objective/train/weight_avg": 1.000264286994934, + "objective/train/weighted_lm_loss": 1.3892766237258911, + "objective/train/weights_max": 1.806443452835083, + "objective/train/weights_min": 0.36988890171051025, + "theoretical_loss": 3.4196186822182044, + "tokens_seen": 2300313600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030597014925373136, + "loss": 0.0678, + "theoretical_loss": 3.4196186822182044, + "tokens_seen": 2300313600 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003058899053121489, + "loss": 0.0653, + "theoretical_loss": 3.419587341888397, + "tokens_seen": 2300575744 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030580966137056653, + "loss": 0.0643, + "theoretical_loss": 3.4195560061293158, + "tokens_seen": 2300837888 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030572941742898415, + "loss": 0.0673, + "theoretical_loss": 3.4195246749397743, + "tokens_seen": 2301100032 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030564917348740176, + "loss": 0.064, + "theoretical_loss": 3.4194933483185856, + "tokens_seen": 2301362176 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030556892954581926, + "loss": 0.0661, + "theoretical_loss": 3.4194620262645627, + "tokens_seen": 2301624320 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003054886856042369, + "loss": 0.0685, + "theoretical_loss": 3.4194307087765203, + "tokens_seen": 2301886464 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003054084416626545, + "loss": 0.0698, + "theoretical_loss": 3.4193993958532722, + "tokens_seen": 2302148608 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030532819772107205, + "loss": 0.0649, + "theoretical_loss": 3.4193680874936345, + "tokens_seen": 2302410752 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030524795377948966, + "loss": 0.0669, + "theoretical_loss": 3.419336783696422, + "tokens_seen": 2302672896 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030516770983790727, + "loss": 0.0666, + "theoretical_loss": 3.41930548446045, + "tokens_seen": 2302935040 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030508746589632483, + "loss": 0.0689, + "theoretical_loss": 3.419274189784536, + "tokens_seen": 2303197184 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003050072219547424, + "loss": 0.0676, + "theoretical_loss": 3.4192428996674957, + "tokens_seen": 2303459328 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": -9.100284660235047e-05, + "objective/train/docs_used": 838679, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.309449315071106, + "objective/train/original_loss": 1.309449315071106, + "objective/train/theoretical_loss": 3.419227256318184, + "objective/train/tokens_used": 2324050400, + "objective/train/value_avg": -0.00968170166015625, + "objective/train/value_loss": 0.0003752364427782595, + "objective/train/value_max": -3.272294998168945e-05, + "objective/train/value_min": -0.994140625, + "objective/train/value_reward_corr": 0.7353203166864406, + "objective/train/value_std": 0.0184783935546875, + "objective/train/weight_avg": 1.0000666379928589, + "objective/train/weighted_lm_loss": 1.3097712993621826, + "objective/train/weights_max": 1.30930495262146, + "objective/train/weights_min": 0.22673387825489044, + "theoretical_loss": 3.419227256318184, + "tokens_seen": 2303590400 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030492697801316, + "loss": 0.0696, + "theoretical_loss": 3.4192116141081463, + "tokens_seen": 2303721472 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003048467340715776, + "loss": 0.0676, + "theoretical_loss": 3.4191803331053063, + "tokens_seen": 2303983616 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030476649012999517, + "loss": 0.0675, + "theoretical_loss": 3.419149056657793, + "tokens_seen": 2304245760 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003046862461884128, + "loss": 0.0648, + "theoretical_loss": 3.4191177847644254, + "tokens_seen": 2304507904 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003046060022468304, + "loss": 0.0676, + "theoretical_loss": 3.419086517424022, + "tokens_seen": 2304770048 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030452575830524796, + "loss": 0.0666, + "theoretical_loss": 3.4190552546354023, + "tokens_seen": 2305032192 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030444551436366557, + "loss": 0.0673, + "theoretical_loss": 3.419023996397386, + "tokens_seen": 2305294336 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003043652704220831, + "loss": 0.0651, + "theoretical_loss": 3.418992742708794, + "tokens_seen": 2305556480 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030428502648050074, + "loss": 0.0654, + "theoretical_loss": 3.418961493568446, + "tokens_seen": 2305818624 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003042047825389183, + "loss": 0.0672, + "theoretical_loss": 3.4189302489751636, + "tokens_seen": 2306080768 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003041245385973359, + "loss": 0.0665, + "theoretical_loss": 3.418899008927769, + "tokens_seen": 2306342912 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003040442946557535, + "loss": 0.0657, + "theoretical_loss": 3.4188677734250836, + "tokens_seen": 2306605056 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.000588314956985414, + "objective/train/docs_used": 839893, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4173892736434937, + "objective/train/original_loss": 1.417389154434204, + "objective/train/theoretical_loss": 3.4188365424659293, + "objective/train/tokens_used": 2327327200, + "objective/train/value_avg": -0.0101318359375, + "objective/train/value_loss": 0.00021919727441854775, + "objective/train/value_max": -3.707408905029297e-05, + "objective/train/value_min": -0.5791015625, + "objective/train/value_reward_corr": 0.7850794856275933, + "objective/train/value_std": 0.019073486328125, + "objective/train/weight_avg": 1.0006909370422363, + "objective/train/weighted_lm_loss": 1.4180923700332642, + "objective/train/weights_max": 1.4146628379821777, + "objective/train/weights_min": 0.38787099719047546, + "theoretical_loss": 3.4188365424659293, + "tokens_seen": 2306867200 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003039640507141711, + "loss": 0.067, + "theoretical_loss": 3.4188365424659293, + "tokens_seen": 2306867200 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003038838067725887, + "loss": 0.066, + "theoretical_loss": 3.4188053160491303, + "tokens_seen": 2307129344 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030380356283100625, + "loss": 0.0664, + "theoretical_loss": 3.418774094173509, + "tokens_seen": 2307391488 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030372331888942387, + "loss": 0.0659, + "theoretical_loss": 3.4187428768378894, + "tokens_seen": 2307653632 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003036430749478414, + "loss": 0.0672, + "theoretical_loss": 3.4187116640410955, + "tokens_seen": 2307915776 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030356283100625904, + "loss": 0.0657, + "theoretical_loss": 3.418680455781953, + "tokens_seen": 2308177920 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030348258706467665, + "loss": 0.0668, + "theoretical_loss": 3.4186492520592853, + "tokens_seen": 2308440064 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003034023431230942, + "loss": 0.0707, + "theoretical_loss": 3.4186180528719188, + "tokens_seen": 2308702208 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003033220991815118, + "loss": 0.0684, + "theoretical_loss": 3.41858685821868, + "tokens_seen": 2308964352 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003032418552399294, + "loss": 0.0681, + "theoretical_loss": 3.418555668098395, + "tokens_seen": 2309226496 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030316161129834694, + "loss": 0.0683, + "theoretical_loss": 3.41852448250989, + "tokens_seen": 2309488640 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030308136735676455, + "loss": 0.0652, + "theoretical_loss": 3.4184933014519925, + "tokens_seen": 2309750784 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030300112341518216, + "loss": 0.065, + "theoretical_loss": 3.4184621249235305, + "tokens_seen": 2310012928 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.00043035694397985935, + "objective/train/docs_used": 841155, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.224597454071045, + "objective/train/original_loss": 1.224597454071045, + "objective/train/theoretical_loss": 3.418446538357472, + "objective/train/tokens_used": 2330604000, + "objective/train/value_avg": -0.0084686279296875, + "objective/train/value_loss": 0.00034951031557284296, + "objective/train/value_max": -5.066394805908203e-05, + "objective/train/value_min": -0.443115234375, + "objective/train/value_reward_corr": 0.6553620318807173, + "objective/train/value_std": 0.014984130859375, + "objective/train/weight_avg": 1.0005806684494019, + "objective/train/weighted_lm_loss": 1.2254345417022705, + "objective/train/weights_max": 1.3982270956039429, + "objective/train/weights_min": 0.3952797055244446, + "theoretical_loss": 3.418446538357472, + "tokens_seen": 2310144000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003029208794735998, + "loss": 0.0658, + "theoretical_loss": 3.418430952923332, + "tokens_seen": 2310275072 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030284063553201733, + "loss": 0.0684, + "theoretical_loss": 3.418399785450226, + "tokens_seen": 2310537216 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030276039159043495, + "loss": 0.065, + "theoretical_loss": 3.418368622503041, + "tokens_seen": 2310799360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030268014764885256, + "loss": 0.0659, + "theoretical_loss": 3.4183374640806066, + "tokens_seen": 2311061504 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030259990370727006, + "loss": 0.0671, + "theoretical_loss": 3.4183063101817526, + "tokens_seen": 2311323648 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003025196597656877, + "loss": 0.0686, + "theoretical_loss": 3.41827516080531, + "tokens_seen": 2311585792 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003024394158241053, + "loss": 0.0699, + "theoretical_loss": 3.418244015950108, + "tokens_seen": 2311847936 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003023591718825229, + "loss": 0.0655, + "theoretical_loss": 3.418212875614979, + "tokens_seen": 2312110080 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030227892794094046, + "loss": 0.0662, + "theoretical_loss": 3.4181817397987553, + "tokens_seen": 2312372224 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030219868399935807, + "loss": 0.0669, + "theoretical_loss": 3.4181506085002673, + "tokens_seen": 2312634368 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003021184400577757, + "loss": 0.0666, + "theoretical_loss": 3.4181194817183487, + "tokens_seen": 2312896512 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003020381961161932, + "loss": 0.0668, + "theoretical_loss": 3.418088359451832, + "tokens_seen": 2313158656 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.0005539424018934369, + "objective/train/docs_used": 842373, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2508080005645752, + "objective/train/original_loss": 1.250807762145996, + "objective/train/theoretical_loss": 3.41805724169955, + "objective/train/tokens_used": 2333880800, + "objective/train/value_avg": -0.005340576171875, + "objective/train/value_loss": 0.00012613584112841636, + "objective/train/value_max": -3.0934810638427734e-05, + "objective/train/value_min": -0.2215576171875, + "objective/train/value_reward_corr": 0.6626319954391378, + "objective/train/value_std": 0.00977325439453125, + "objective/train/weight_avg": 1.0006122589111328, + "objective/train/weighted_lm_loss": 1.251070261001587, + "objective/train/weights_max": 1.145806074142456, + "objective/train/weights_min": 0.3807637691497803, + "theoretical_loss": 3.41805724169955, + "tokens_seen": 2313420800 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003019579521746108, + "loss": 0.0679, + "theoretical_loss": 3.41805724169955, + "tokens_seen": 2313420800 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003018777082330284, + "loss": 0.0677, + "theoretical_loss": 3.4180261284603373, + "tokens_seen": 2313682944 + }, + { + "epoch": 0.7, + "learning_rate": 0.000301797464291446, + "loss": 0.0693, + "theoretical_loss": 3.417995019733028, + "tokens_seen": 2313945088 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003017172203498636, + "loss": 0.0661, + "theoretical_loss": 3.4179639155164567, + "tokens_seen": 2314207232 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003016369764082812, + "loss": 0.0666, + "theoretical_loss": 3.4179328158094586, + "tokens_seen": 2314469376 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003015567324666988, + "loss": 0.0661, + "theoretical_loss": 3.417901720610869, + "tokens_seen": 2314731520 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003014764885251163, + "loss": 0.0642, + "theoretical_loss": 3.417870629919524, + "tokens_seen": 2314993664 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003013962445835339, + "loss": 0.0678, + "theoretical_loss": 3.41783954373426, + "tokens_seen": 2315255808 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030131600064195154, + "loss": 0.0656, + "theoretical_loss": 3.4178084620539138, + "tokens_seen": 2315517952 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003012357567003691, + "loss": 0.0656, + "theoretical_loss": 3.4177773848773225, + "tokens_seen": 2315780096 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003011555127587867, + "loss": 0.0679, + "theoretical_loss": 3.4177463122033243, + "tokens_seen": 2316042240 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003010752688172043, + "loss": 0.0672, + "theoretical_loss": 3.4177152440307568, + "tokens_seen": 2316304384 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030099502487562194, + "loss": 0.067, + "theoretical_loss": 3.417684180358459, + "tokens_seen": 2316566528 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.0001357531436951831, + "objective/train/docs_used": 843608, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4189345836639404, + "objective/train/original_loss": 1.4189344644546509, + "objective/train/theoretical_loss": 3.417668650209548, + "objective/train/tokens_used": 2337157600, + "objective/train/value_avg": -0.00809478759765625, + "objective/train/value_loss": 0.00034320837585255504, + "objective/train/value_max": -5.3882598876953125e-05, + "objective/train/value_min": -0.5009765625, + "objective/train/value_reward_corr": 0.620115318252616, + "objective/train/value_std": 0.0153350830078125, + "objective/train/weight_avg": 1.0002853870391846, + "objective/train/weighted_lm_loss": 1.4190088510513306, + "objective/train/weights_max": 1.3826122283935547, + "objective/train/weights_min": 0.36843207478523254, + "theoretical_loss": 3.417668650209548, + "tokens_seen": 2316697600 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003009147809340395, + "loss": 0.0687, + "theoretical_loss": 3.417653121185269, + "tokens_seen": 2316828672 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030083453699245705, + "loss": 0.0704, + "theoretical_loss": 3.4176220665100274, + "tokens_seen": 2317090816 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030075429305087466, + "loss": 0.0681, + "theoretical_loss": 3.4175910163315733, + "tokens_seen": 2317352960 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003006740491092922, + "loss": 0.0684, + "theoretical_loss": 3.417559970648747, + "tokens_seen": 2317615104 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030059380516770984, + "loss": 0.0689, + "theoretical_loss": 3.4175289294603894, + "tokens_seen": 2317877248 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030051356122612745, + "loss": 0.0689, + "theoretical_loss": 3.4174978927653417, + "tokens_seen": 2318139392 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030043331728454506, + "loss": 0.0683, + "theoretical_loss": 3.4174668605624454, + "tokens_seen": 2318401536 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003003530733429626, + "loss": 0.0667, + "theoretical_loss": 3.4174358328505425, + "tokens_seen": 2318663680 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003002728294013802, + "loss": 0.067, + "theoretical_loss": 3.4174048096284757, + "tokens_seen": 2318925824 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003001925854597978, + "loss": 0.0674, + "theoretical_loss": 3.4173737908950876, + "tokens_seen": 2319187968 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030011234151821535, + "loss": 0.0671, + "theoretical_loss": 3.417342776649221, + "tokens_seen": 2319450112 + }, + { + "epoch": 0.7, + "learning_rate": 0.00030003209757663296, + "loss": 0.0699, + "theoretical_loss": 3.4173117668897204, + "tokens_seen": 2319712256 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": -0.00041747145587578416, + "objective/train/docs_used": 844817, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1701602935791016, + "objective/train/original_loss": 1.1701600551605225, + "objective/train/theoretical_loss": 3.4172807616154293, + "objective/train/tokens_used": 2340434400, + "objective/train/value_avg": -0.00962066650390625, + "objective/train/value_loss": 0.00041081372182816267, + "objective/train/value_max": -4.363059997558594e-05, + "objective/train/value_min": -0.92236328125, + "objective/train/value_reward_corr": 0.7432690809678663, + "objective/train/value_std": 0.0228118896484375, + "objective/train/weight_avg": 0.9997594356536865, + "objective/train/weighted_lm_loss": 1.1693514585494995, + "objective/train/weights_max": 2.4330854415893555, + "objective/train/weights_min": 0.15194809436798096, + "theoretical_loss": 3.4172807616154293, + "tokens_seen": 2319974400 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002999518536350506, + "loss": 0.0652, + "theoretical_loss": 3.4172807616154293, + "tokens_seen": 2319974400 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002998716096934682, + "loss": 0.0679, + "theoretical_loss": 3.417249760825193, + "tokens_seen": 2320236544 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029979136575188574, + "loss": 0.0648, + "theoretical_loss": 3.417218764517856, + "tokens_seen": 2320498688 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029971112181030336, + "loss": 0.0689, + "theoretical_loss": 3.4171877726922633, + "tokens_seen": 2320760832 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002996308778687209, + "loss": 0.067, + "theoretical_loss": 3.417156785347262, + "tokens_seen": 2321022976 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002995506339271385, + "loss": 0.0672, + "theoretical_loss": 3.4171258024816975, + "tokens_seen": 2321285120 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002994703899855561, + "loss": 0.0677, + "theoretical_loss": 3.4170948240944163, + "tokens_seen": 2321547264 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002993901460439737, + "loss": 0.0667, + "theoretical_loss": 3.4170638501842663, + "tokens_seen": 2321809408 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029930990210239126, + "loss": 0.0676, + "theoretical_loss": 3.417032880750094, + "tokens_seen": 2322071552 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029922965816080887, + "loss": 0.0658, + "theoretical_loss": 3.417001915790749, + "tokens_seen": 2322333696 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002991494142192265, + "loss": 0.0647, + "theoretical_loss": 3.416970955305078, + "tokens_seen": 2322595840 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029906917027764404, + "loss": 0.0677, + "theoretical_loss": 3.416939999291931, + "tokens_seen": 2322857984 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002989889263360616, + "loss": 0.0683, + "theoretical_loss": 3.416909047750157, + "tokens_seen": 2323120128 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.0003804284206125885, + "objective/train/docs_used": 845757, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2605044841766357, + "objective/train/original_loss": 1.2605046033859253, + "objective/train/theoretical_loss": 3.4168935736556754, + "objective/train/tokens_used": 2343711200, + "objective/train/value_avg": -0.00946044921875, + "objective/train/value_loss": 0.0002300405321875587, + "objective/train/value_max": -2.6881694793701172e-05, + "objective/train/value_min": -0.8173828125, + "objective/train/value_reward_corr": 0.7705075250052723, + "objective/train/value_std": 0.018768310546875, + "objective/train/weight_avg": 1.0004947185516357, + "objective/train/weighted_lm_loss": 1.2603367567062378, + "objective/train/weights_max": 1.7017693519592285, + "objective/train/weights_min": 0.7406313419342041, + "theoretical_loss": 3.4168935736556754, + "tokens_seen": 2323251200 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002989086823944792, + "loss": 0.0674, + "theoretical_loss": 3.4168781006786055, + "tokens_seen": 2323382272 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002988284384528968, + "loss": 0.0684, + "theoretical_loss": 3.4168471580761266, + "tokens_seen": 2323644416 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002987481945113144, + "loss": 0.0671, + "theoretical_loss": 3.416816219941571, + "tokens_seen": 2323906560 + }, + { + "epoch": 0.7, + "learning_rate": 0.000298667950569732, + "loss": 0.066, + "theoretical_loss": 3.4167852862737895, + "tokens_seen": 2324168704 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002985877066281496, + "loss": 0.0679, + "theoretical_loss": 3.4167543570716337, + "tokens_seen": 2324430848 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029850746268656717, + "loss": 0.0661, + "theoretical_loss": 3.416723432333956, + "tokens_seen": 2324692992 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002984272187449847, + "loss": 0.0661, + "theoretical_loss": 3.416692512059607, + "tokens_seen": 2324955136 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029834697480340234, + "loss": 0.065, + "theoretical_loss": 3.416661596247441, + "tokens_seen": 2325217280 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029826673086181995, + "loss": 0.0692, + "theoretical_loss": 3.41663068489631, + "tokens_seen": 2325479424 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002981864869202375, + "loss": 0.0656, + "theoretical_loss": 3.4165997780050685, + "tokens_seen": 2325741568 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002981062429786551, + "loss": 0.0675, + "theoretical_loss": 3.41656887557257, + "tokens_seen": 2326003712 + }, + { + "epoch": 0.7, + "learning_rate": 0.00029802599903707273, + "loss": 0.0687, + "theoretical_loss": 3.416537977597668, + "tokens_seen": 2326265856 + }, + { + "epoch": 0.7, + "objective/train/advantage_avg": 0.001136346603743732, + "objective/train/docs_used": 847012, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3613319396972656, + "objective/train/original_loss": 1.3613317012786865, + "objective/train/theoretical_loss": 3.4165070840792184, + "objective/train/tokens_used": 2346988000, + "objective/train/value_avg": -0.006256103515625, + "objective/train/value_loss": 0.00011094258661614731, + "objective/train/value_max": -2.9802322387695312e-05, + "objective/train/value_min": -0.837890625, + "objective/train/value_reward_corr": 0.7206140669800114, + "objective/train/value_std": 0.01192474365234375, + "objective/train/weight_avg": 1.0011875629425049, + "objective/train/weighted_lm_loss": 1.3630400896072388, + "objective/train/weights_max": 1.1782879829406738, + "objective/train/weights_min": 0.373849481344223, + "theoretical_loss": 3.4165070840792184, + "tokens_seen": 2326528000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029794575509549035, + "loss": 0.0674, + "theoretical_loss": 3.4165070840792184, + "tokens_seen": 2326528000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029786551115390785, + "loss": 0.0664, + "theoretical_loss": 3.4164761950160765, + "tokens_seen": 2326790144 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029778526721232546, + "loss": 0.066, + "theoretical_loss": 3.4164453104070973, + "tokens_seen": 2327052288 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002977050232707431, + "loss": 0.0644, + "theoretical_loss": 3.416414430251137, + "tokens_seen": 2327314432 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029762477932916063, + "loss": 0.0678, + "theoretical_loss": 3.4163835545470524, + "tokens_seen": 2327576576 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029754453538757825, + "loss": 0.0615, + "theoretical_loss": 3.4163526832937, + "tokens_seen": 2327838720 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029746429144599586, + "loss": 0.0672, + "theoretical_loss": 3.4163218164899374, + "tokens_seen": 2328100864 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029738404750441347, + "loss": 0.0663, + "theoretical_loss": 3.416290954134622, + "tokens_seen": 2328363008 + }, + { + "epoch": 0.71, + "learning_rate": 0.000297303803562831, + "loss": 0.0679, + "theoretical_loss": 3.4162600962266128, + "tokens_seen": 2328625152 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002972235596212486, + "loss": 0.0671, + "theoretical_loss": 3.4162292427647674, + "tokens_seen": 2328887296 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002971433156796662, + "loss": 0.0664, + "theoretical_loss": 3.4161983937479454, + "tokens_seen": 2329149440 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029706307173808376, + "loss": 0.0657, + "theoretical_loss": 3.4161675491750056, + "tokens_seen": 2329411584 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002969828277965014, + "loss": 0.0684, + "theoretical_loss": 3.416136709044809, + "tokens_seen": 2329673728 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.0002161753218388185, + "objective/train/docs_used": 848291, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2818440198898315, + "objective/train/original_loss": 1.281843900680542, + "objective/train/theoretical_loss": 3.416121290645383, + "objective/train/tokens_used": 2350264800, + "objective/train/value_avg": -0.007343292236328125, + "objective/train/value_loss": 0.00019503793737385422, + "objective/train/value_max": -6.812810897827148e-05, + "objective/train/value_min": -0.22900390625, + "objective/train/value_reward_corr": 0.672381420425753, + "objective/train/value_std": 0.0130462646484375, + "objective/train/weight_avg": 1.0003025531768799, + "objective/train/weighted_lm_loss": 1.2819976806640625, + "objective/train/weights_max": 1.2044645547866821, + "objective/train/weights_min": 0.3711864948272705, + "theoretical_loss": 3.416121290645383, + "tokens_seen": 2329804800 + }, + { + "epoch": 0.71, + "learning_rate": 0.000296902583854919, + "loss": 0.0651, + "theoretical_loss": 3.416105873356215, + "tokens_seen": 2329935872 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029682233991333654, + "loss": 0.0664, + "theoretical_loss": 3.4160750421080843, + "tokens_seen": 2330198016 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002967420959717541, + "loss": 0.0658, + "theoretical_loss": 3.4160442152992783, + "tokens_seen": 2330460160 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002966618520301717, + "loss": 0.0674, + "theoretical_loss": 3.4160133929286585, + "tokens_seen": 2330722304 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029658160808858933, + "loss": 0.067, + "theoretical_loss": 3.4159825749950867, + "tokens_seen": 2330984448 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002965013641470069, + "loss": 0.0683, + "theoretical_loss": 3.4159517614974257, + "tokens_seen": 2331246592 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002964211202054245, + "loss": 0.0668, + "theoretical_loss": 3.4159209524345373, + "tokens_seen": 2331508736 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002963408762638421, + "loss": 0.0668, + "theoretical_loss": 3.4158901478052863, + "tokens_seen": 2331770880 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029626063232225967, + "loss": 0.0691, + "theoretical_loss": 3.4158593476085346, + "tokens_seen": 2332033024 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002961803883806773, + "loss": 0.0664, + "theoretical_loss": 3.4158285518431475, + "tokens_seen": 2332295168 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029610014443909484, + "loss": 0.0689, + "theoretical_loss": 3.4157977605079894, + "tokens_seen": 2332557312 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029601990049751245, + "loss": 0.0632, + "theoretical_loss": 3.415766973601924, + "tokens_seen": 2332819456 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.000803662755060941, + "objective/train/docs_used": 849464, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3308948278427124, + "objective/train/original_loss": 1.3308947086334229, + "objective/train/theoretical_loss": 3.415736191123818, + "objective/train/tokens_used": 2353541600, + "objective/train/value_avg": -0.00688934326171875, + "objective/train/value_loss": 0.0001628411264391616, + "objective/train/value_max": -6.0617923736572266e-05, + "objective/train/value_min": -0.380859375, + "objective/train/value_reward_corr": 0.7531091940899427, + "objective/train/value_std": 0.0137786865234375, + "objective/train/weight_avg": 1.000876784324646, + "objective/train/weighted_lm_loss": 1.3320674896240234, + "objective/train/weights_max": 1.4635417461395264, + "objective/train/weights_min": 0.37232381105422974, + "theoretical_loss": 3.415736191123818, + "tokens_seen": 2333081600 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029593965655593, + "loss": 0.0665, + "theoretical_loss": 3.415736191123818, + "tokens_seen": 2333081600 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002958594126143476, + "loss": 0.0674, + "theoretical_loss": 3.4157054130725366, + "tokens_seen": 2333343744 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029577916867276524, + "loss": 0.0686, + "theoretical_loss": 3.415674639446946, + "tokens_seen": 2333605888 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002956989247311828, + "loss": 0.0675, + "theoretical_loss": 3.415643870245913, + "tokens_seen": 2333868032 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002956186807896004, + "loss": 0.0683, + "theoretical_loss": 3.4156131054683034, + "tokens_seen": 2334130176 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029553843684801797, + "loss": 0.0681, + "theoretical_loss": 3.415582345112986, + "tokens_seen": 2334392320 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002954581929064356, + "loss": 0.0716, + "theoretical_loss": 3.415551589178828, + "tokens_seen": 2334654464 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029537794896485314, + "loss": 0.0679, + "theoretical_loss": 3.415520837664698, + "tokens_seen": 2334916608 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029529770502327075, + "loss": 0.0658, + "theoretical_loss": 3.415490090569464, + "tokens_seen": 2335178752 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029521746108168836, + "loss": 0.0685, + "theoretical_loss": 3.415459347891996, + "tokens_seen": 2335440896 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002951372171401059, + "loss": 0.0657, + "theoretical_loss": 3.4154286096311623, + "tokens_seen": 2335703040 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029505697319852353, + "loss": 0.0645, + "theoretical_loss": 3.4153978757858336, + "tokens_seen": 2335965184 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029497672925694115, + "loss": 0.066, + "theoretical_loss": 3.41536714635488, + "tokens_seen": 2336227328 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.001232532667927444, + "objective/train/docs_used": 850768, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.270790457725525, + "objective/train/original_loss": 1.2707905769348145, + "objective/train/theoretical_loss": 3.415351783294441, + "objective/train/tokens_used": 2356818400, + "objective/train/value_avg": -0.005054473876953125, + "objective/train/value_loss": 0.0001378721499349922, + "objective/train/value_max": -2.586841583251953e-05, + "objective/train/value_min": -0.1971435546875, + "objective/train/value_reward_corr": 0.6732705521244904, + "objective/train/value_std": 0.0104217529296875, + "objective/train/weight_avg": 1.0012966394424438, + "objective/train/weighted_lm_loss": 1.272763967514038, + "objective/train/weights_max": 1.1968432664871216, + "objective/train/weights_min": 0.36855649948120117, + "theoretical_loss": 3.415351783294441, + "tokens_seen": 2336358400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029489648531535865, + "loss": 0.0682, + "theoretical_loss": 3.4153364213371726, + "tokens_seen": 2336489472 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029481624137377626, + "loss": 0.0655, + "theoretical_loss": 3.415305700731582, + "tokens_seen": 2336751616 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002947359974321939, + "loss": 0.0683, + "theoretical_loss": 3.41527498453698, + "tokens_seen": 2337013760 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002946557534906115, + "loss": 0.0684, + "theoretical_loss": 3.4152442727522385, + "tokens_seen": 2337275904 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029457550954902905, + "loss": 0.0696, + "theoretical_loss": 3.41521356537623, + "tokens_seen": 2337538048 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029449526560744666, + "loss": 0.0668, + "theoretical_loss": 3.415182862407827, + "tokens_seen": 2337800192 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029441502166586427, + "loss": 0.0661, + "theoretical_loss": 3.4151521638459035, + "tokens_seen": 2338062336 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002943347777242818, + "loss": 0.0697, + "theoretical_loss": 3.4151214696893324, + "tokens_seen": 2338324480 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002942545337826994, + "loss": 0.0672, + "theoretical_loss": 3.4150907799369876, + "tokens_seen": 2338586624 + }, + { + "epoch": 0.71, + "learning_rate": 0.000294174289841117, + "loss": 0.0676, + "theoretical_loss": 3.4150600945877443, + "tokens_seen": 2338848768 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002940940458995346, + "loss": 0.069, + "theoretical_loss": 3.415029413640477, + "tokens_seen": 2339110912 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029401380195795217, + "loss": 0.0671, + "theoretical_loss": 3.414998737094061, + "tokens_seen": 2339373056 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": -0.00019418902229517698, + "objective/train/docs_used": 851815, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.414489984512329, + "objective/train/original_loss": 1.414489984512329, + "objective/train/theoretical_loss": 3.414968064947372, + "objective/train/tokens_used": 2360095200, + "objective/train/value_avg": -0.00991058349609375, + "objective/train/value_loss": 0.0003953047562390566, + "objective/train/value_max": -0.00011324882507324219, + "objective/train/value_min": -0.529296875, + "objective/train/value_reward_corr": 0.6825710898331492, + "objective/train/value_std": 0.0172119140625, + "objective/train/weight_avg": 0.9999842047691345, + "objective/train/weighted_lm_loss": 1.4128410816192627, + "objective/train/weights_max": 1.31961989402771, + "objective/train/weights_min": 0.37487199902534485, + "theoretical_loss": 3.414968064947372, + "tokens_seen": 2339635200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002939335580163698, + "loss": 0.0673, + "theoretical_loss": 3.414968064947372, + "tokens_seen": 2339635200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002938533140747874, + "loss": 0.0661, + "theoretical_loss": 3.4149373971992856, + "tokens_seen": 2339897344 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002937730701332049, + "loss": 0.0678, + "theoretical_loss": 3.4149067338486794, + "tokens_seen": 2340159488 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002936928261916225, + "loss": 0.0665, + "theoretical_loss": 3.4148760748944293, + "tokens_seen": 2340421632 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002936125822500401, + "loss": 0.0661, + "theoretical_loss": 3.4148454203354133, + "tokens_seen": 2340683776 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029353233830845774, + "loss": 0.0673, + "theoretical_loss": 3.4148147701705094, + "tokens_seen": 2340945920 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002934520943668753, + "loss": 0.0669, + "theoretical_loss": 3.414784124398595, + "tokens_seen": 2341208064 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002933718504252929, + "loss": 0.0689, + "theoretical_loss": 3.4147534830185493, + "tokens_seen": 2341470208 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002932916064837105, + "loss": 0.0674, + "theoretical_loss": 3.414722846029251, + "tokens_seen": 2341732352 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002932113625421281, + "loss": 0.0695, + "theoretical_loss": 3.4146922134295794, + "tokens_seen": 2341994496 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029313111860054564, + "loss": 0.0685, + "theoretical_loss": 3.414661585218415, + "tokens_seen": 2342256640 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029305087465896325, + "loss": 0.0682, + "theoretical_loss": 3.4146309613946366, + "tokens_seen": 2342518784 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002929706307173808, + "loss": 0.0677, + "theoretical_loss": 3.4146003419571267, + "tokens_seen": 2342780928 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.00029292894760146737, + "objective/train/docs_used": 852909, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2010201215744019, + "objective/train/original_loss": 1.2010200023651123, + "objective/train/theoretical_loss": 3.4145850338828723, + "objective/train/tokens_used": 2363372000, + "objective/train/value_avg": -0.007549285888671875, + "objective/train/value_loss": 0.0002901647530961782, + "objective/train/value_max": -1.5437602996826172e-05, + "objective/train/value_min": -0.414306640625, + "objective/train/value_reward_corr": 0.6223776772873889, + "objective/train/value_std": 0.01342010498046875, + "objective/train/weight_avg": 1.000418782234192, + "objective/train/weighted_lm_loss": 1.2023299932479858, + "objective/train/weights_max": 1.4874529838562012, + "objective/train/weights_min": 0.40334662795066833, + "theoretical_loss": 3.4145850338828723, + "tokens_seen": 2342912000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002928903867757984, + "loss": 0.0682, + "theoretical_loss": 3.414569726904765, + "tokens_seen": 2343043072 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029281014283421604, + "loss": 0.0673, + "theoretical_loss": 3.414539116236434, + "tokens_seen": 2343305216 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029272989889263365, + "loss": 0.066, + "theoretical_loss": 3.4145085099510144, + "tokens_seen": 2343567360 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002926496549510512, + "loss": 0.0674, + "theoretical_loss": 3.4144779080473895, + "tokens_seen": 2343829504 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029256941100946877, + "loss": 0.0649, + "theoretical_loss": 3.4144473105244413, + "tokens_seen": 2344091648 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002924891670678864, + "loss": 0.0668, + "theoretical_loss": 3.4144167173810533, + "tokens_seen": 2344353792 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029240892312630394, + "loss": 0.068, + "theoretical_loss": 3.414386128616109, + "tokens_seen": 2344615936 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029232867918472155, + "loss": 0.0685, + "theoretical_loss": 3.414355544228492, + "tokens_seen": 2344878080 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029224843524313916, + "loss": 0.0697, + "theoretical_loss": 3.414324964217087, + "tokens_seen": 2345140224 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002921681913015568, + "loss": 0.0658, + "theoretical_loss": 3.414294388580779, + "tokens_seen": 2345402368 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029208794735997433, + "loss": 0.0664, + "theoretical_loss": 3.4142638173184525, + "tokens_seen": 2345664512 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002920077034183919, + "loss": 0.066, + "theoretical_loss": 3.4142332504289934, + "tokens_seen": 2345926656 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.0005077946116216481, + "objective/train/docs_used": 854185, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4195713996887207, + "objective/train/original_loss": 1.4195713996887207, + "objective/train/theoretical_loss": 3.414202687911288, + "objective/train/tokens_used": 2366648800, + "objective/train/value_avg": -0.0115814208984375, + "objective/train/value_loss": 0.0005665815551765263, + "objective/train/value_max": -5.02467155456543e-05, + "objective/train/value_min": -0.97509765625, + "objective/train/value_reward_corr": 0.7525287514571477, + "objective/train/value_std": 0.0290374755859375, + "objective/train/weight_avg": 1.0007684230804443, + "objective/train/weighted_lm_loss": 1.4191945791244507, + "objective/train/weights_max": 1.751630187034607, + "objective/train/weights_min": 0.3687090575695038, + "theoretical_loss": 3.414202687911288, + "tokens_seen": 2346188800 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002919274594768095, + "loss": 0.0681, + "theoretical_loss": 3.414202687911288, + "tokens_seen": 2346188800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029184721553522706, + "loss": 0.0672, + "theoretical_loss": 3.414172129764222, + "tokens_seen": 2346450944 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002917669715936447, + "loss": 0.0681, + "theoretical_loss": 3.414141575986682, + "tokens_seen": 2346713088 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002916867276520623, + "loss": 0.0669, + "theoretical_loss": 3.4141110265775563, + "tokens_seen": 2346975232 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002916064837104799, + "loss": 0.0664, + "theoretical_loss": 3.414080481535732, + "tokens_seen": 2347237376 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029152623976889746, + "loss": 0.0644, + "theoretical_loss": 3.414049940860097, + "tokens_seen": 2347499520 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029144599582731507, + "loss": 0.0666, + "theoretical_loss": 3.4140194045495393, + "tokens_seen": 2347761664 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029136575188573263, + "loss": 0.0661, + "theoretical_loss": 3.413988872602949, + "tokens_seen": 2348023808 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002912855079441502, + "loss": 0.0659, + "theoretical_loss": 3.4139583450192137, + "tokens_seen": 2348285952 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002912052640025678, + "loss": 0.0655, + "theoretical_loss": 3.413927821797224, + "tokens_seen": 2348548096 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002911250200609854, + "loss": 0.0681, + "theoretical_loss": 3.41389730293587, + "tokens_seen": 2348810240 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029104477611940297, + "loss": 0.065, + "theoretical_loss": 3.413866788434042, + "tokens_seen": 2349072384 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002909645321778206, + "loss": 0.0678, + "theoretical_loss": 3.413836278290631, + "tokens_seen": 2349334528 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.0006469209911301732, + "objective/train/docs_used": 855374, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4136708974838257, + "objective/train/original_loss": 1.4136707782745361, + "objective/train/theoretical_loss": 3.413821024852985, + "objective/train/tokens_used": 2369925600, + "objective/train/value_avg": -0.0093231201171875, + "objective/train/value_loss": 0.0005805552937090397, + "objective/train/value_max": -1.722574234008789e-05, + "objective/train/value_min": -0.9482421875, + "objective/train/value_reward_corr": 0.6200741516231335, + "objective/train/value_std": 0.0212554931640625, + "objective/train/weight_avg": 1.0009031295776367, + "objective/train/weighted_lm_loss": 1.4141875505447388, + "objective/train/weights_max": 1.8178532123565674, + "objective/train/weights_min": 0.3728525638580322, + "theoretical_loss": 3.413821024852985, + "tokens_seen": 2349465600 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002908842882362382, + "loss": 0.0673, + "theoretical_loss": 3.4138057725045274, + "tokens_seen": 2349596672 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029080404429465576, + "loss": 0.0698, + "theoretical_loss": 3.413775271074624, + "tokens_seen": 2349858816 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002907238003530733, + "loss": 0.0671, + "theoretical_loss": 3.413744773999812, + "tokens_seen": 2350120960 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002906435564114909, + "loss": 0.0654, + "theoretical_loss": 3.413714281278985, + "tokens_seen": 2350383104 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029056331246990854, + "loss": 0.0674, + "theoretical_loss": 3.413683792911035, + "tokens_seen": 2350645248 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002904830685283261, + "loss": 0.067, + "theoretical_loss": 3.413653308894855, + "tokens_seen": 2350907392 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002904028245867437, + "loss": 0.0659, + "theoretical_loss": 3.4136228292293396, + "tokens_seen": 2351169536 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002903225806451613, + "loss": 0.0671, + "theoretical_loss": 3.4135923539133826, + "tokens_seen": 2351431680 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029024233670357894, + "loss": 0.067, + "theoretical_loss": 3.413561882945878, + "tokens_seen": 2351693824 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029016209276199644, + "loss": 0.0658, + "theoretical_loss": 3.4135314163257213, + "tokens_seen": 2351955968 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029008184882041405, + "loss": 0.0674, + "theoretical_loss": 3.4135009540518073, + "tokens_seen": 2352218112 + }, + { + "epoch": 0.71, + "learning_rate": 0.00029000160487883166, + "loss": 0.0657, + "theoretical_loss": 3.4134704961230327, + "tokens_seen": 2352480256 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": 0.0007358468137681484, + "objective/train/docs_used": 856640, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4628479480743408, + "objective/train/original_loss": 1.4628478288650513, + "objective/train/theoretical_loss": 3.4134400425382925, + "objective/train/tokens_used": 2373202400, + "objective/train/value_avg": -0.010711669921875, + "objective/train/value_loss": 0.00020279559248592705, + "objective/train/value_max": -1.7762184143066406e-05, + "objective/train/value_min": -0.291259765625, + "objective/train/value_reward_corr": 0.7869439892749879, + "objective/train/value_std": 0.0175018310546875, + "objective/train/weight_avg": 1.0008323192596436, + "objective/train/weighted_lm_loss": 1.4642149209976196, + "objective/train/weights_max": 1.1599798202514648, + "objective/train/weights_min": 0.3715236186981201, + "theoretical_loss": 3.4134400425382925, + "tokens_seen": 2352742400 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002899213609372492, + "loss": 0.0672, + "theoretical_loss": 3.4134400425382925, + "tokens_seen": 2352742400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028984111699566684, + "loss": 0.0668, + "theoretical_loss": 3.4134095932964836, + "tokens_seen": 2353004544 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028976087305408445, + "loss": 0.0681, + "theoretical_loss": 3.413379148396503, + "tokens_seen": 2353266688 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028968062911250206, + "loss": 0.0655, + "theoretical_loss": 3.413348707837248, + "tokens_seen": 2353528832 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028960038517091957, + "loss": 0.0667, + "theoretical_loss": 3.4133182716176167, + "tokens_seen": 2353790976 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002895201412293372, + "loss": 0.0651, + "theoretical_loss": 3.4132878397365065, + "tokens_seen": 2354053120 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002894398972877548, + "loss": 0.0665, + "theoretical_loss": 3.4132574121928165, + "tokens_seen": 2354315264 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028935965334617235, + "loss": 0.0661, + "theoretical_loss": 3.413226988985446, + "tokens_seen": 2354577408 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028927940940458996, + "loss": 0.065, + "theoretical_loss": 3.4131965701132927, + "tokens_seen": 2354839552 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002891991654630076, + "loss": 0.0641, + "theoretical_loss": 3.4131661555752584, + "tokens_seen": 2355101696 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002891189215214252, + "loss": 0.0663, + "theoretical_loss": 3.413135745370242, + "tokens_seen": 2355363840 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002890386775798427, + "loss": 0.0676, + "theoretical_loss": 3.4131053394971445, + "tokens_seen": 2355625984 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002889584336382603, + "loss": 0.0668, + "theoretical_loss": 3.4130749379548666, + "tokens_seen": 2355888128 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": -0.0010917705949395895, + "objective/train/docs_used": 857700, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3285942077636719, + "objective/train/original_loss": 1.3285940885543823, + "objective/train/theoretical_loss": 3.4130597388074415, + "objective/train/tokens_used": 2376479200, + "objective/train/value_avg": -0.014007568359375, + "objective/train/value_loss": 0.00037045456701889634, + "objective/train/value_max": -5.739927291870117e-05, + "objective/train/value_min": -0.51806640625, + "objective/train/value_reward_corr": 0.8645235383798338, + "objective/train/value_std": 0.0283355712890625, + "objective/train/weight_avg": 0.999081552028656, + "objective/train/weighted_lm_loss": 1.326906442642212, + "objective/train/weights_max": 1.4649479389190674, + "objective/train/weights_min": 0.37004411220550537, + "theoretical_loss": 3.4130597388074415, + "tokens_seen": 2356019200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002888781896966779, + "loss": 0.067, + "theoretical_loss": 3.41304454074231, + "tokens_seen": 2356150272 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002887979457550955, + "loss": 0.0664, + "theoretical_loss": 3.4130141478583758, + "tokens_seen": 2356412416 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002887177018135131, + "loss": 0.066, + "theoretical_loss": 3.4129837593019667, + "tokens_seen": 2356674560 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002886374578719307, + "loss": 0.0672, + "theoretical_loss": 3.412953375071985, + "tokens_seen": 2356936704 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028855721393034826, + "loss": 0.0662, + "theoretical_loss": 3.412922995167334, + "tokens_seen": 2357198848 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028847696998876587, + "loss": 0.0662, + "theoretical_loss": 3.4128926195869163, + "tokens_seen": 2357460992 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028839672604718343, + "loss": 0.0634, + "theoretical_loss": 3.4128622483296365, + "tokens_seen": 2357723136 + }, + { + "epoch": 0.71, + "learning_rate": 0.00028831648210560104, + "loss": 0.0674, + "theoretical_loss": 3.412831881394399, + "tokens_seen": 2357985280 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002882362381640186, + "loss": 0.0669, + "theoretical_loss": 3.4128015187801073, + "tokens_seen": 2358247424 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002881559942224362, + "loss": 0.0649, + "theoretical_loss": 3.4127711604856668, + "tokens_seen": 2358509568 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002880757502808538, + "loss": 0.0665, + "theoretical_loss": 3.4127408065099827, + "tokens_seen": 2358771712 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002879955063392714, + "loss": 0.0666, + "theoretical_loss": 3.4127104568519613, + "tokens_seen": 2359033856 + }, + { + "epoch": 0.71, + "objective/train/advantage_avg": -9.175229934044182e-06, + "objective/train/docs_used": 858941, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2591488361358643, + "objective/train/original_loss": 1.2591485977172852, + "objective/train/theoretical_loss": 3.412680111510508, + "objective/train/tokens_used": 2379756000, + "objective/train/value_avg": -0.0064544677734375, + "objective/train/value_loss": 0.00019788608187809587, + "objective/train/value_max": -3.510713577270508e-05, + "objective/train/value_min": -0.3271484375, + "objective/train/value_reward_corr": 0.6843129270556736, + "objective/train/value_std": 0.0122528076171875, + "objective/train/weight_avg": 1.0000793933868408, + "objective/train/weighted_lm_loss": 1.2583329677581787, + "objective/train/weights_max": 1.2182163000106812, + "objective/train/weights_min": 0.3679918348789215, + "theoretical_loss": 3.412680111510508, + "tokens_seen": 2359296000 + }, + { + "epoch": 0.71, + "learning_rate": 0.000287915262397689, + "loss": 0.0645, + "theoretical_loss": 3.412680111510508, + "tokens_seen": 2359296000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028783501845610655, + "loss": 0.0677, + "theoretical_loss": 3.41264977048453, + "tokens_seen": 2359558144 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028775477451452417, + "loss": 0.0692, + "theoretical_loss": 3.412619433772934, + "tokens_seen": 2359820288 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002876745305729417, + "loss": 0.0677, + "theoretical_loss": 3.412589101374627, + "tokens_seen": 2360082432 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028759428663135934, + "loss": 0.0664, + "theoretical_loss": 3.4125587732885174, + "tokens_seen": 2360344576 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028751404268977695, + "loss": 0.0663, + "theoretical_loss": 3.4125284495135126, + "tokens_seen": 2360606720 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002874337987481945, + "loss": 0.0668, + "theoretical_loss": 3.412498130048522, + "tokens_seen": 2360868864 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002873535548066121, + "loss": 0.0681, + "theoretical_loss": 3.412467814892454, + "tokens_seen": 2361131008 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002872733108650297, + "loss": 0.066, + "theoretical_loss": 3.4124375040442176, + "tokens_seen": 2361393152 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002871930669234473, + "loss": 0.0671, + "theoretical_loss": 3.4124071975027235, + "tokens_seen": 2361655296 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028711282298186485, + "loss": 0.0667, + "theoretical_loss": 3.4123768952668807, + "tokens_seen": 2361917440 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028703257904028246, + "loss": 0.0678, + "theoretical_loss": 3.412346597335601, + "tokens_seen": 2362179584 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002869523350987001, + "loss": 0.0684, + "theoretical_loss": 3.4123163037077937, + "tokens_seen": 2362441728 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.0010815730784088373, + "objective/train/docs_used": 860261, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.416516900062561, + "objective/train/original_loss": 1.4165170192718506, + "objective/train/theoretical_loss": 3.4123011585073524, + "objective/train/tokens_used": 2383032800, + "objective/train/value_avg": -0.005901336669921875, + "objective/train/value_loss": 8.771286957198754e-05, + "objective/train/value_max": -2.9802322387695312e-05, + "objective/train/value_min": -0.2247314453125, + "objective/train/value_reward_corr": 0.7080153186998583, + "objective/train/value_std": 0.01003265380859375, + "objective/train/weight_avg": 1.0011248588562012, + "objective/train/weighted_lm_loss": 1.4178658723831177, + "objective/train/weights_max": 1.1204938888549805, + "objective/train/weights_min": 0.7841757535934448, + "theoretical_loss": 3.4123011585073524, + "tokens_seen": 2362572800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028687209115711763, + "loss": 0.0659, + "theoretical_loss": 3.4122860143823717, + "tokens_seen": 2362703872 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028679184721553525, + "loss": 0.0649, + "theoretical_loss": 3.4122557293582454, + "tokens_seen": 2362966016 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028671160327395286, + "loss": 0.0662, + "theoretical_loss": 3.412225448634328, + "tokens_seen": 2363228160 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028663135933237036, + "loss": 0.0658, + "theoretical_loss": 3.4121951722095316, + "tokens_seen": 2363490304 + }, + { + "epoch": 0.72, + "learning_rate": 0.000286551115390788, + "loss": 0.0685, + "theoretical_loss": 3.412164900082769, + "tokens_seen": 2363752448 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002864708714492056, + "loss": 0.065, + "theoretical_loss": 3.412134632252953, + "tokens_seen": 2364014592 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002863906275076232, + "loss": 0.0641, + "theoretical_loss": 3.4121043687189982, + "tokens_seen": 2364276736 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028631038356604076, + "loss": 0.0693, + "theoretical_loss": 3.4120741094798186, + "tokens_seen": 2364538880 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002862301396244584, + "loss": 0.0657, + "theoretical_loss": 3.412043854534328, + "tokens_seen": 2364801024 + }, + { + "epoch": 0.72, + "learning_rate": 0.000286149895682876, + "loss": 0.0655, + "theoretical_loss": 3.4120136038814417, + "tokens_seen": 2365063168 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002860696517412935, + "loss": 0.0674, + "theoretical_loss": 3.411983357520075, + "tokens_seen": 2365325312 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002859894077997111, + "loss": 0.0659, + "theoretical_loss": 3.4119531154491436, + "tokens_seen": 2365587456 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.0008249745587818325, + "objective/train/docs_used": 861421, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1259411573410034, + "objective/train/original_loss": 1.1259410381317139, + "objective/train/theoretical_loss": 3.411922877667563, + "objective/train/tokens_used": 2386309600, + "objective/train/value_avg": -0.00743865966796875, + "objective/train/value_loss": 0.00017807947006076574, + "objective/train/value_max": -4.869699478149414e-05, + "objective/train/value_min": -0.32421875, + "objective/train/value_reward_corr": 0.6612214544166373, + "objective/train/value_std": 0.01175689697265625, + "objective/train/weight_avg": 1.0009032487869263, + "objective/train/weighted_lm_loss": 1.1269689798355103, + "objective/train/weights_max": 1.2590364217758179, + "objective/train/weights_min": 0.3725539743900299, + "theoretical_loss": 3.411922877667563, + "tokens_seen": 2365849600 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002859091638581287, + "loss": 0.0651, + "theoretical_loss": 3.411922877667563, + "tokens_seen": 2365849600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028582891991654633, + "loss": 0.0663, + "theoretical_loss": 3.4118926441742508, + "tokens_seen": 2366111744 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002857486759749639, + "loss": 0.0655, + "theoretical_loss": 3.4118624149681227, + "tokens_seen": 2366373888 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002856684320333815, + "loss": 0.0649, + "theoretical_loss": 3.4118321900480963, + "tokens_seen": 2366636032 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002855881880917991, + "loss": 0.0658, + "theoretical_loss": 3.4118019694130894, + "tokens_seen": 2366898176 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028550794415021667, + "loss": 0.0692, + "theoretical_loss": 3.41177175306202, + "tokens_seen": 2367160320 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028542770020863423, + "loss": 0.0669, + "theoretical_loss": 3.4117415409938068, + "tokens_seen": 2367422464 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028534745626705184, + "loss": 0.0667, + "theoretical_loss": 3.411711333207368, + "tokens_seen": 2367684608 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028526721232546945, + "loss": 0.0657, + "theoretical_loss": 3.411681129701623, + "tokens_seen": 2367946752 + }, + { + "epoch": 0.72, + "learning_rate": 0.000285186968383887, + "loss": 0.0648, + "theoretical_loss": 3.411650930475491, + "tokens_seen": 2368208896 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002851067244423046, + "loss": 0.066, + "theoretical_loss": 3.4116207355278934, + "tokens_seen": 2368471040 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028502648050072224, + "loss": 0.0668, + "theoretical_loss": 3.411590544857749, + "tokens_seen": 2368733184 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002849462365591398, + "loss": 0.0675, + "theoretical_loss": 3.4115603584639795, + "tokens_seen": 2368995328 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.0003487260837573558, + "objective/train/docs_used": 862685, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2241593599319458, + "objective/train/original_loss": 1.2241593599319458, + "objective/train/theoretical_loss": 3.411545266870398, + "objective/train/tokens_used": 2389586400, + "objective/train/value_avg": -0.00579833984375, + "objective/train/value_loss": 0.00012634368613362312, + "objective/train/value_max": -2.9325485229492188e-05, + "objective/train/value_min": -0.55712890625, + "objective/train/value_reward_corr": 0.6420565850004247, + "objective/train/value_std": 0.00954437255859375, + "objective/train/weight_avg": 1.0004075765609741, + "objective/train/weighted_lm_loss": 1.2246220111846924, + "objective/train/weights_max": 1.3289666175842285, + "objective/train/weights_min": 0.371211975812912, + "theoretical_loss": 3.411545266870398, + "tokens_seen": 2369126400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028486599261755735, + "loss": 0.065, + "theoretical_loss": 3.4115301763455057, + "tokens_seen": 2369257472 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028478574867597497, + "loss": 0.0666, + "theoretical_loss": 3.411499998501249, + "tokens_seen": 2369519616 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002847055047343925, + "loss": 0.0668, + "theoretical_loss": 3.4114698249301316, + "tokens_seen": 2369781760 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028462526079281014, + "loss": 0.0657, + "theoretical_loss": 3.411439655631076, + "tokens_seen": 2370043904 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028454501685122775, + "loss": 0.0665, + "theoretical_loss": 3.4114094906030052, + "tokens_seen": 2370306048 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028446477290964536, + "loss": 0.0672, + "theoretical_loss": 3.4113793298448414, + "tokens_seen": 2370568192 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002843845289680629, + "loss": 0.0681, + "theoretical_loss": 3.411349173355508, + "tokens_seen": 2370830336 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002843042850264805, + "loss": 0.0691, + "theoretical_loss": 3.4113190211339304, + "tokens_seen": 2371092480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002842240410848981, + "loss": 0.0677, + "theoretical_loss": 3.4112888731790316, + "tokens_seen": 2371354624 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028414379714331565, + "loss": 0.0662, + "theoretical_loss": 3.4112587294897367, + "tokens_seen": 2371616768 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028406355320173326, + "loss": 0.0666, + "theoretical_loss": 3.4112285900649706, + "tokens_seen": 2371878912 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002839833092601509, + "loss": 0.0649, + "theoretical_loss": 3.411198454903659, + "tokens_seen": 2372141056 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.0007301963050849736, + "objective/train/docs_used": 863878, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3005666732788086, + "objective/train/original_loss": 1.3005667924880981, + "objective/train/theoretical_loss": 3.4111683240047275, + "objective/train/tokens_used": 2392863200, + "objective/train/value_avg": -0.00737762451171875, + "objective/train/value_loss": 0.0002043836284428835, + "objective/train/value_max": -3.218650817871094e-05, + "objective/train/value_min": -0.67529296875, + "objective/train/value_reward_corr": 0.7171774505277242, + "objective/train/value_std": 0.0146636962890625, + "objective/train/weight_avg": 1.0008224248886108, + "objective/train/weighted_lm_loss": 1.3015154600143433, + "objective/train/weights_max": 1.142139196395874, + "objective/train/weights_min": 0.37391793727874756, + "theoretical_loss": 3.4111683240047275, + "tokens_seen": 2372403200 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002839030653185685, + "loss": 0.0669, + "theoretical_loss": 3.4111683240047275, + "tokens_seen": 2372403200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028382282137698605, + "loss": 0.0626, + "theoretical_loss": 3.4111381973671024, + "tokens_seen": 2372665344 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028374257743540366, + "loss": 0.0652, + "theoretical_loss": 3.4111080749897105, + "tokens_seen": 2372927488 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002836623334938212, + "loss": 0.067, + "theoretical_loss": 3.4110779568714786, + "tokens_seen": 2373189632 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002835820895522388, + "loss": 0.0672, + "theoretical_loss": 3.4110478430113345, + "tokens_seen": 2373451776 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002835018456106564, + "loss": 0.0641, + "theoretical_loss": 3.4110177334082055, + "tokens_seen": 2373713920 + }, + { + "epoch": 0.72, + "learning_rate": 0.000283421601669074, + "loss": 0.0683, + "theoretical_loss": 3.4109876280610196, + "tokens_seen": 2373976064 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002833413577274916, + "loss": 0.0626, + "theoretical_loss": 3.4109575269687062, + "tokens_seen": 2374238208 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028326111378590917, + "loss": 0.0681, + "theoretical_loss": 3.4109274301301937, + "tokens_seen": 2374500352 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002831808698443268, + "loss": 0.0647, + "theoretical_loss": 3.410897337544412, + "tokens_seen": 2374762496 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028310062590274434, + "loss": 0.0645, + "theoretical_loss": 3.41086724921029, + "tokens_seen": 2375024640 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002830203819611619, + "loss": 0.0669, + "theoretical_loss": 3.410837165126758, + "tokens_seen": 2375286784 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002829401380195795, + "loss": 0.0678, + "theoretical_loss": 3.4108070852927472, + "tokens_seen": 2375548928 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 6.751207547495142e-05, + "objective/train/docs_used": 865150, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3704978227615356, + "objective/train/original_loss": 1.370497703552246, + "objective/train/theoretical_loss": 3.410792046968978, + "objective/train/tokens_used": 2396140000, + "objective/train/value_avg": -0.00868988037109375, + "objective/train/value_loss": 0.00020805127860512584, + "objective/train/value_max": -3.7610530853271484e-05, + "objective/train/value_min": -0.970703125, + "objective/train/value_reward_corr": 0.7735973443730089, + "objective/train/value_std": 0.016876220703125, + "objective/train/weight_avg": 1.0001626014709473, + "objective/train/weighted_lm_loss": 1.3697094917297363, + "objective/train/weights_max": 1.2211940288543701, + "objective/train/weights_min": 0.3830130398273468, + "theoretical_loss": 3.410792046968978, + "tokens_seen": 2375680000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028285989407799713, + "loss": 0.0657, + "theoretical_loss": 3.4107770097071874, + "tokens_seen": 2375811072 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002827796501364147, + "loss": 0.0651, + "theoretical_loss": 3.410746938369011, + "tokens_seen": 2376073216 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002826994061948323, + "loss": 0.0681, + "theoretical_loss": 3.410716871277149, + "tokens_seen": 2376335360 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002826191622532499, + "loss": 0.065, + "theoretical_loss": 3.4106868084305337, + "tokens_seen": 2376597504 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028253891831166747, + "loss": 0.0676, + "theoretical_loss": 3.4106567498280973, + "tokens_seen": 2376859648 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028245867437008503, + "loss": 0.0672, + "theoretical_loss": 3.410626695468773, + "tokens_seen": 2377121792 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028237843042850264, + "loss": 0.0653, + "theoretical_loss": 3.410596645351494, + "tokens_seen": 2377383936 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028229818648692025, + "loss": 0.065, + "theoretical_loss": 3.4105665994751933, + "tokens_seen": 2377646080 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002822179425453378, + "loss": 0.0643, + "theoretical_loss": 3.4105365578388054, + "tokens_seen": 2377908224 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002821376986037554, + "loss": 0.0673, + "theoretical_loss": 3.410506520441265, + "tokens_seen": 2378170368 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028205745466217304, + "loss": 0.0662, + "theoretical_loss": 3.410476487281506, + "tokens_seen": 2378432512 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028197721072059065, + "loss": 0.065, + "theoretical_loss": 3.410446458358464, + "tokens_seen": 2378694656 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.0003571182896848768, + "objective/train/docs_used": 866449, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.122450828552246, + "objective/train/original_loss": 1.122450828552246, + "objective/train/theoretical_loss": 3.4104164336710747, + "objective/train/tokens_used": 2399416800, + "objective/train/value_avg": -0.00919342041015625, + "objective/train/value_loss": 0.00014748242392670363, + "objective/train/value_max": -5.346536636352539e-05, + "objective/train/value_min": -0.291015625, + "objective/train/value_reward_corr": 0.7777066932130247, + "objective/train/value_std": 0.015655517578125, + "objective/train/weight_avg": 1.0004256963729858, + "objective/train/weighted_lm_loss": 1.1227384805679321, + "objective/train/weights_max": 1.2215328216552734, + "objective/train/weights_min": 0.40386396646499634, + "theoretical_loss": 3.4104164336710747, + "tokens_seen": 2378956800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028189696677900815, + "loss": 0.065, + "theoretical_loss": 3.4104164336710747, + "tokens_seen": 2378956800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028181672283742577, + "loss": 0.0648, + "theoretical_loss": 3.4103864132182733, + "tokens_seen": 2379218944 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002817364788958434, + "loss": 0.0667, + "theoretical_loss": 3.410356396998997, + "tokens_seen": 2379481088 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028165623495426094, + "loss": 0.0703, + "theoretical_loss": 3.410326385012182, + "tokens_seen": 2379743232 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028157599101267855, + "loss": 0.0667, + "theoretical_loss": 3.4102963772567656, + "tokens_seen": 2380005376 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028149574707109616, + "loss": 0.0664, + "theoretical_loss": 3.4102663737316847, + "tokens_seen": 2380267520 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002814155031295138, + "loss": 0.065, + "theoretical_loss": 3.410236374435878, + "tokens_seen": 2380529664 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002813352591879313, + "loss": 0.0653, + "theoretical_loss": 3.410206379368283, + "tokens_seen": 2380791808 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002812550152463489, + "loss": 0.066, + "theoretical_loss": 3.4101763885278387, + "tokens_seen": 2381053952 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002811747713047665, + "loss": 0.0675, + "theoretical_loss": 3.410146401913484, + "tokens_seen": 2381316096 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028109452736318406, + "loss": 0.0654, + "theoretical_loss": 3.410116419524158, + "tokens_seen": 2381578240 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002810142834216017, + "loss": 0.0659, + "theoretical_loss": 3.4100864413588003, + "tokens_seen": 2381840384 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002809340394800193, + "loss": 0.0652, + "theoretical_loss": 3.410056467416352, + "tokens_seen": 2382102528 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": -0.000377790245693177, + "objective/train/docs_used": 867590, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3330551385879517, + "objective/train/original_loss": 1.3330551385879517, + "objective/train/theoretical_loss": 3.410041482028387, + "objective/train/tokens_used": 2402693600, + "objective/train/value_avg": -0.01041412353515625, + "objective/train/value_loss": 0.00034967754618264735, + "objective/train/value_max": -1.3828277587890625e-05, + "objective/train/value_min": -0.447998046875, + "objective/train/value_reward_corr": 0.7093849786916704, + "objective/train/value_std": 0.019927978515625, + "objective/train/weight_avg": 0.9997868537902832, + "objective/train/weighted_lm_loss": 1.3321644067764282, + "objective/train/weights_max": 1.4849282503128052, + "objective/train/weights_min": 0.376959890127182, + "theoretical_loss": 3.410041482028387, + "tokens_seen": 2382233600 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002808537955384369, + "loss": 0.067, + "theoretical_loss": 3.4100264976957524, + "tokens_seen": 2382364672 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028077355159685446, + "loss": 0.0665, + "theoretical_loss": 3.4099965321959433, + "tokens_seen": 2382626816 + }, + { + "epoch": 0.72, + "learning_rate": 0.000280693307655272, + "loss": 0.0656, + "theoretical_loss": 3.4099665709158655, + "tokens_seen": 2382888960 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028061306371368963, + "loss": 0.0659, + "theoretical_loss": 3.409936613854461, + "tokens_seen": 2383151104 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002805328197721072, + "loss": 0.0675, + "theoretical_loss": 3.4099066610106714, + "tokens_seen": 2383413248 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002804525758305248, + "loss": 0.0659, + "theoretical_loss": 3.40987671238344, + "tokens_seen": 2383675392 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002803723318889424, + "loss": 0.066, + "theoretical_loss": 3.4098467679717084, + "tokens_seen": 2383937536 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028029208794735997, + "loss": 0.0664, + "theoretical_loss": 3.4098168277744203, + "tokens_seen": 2384199680 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002802118440057776, + "loss": 0.0666, + "theoretical_loss": 3.4097868917905196, + "tokens_seen": 2384461824 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028013160006419514, + "loss": 0.0654, + "theoretical_loss": 3.40975696001895, + "tokens_seen": 2384723968 + }, + { + "epoch": 0.72, + "learning_rate": 0.00028005135612261276, + "loss": 0.0684, + "theoretical_loss": 3.4097270324586555, + "tokens_seen": 2384986112 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002799711121810303, + "loss": 0.0654, + "theoretical_loss": 3.4096971091085817, + "tokens_seen": 2385248256 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 0.0015336308861151338, + "objective/train/docs_used": 868828, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.295317530632019, + "objective/train/original_loss": 1.29531729221344, + "objective/train/theoretical_loss": 3.4096671899676725, + "objective/train/tokens_used": 2405970400, + "objective/train/value_avg": -0.01094818115234375, + "objective/train/value_loss": 0.00022329458442982286, + "objective/train/value_max": -6.657838821411133e-05, + "objective/train/value_min": -0.353759765625, + "objective/train/value_reward_corr": 0.7366697275718553, + "objective/train/value_std": 0.017852783203125, + "objective/train/weight_avg": 1.0016379356384277, + "objective/train/weighted_lm_loss": 1.2963742017745972, + "objective/train/weights_max": 1.3134781122207642, + "objective/train/weights_min": 0.3717447817325592, + "theoretical_loss": 3.4096671899676725, + "tokens_seen": 2385510400 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002798908682394479, + "loss": 0.0686, + "theoretical_loss": 3.4096671899676725, + "tokens_seen": 2385510400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027981062429786554, + "loss": 0.0661, + "theoretical_loss": 3.4096372750348745, + "tokens_seen": 2385772544 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002797303803562831, + "loss": 0.0667, + "theoretical_loss": 3.409607364309133, + "tokens_seen": 2386034688 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002796501364147007, + "loss": 0.0699, + "theoretical_loss": 3.4095774577893936, + "tokens_seen": 2386296832 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027956989247311827, + "loss": 0.0654, + "theoretical_loss": 3.4095475554746044, + "tokens_seen": 2386558976 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002794896485315359, + "loss": 0.0678, + "theoretical_loss": 3.409517657363711, + "tokens_seen": 2386821120 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027940940458995344, + "loss": 0.0676, + "theoretical_loss": 3.4094877634556617, + "tokens_seen": 2387083264 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027932916064837105, + "loss": 0.0678, + "theoretical_loss": 3.4094578737494037, + "tokens_seen": 2387345408 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027924891670678866, + "loss": 0.068, + "theoretical_loss": 3.4094279882438854, + "tokens_seen": 2387607552 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002791686727652062, + "loss": 0.0636, + "theoretical_loss": 3.409398106938055, + "tokens_seen": 2387869696 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027908842882362384, + "loss": 0.0679, + "theoretical_loss": 3.4093682298308616, + "tokens_seen": 2388131840 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027900818488204145, + "loss": 0.0665, + "theoretical_loss": 3.4093383569212548, + "tokens_seen": 2388393984 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027892794094045895, + "loss": 0.0667, + "theoretical_loss": 3.4093084882081834, + "tokens_seen": 2388656128 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": 9.959724411601201e-05, + "objective/train/docs_used": 870071, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3700393438339233, + "objective/train/original_loss": 1.370039463043213, + "objective/train/theoretical_loss": 3.409293555425021, + "objective/train/tokens_used": 2409247200, + "objective/train/value_avg": -0.00676727294921875, + "objective/train/value_loss": 0.00039645720971748233, + "objective/train/value_max": -6.866455078125e-05, + "objective/train/value_min": -0.9521484375, + "objective/train/value_reward_corr": 0.675174519670669, + "objective/train/value_std": 0.01494598388671875, + "objective/train/weight_avg": 1.000266432762146, + "objective/train/weighted_lm_loss": 1.3696532249450684, + "objective/train/weights_max": 1.2567331790924072, + "objective/train/weights_min": 0.3683270514011383, + "theoretical_loss": 3.409293555425021, + "tokens_seen": 2388787200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027884769699887657, + "loss": 0.0718, + "theoretical_loss": 3.4092786236905983, + "tokens_seen": 2388918272 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002787674530572942, + "loss": 0.065, + "theoretical_loss": 3.4092487633674495, + "tokens_seen": 2389180416 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002786872091157118, + "loss": 0.0668, + "theoretical_loss": 3.4092189072376877, + "tokens_seen": 2389442560 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027860696517412935, + "loss": 0.0681, + "theoretical_loss": 3.4091890553002644, + "tokens_seen": 2389704704 + }, + { + "epoch": 0.72, + "learning_rate": 0.00027852672123254696, + "loss": 0.0678, + "theoretical_loss": 3.4091592075541306, + "tokens_seen": 2389966848 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002784464772909646, + "loss": 0.0655, + "theoretical_loss": 3.4091293639982387, + "tokens_seen": 2390228992 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002783662333493821, + "loss": 0.0651, + "theoretical_loss": 3.4090995246315403, + "tokens_seen": 2390491136 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002782859894077997, + "loss": 0.0651, + "theoretical_loss": 3.4090696894529886, + "tokens_seen": 2390753280 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002782057454662173, + "loss": 0.0667, + "theoretical_loss": 3.409039858461537, + "tokens_seen": 2391015424 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002781255015246349, + "loss": 0.0676, + "theoretical_loss": 3.4090100316561385, + "tokens_seen": 2391277568 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002780452575830525, + "loss": 0.0672, + "theoretical_loss": 3.408980209035746, + "tokens_seen": 2391539712 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002779650136414701, + "loss": 0.067, + "theoretical_loss": 3.4089503905993155, + "tokens_seen": 2391801856 + }, + { + "epoch": 0.72, + "objective/train/advantage_avg": -2.867977491405327e-05, + "objective/train/docs_used": 871244, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4574215412139893, + "objective/train/original_loss": 1.4574213027954102, + "objective/train/theoretical_loss": 3.4089205763458, + "objective/train/tokens_used": 2412524000, + "objective/train/value_avg": -0.00861358642578125, + "objective/train/value_loss": 0.00014166087203193456, + "objective/train/value_max": -4.5418739318847656e-05, + "objective/train/value_min": -0.31787109375, + "objective/train/value_reward_corr": 0.7858477079902386, + "objective/train/value_std": 0.0163726806640625, + "objective/train/weight_avg": 1.000041127204895, + "objective/train/weighted_lm_loss": 1.457660436630249, + "objective/train/weights_max": 1.1719762086868286, + "objective/train/weights_min": 0.7179513573646545, + "theoretical_loss": 3.4089205763458, + "tokens_seen": 2392064000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002778847696998877, + "loss": 0.0673, + "theoretical_loss": 3.4089205763458, + "tokens_seen": 2392064000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002778045257583052, + "loss": 0.0676, + "theoretical_loss": 3.408890766274155, + "tokens_seen": 2392326144 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002777242818167228, + "loss": 0.067, + "theoretical_loss": 3.408860960383336, + "tokens_seen": 2392588288 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027764403787514043, + "loss": 0.0661, + "theoretical_loss": 3.4088311586722986, + "tokens_seen": 2392850432 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027756379393355804, + "loss": 0.0665, + "theoretical_loss": 3.4088013611399983, + "tokens_seen": 2393112576 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002774835499919756, + "loss": 0.0669, + "theoretical_loss": 3.408771567785392, + "tokens_seen": 2393374720 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002774033060503932, + "loss": 0.0666, + "theoretical_loss": 3.4087417786074363, + "tokens_seen": 2393636864 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002773230621088108, + "loss": 0.0679, + "theoretical_loss": 3.4087119936050887, + "tokens_seen": 2393899008 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002772428181672284, + "loss": 0.0675, + "theoretical_loss": 3.4086822127773067, + "tokens_seen": 2394161152 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027716257422564594, + "loss": 0.0645, + "theoretical_loss": 3.4086524361230475, + "tokens_seen": 2394423296 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027708233028406355, + "loss": 0.0659, + "theoretical_loss": 3.40862266364127, + "tokens_seen": 2394685440 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027700208634248117, + "loss": 0.0643, + "theoretical_loss": 3.4085928953309335, + "tokens_seen": 2394947584 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002769218424008987, + "loss": 0.0677, + "theoretical_loss": 3.408563131190996, + "tokens_seen": 2395209728 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.00042437895899638534, + "objective/train/docs_used": 872384, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2476065158843994, + "objective/train/original_loss": 1.2476065158843994, + "objective/train/theoretical_loss": 3.408548250684601, + "objective/train/tokens_used": 2415800800, + "objective/train/value_avg": -0.006153106689453125, + "objective/train/value_loss": 0.00014658182044513524, + "objective/train/value_max": -3.4809112548828125e-05, + "objective/train/value_min": -0.2880859375, + "objective/train/value_reward_corr": 0.5964869899054704, + "objective/train/value_std": 0.0099029541015625, + "objective/train/weight_avg": 1.0004934072494507, + "objective/train/weighted_lm_loss": 1.2491647005081177, + "objective/train/weights_max": 1.2718428373336792, + "objective/train/weights_min": 0.37988168001174927, + "theoretical_loss": 3.408548250684601, + "tokens_seen": 2395340800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027684159845931634, + "loss": 0.0662, + "theoretical_loss": 3.408533371220417, + "tokens_seen": 2395471872 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027676135451773395, + "loss": 0.0664, + "theoretical_loss": 3.408503615418156, + "tokens_seen": 2395734016 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002766811105761515, + "loss": 0.0659, + "theoretical_loss": 3.4084738637831746, + "tokens_seen": 2395996160 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027660086663456907, + "loss": 0.0647, + "theoretical_loss": 3.408444116314432, + "tokens_seen": 2396258304 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002765206226929867, + "loss": 0.0677, + "theoretical_loss": 3.4084143730108893, + "tokens_seen": 2396520448 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027644037875140424, + "loss": 0.0671, + "theoretical_loss": 3.408384633871509, + "tokens_seen": 2396782592 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027636013480982185, + "loss": 0.0685, + "theoretical_loss": 3.408354898895251, + "tokens_seen": 2397044736 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027627989086823946, + "loss": 0.067, + "theoretical_loss": 3.408325168081078, + "tokens_seen": 2397306880 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002761996469266571, + "loss": 0.0666, + "theoretical_loss": 3.4082954414279523, + "tokens_seen": 2397569024 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027611940298507464, + "loss": 0.066, + "theoretical_loss": 3.408265718934837, + "tokens_seen": 2397831168 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027603915904349225, + "loss": 0.0687, + "theoretical_loss": 3.4082360006006955, + "tokens_seen": 2398093312 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002759589151019098, + "loss": 0.0663, + "theoretical_loss": 3.4082062864244906, + "tokens_seen": 2398355456 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.0004930827999487519, + "objective/train/docs_used": 873551, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2975027561187744, + "objective/train/original_loss": 1.2975027561187744, + "objective/train/theoretical_loss": 3.4081765764051863, + "objective/train/tokens_used": 2419077600, + "objective/train/value_avg": -0.008544921875, + "objective/train/value_loss": 0.00024381437106058002, + "objective/train/value_max": -2.9981136322021484e-05, + "objective/train/value_min": -0.282958984375, + "objective/train/value_reward_corr": 0.7140946149248976, + "objective/train/value_std": 0.0169219970703125, + "objective/train/weight_avg": 1.0006061792373657, + "objective/train/weighted_lm_loss": 1.2970629930496216, + "objective/train/weights_max": 1.1696314811706543, + "objective/train/weights_min": 0.3694954514503479, + "theoretical_loss": 3.4081765764051863, + "tokens_seen": 2398617600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027587867116032736, + "loss": 0.0653, + "theoretical_loss": 3.4081765764051863, + "tokens_seen": 2398617600 + }, + { + "epoch": 0.73, + "learning_rate": 0.000275798427218745, + "loss": 0.0676, + "theoretical_loss": 3.4081468705417475, + "tokens_seen": 2398879744 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002757181832771626, + "loss": 0.0641, + "theoretical_loss": 3.408117168833138, + "tokens_seen": 2399141888 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002756379393355802, + "loss": 0.0649, + "theoretical_loss": 3.408087471278323, + "tokens_seen": 2399404032 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027555769539399776, + "loss": 0.0657, + "theoretical_loss": 3.4080577778762686, + "tokens_seen": 2399666176 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002754774514524154, + "loss": 0.0692, + "theoretical_loss": 3.4080280886259393, + "tokens_seen": 2399928320 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027539720751083293, + "loss": 0.0664, + "theoretical_loss": 3.4079984035263022, + "tokens_seen": 2400190464 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002753169635692505, + "loss": 0.0647, + "theoretical_loss": 3.4079687225763236, + "tokens_seen": 2400452608 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002752367196276681, + "loss": 0.0676, + "theoretical_loss": 3.40793904577497, + "tokens_seen": 2400714752 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002751564756860857, + "loss": 0.0689, + "theoretical_loss": 3.4079093731212087, + "tokens_seen": 2400976896 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027507623174450333, + "loss": 0.0648, + "theoretical_loss": 3.4078797046140075, + "tokens_seen": 2401239040 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002749959878029209, + "loss": 0.0669, + "theoretical_loss": 3.4078500402523346, + "tokens_seen": 2401501184 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002749157438613385, + "loss": 0.0654, + "theoretical_loss": 3.407820380035157, + "tokens_seen": 2401763328 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.00013451933045871556, + "objective/train/docs_used": 874334, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2702131271362305, + "objective/train/original_loss": 1.2702131271362305, + "objective/train/theoretical_loss": 3.4078055514804326, + "objective/train/tokens_used": 2422354400, + "objective/train/value_avg": -0.007625579833984375, + "objective/train/value_loss": 0.00018812972120940685, + "objective/train/value_max": -1.2993812561035156e-05, + "objective/train/value_min": -0.2490234375, + "objective/train/value_reward_corr": 0.7233506520390734, + "objective/train/value_std": 0.01470947265625, + "objective/train/weight_avg": 1.0002230405807495, + "objective/train/weighted_lm_loss": 1.2700724601745605, + "objective/train/weights_max": 1.20888352394104, + "objective/train/weights_min": 0.36869922280311584, + "theoretical_loss": 3.4078055514804326, + "tokens_seen": 2401894400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027483549991975606, + "loss": 0.0668, + "theoretical_loss": 3.4077907239614453, + "tokens_seen": 2402025472 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002747552559781736, + "loss": 0.0679, + "theoretical_loss": 3.4077610720301674, + "tokens_seen": 2402287616 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027467501203659123, + "loss": 0.0691, + "theoretical_loss": 3.407731424240293, + "tokens_seen": 2402549760 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027459476809500884, + "loss": 0.0658, + "theoretical_loss": 3.4077017805907923, + "tokens_seen": 2402811904 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002745145241534264, + "loss": 0.0707, + "theoretical_loss": 3.4076721410806345, + "tokens_seen": 2403074048 + }, + { + "epoch": 0.73, + "learning_rate": 0.000274434280211844, + "loss": 0.0658, + "theoretical_loss": 3.407642505708791, + "tokens_seen": 2403336192 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002743540362702616, + "loss": 0.0674, + "theoretical_loss": 3.4076128744742324, + "tokens_seen": 2403598336 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027427379232867924, + "loss": 0.0663, + "theoretical_loss": 3.4075832473759298, + "tokens_seen": 2403860480 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027419354838709674, + "loss": 0.0714, + "theoretical_loss": 3.4075536244128553, + "tokens_seen": 2404122624 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027411330444551435, + "loss": 0.0701, + "theoretical_loss": 3.4075240055839804, + "tokens_seen": 2404384768 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027403306050393197, + "loss": 0.0666, + "theoretical_loss": 3.407494390888278, + "tokens_seen": 2404646912 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002739528165623495, + "loss": 0.068, + "theoretical_loss": 3.4074647803247204, + "tokens_seen": 2404909056 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.0008375584147870541, + "objective/train/docs_used": 875512, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2580220699310303, + "objective/train/original_loss": 1.2580218315124512, + "objective/train/theoretical_loss": 3.407435173892281, + "objective/train/tokens_used": 2425631200, + "objective/train/value_avg": -0.004520416259765625, + "objective/train/value_loss": 0.0001223483559442684, + "objective/train/value_max": -3.3736228942871094e-05, + "objective/train/value_min": -0.1993408203125, + "objective/train/value_reward_corr": 0.5143376907001702, + "objective/train/value_std": 0.006969451904296875, + "objective/train/weight_avg": 1.0008904933929443, + "objective/train/weighted_lm_loss": 1.2593088150024414, + "objective/train/weights_max": 1.1975740194320679, + "objective/train/weights_min": 0.37003564834594727, + "theoretical_loss": 3.407435173892281, + "tokens_seen": 2405171200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027387257262076714, + "loss": 0.0641, + "theoretical_loss": 3.407435173892281, + "tokens_seen": 2405171200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027379232867918475, + "loss": 0.0696, + "theoretical_loss": 3.407405571589933, + "tokens_seen": 2405433344 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027371208473760236, + "loss": 0.0686, + "theoretical_loss": 3.4073759734166504, + "tokens_seen": 2405695488 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027363184079601987, + "loss": 0.0667, + "theoretical_loss": 3.4073463793714076, + "tokens_seen": 2405957632 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002735515968544375, + "loss": 0.0696, + "theoretical_loss": 3.407316789453179, + "tokens_seen": 2406219776 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002734713529128551, + "loss": 0.0664, + "theoretical_loss": 3.40728720366094, + "tokens_seen": 2406481920 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027339110897127265, + "loss": 0.0688, + "theoretical_loss": 3.407257621993665, + "tokens_seen": 2406744064 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027331086502969026, + "loss": 0.0658, + "theoretical_loss": 3.4072280444503305, + "tokens_seen": 2407006208 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002732306210881079, + "loss": 0.0679, + "theoretical_loss": 3.407198471029912, + "tokens_seen": 2407268352 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002731503771465255, + "loss": 0.0677, + "theoretical_loss": 3.4071689017313864, + "tokens_seen": 2407530496 + }, + { + "epoch": 0.73, + "learning_rate": 0.000273070133204943, + "loss": 0.0687, + "theoretical_loss": 3.40713933655373, + "tokens_seen": 2407792640 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002729898892633606, + "loss": 0.0712, + "theoretical_loss": 3.40710977549592, + "tokens_seen": 2408054784 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002729096453217782, + "loss": 0.0681, + "theoretical_loss": 3.4070802185569344, + "tokens_seen": 2408316928 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.000534991966560483, + "objective/train/docs_used": 876655, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3676766157150269, + "objective/train/original_loss": 1.3676767349243164, + "objective/train/theoretical_loss": 3.4070654416316812, + "objective/train/tokens_used": 2428908000, + "objective/train/value_avg": -0.0054931640625, + "objective/train/value_loss": 0.0001075658219633624, + "objective/train/value_max": -6.0617923736572266e-05, + "objective/train/value_min": -0.206298828125, + "objective/train/value_reward_corr": 0.6255496585240548, + "objective/train/value_std": 0.00858306884765625, + "objective/train/weight_avg": 1.0005849599838257, + "objective/train/weighted_lm_loss": 1.3686153888702393, + "objective/train/weights_max": 1.2291204929351807, + "objective/train/weights_min": 0.396826833486557, + "theoretical_loss": 3.4070654416316812, + "tokens_seen": 2408448000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002728294013801958, + "loss": 0.0686, + "theoretical_loss": 3.407050665735751, + "tokens_seen": 2408579072 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002727491574386134, + "loss": 0.0671, + "theoretical_loss": 3.407021117031347, + "tokens_seen": 2408841216 + }, + { + "epoch": 0.73, + "learning_rate": 0.000272668913497031, + "loss": 0.0659, + "theoretical_loss": 3.4069915724427027, + "tokens_seen": 2409103360 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002725886695554486, + "loss": 0.0663, + "theoretical_loss": 3.4069620319687957, + "tokens_seen": 2409365504 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027250842561386617, + "loss": 0.0693, + "theoretical_loss": 3.406932495608606, + "tokens_seen": 2409627648 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027242818167228373, + "loss": 0.0672, + "theoretical_loss": 3.4069029633611128, + "tokens_seen": 2409889792 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027234793773070134, + "loss": 0.0677, + "theoretical_loss": 3.4068734352252967, + "tokens_seen": 2410151936 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002722676937891189, + "loss": 0.0667, + "theoretical_loss": 3.406843911200138, + "tokens_seen": 2410414080 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002721874498475365, + "loss": 0.0687, + "theoretical_loss": 3.406814391284617, + "tokens_seen": 2410676224 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027210720590595413, + "loss": 0.0654, + "theoretical_loss": 3.4067848754777152, + "tokens_seen": 2410938368 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002720269619643717, + "loss": 0.068, + "theoretical_loss": 3.4067553637784145, + "tokens_seen": 2411200512 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002719467180227893, + "loss": 0.0683, + "theoretical_loss": 3.406725856185696, + "tokens_seen": 2411462656 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.00015087482461240143, + "objective/train/docs_used": 877919, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4046005010604858, + "objective/train/original_loss": 1.4046005010604858, + "objective/train/theoretical_loss": 3.4066963526985425, + "objective/train/tokens_used": 2432184800, + "objective/train/value_avg": -0.008453369140625, + "objective/train/value_loss": 0.00016879015311133116, + "objective/train/value_max": -5.8770179748535156e-05, + "objective/train/value_min": -0.48583984375, + "objective/train/value_reward_corr": 0.6949893811513895, + "objective/train/value_std": 0.01419830322265625, + "objective/train/weight_avg": 1.0002318620681763, + "objective/train/weighted_lm_loss": 1.4044239521026611, + "objective/train/weights_max": 1.409144401550293, + "objective/train/weights_min": 0.3955934941768646, + "theoretical_loss": 3.4066963526985425, + "tokens_seen": 2411724800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027186647408120686, + "loss": 0.0671, + "theoretical_loss": 3.4066963526985425, + "tokens_seen": 2411724800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027178623013962447, + "loss": 0.0675, + "theoretical_loss": 3.4066668533159365, + "tokens_seen": 2411986944 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027170598619804203, + "loss": 0.0689, + "theoretical_loss": 3.4066373580368614, + "tokens_seen": 2412249088 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027162574225645964, + "loss": 0.0692, + "theoretical_loss": 3.4066078668602997, + "tokens_seen": 2412511232 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027154549831487725, + "loss": 0.0674, + "theoretical_loss": 3.4065783797852354, + "tokens_seen": 2412773376 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002714652543732948, + "loss": 0.0669, + "theoretical_loss": 3.4065488968106528, + "tokens_seen": 2413035520 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002713850104317124, + "loss": 0.069, + "theoretical_loss": 3.406519417935536, + "tokens_seen": 2413297664 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027130476649013004, + "loss": 0.0686, + "theoretical_loss": 3.4064899431588698, + "tokens_seen": 2413559808 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002712245225485476, + "loss": 0.0669, + "theoretical_loss": 3.4064604724796395, + "tokens_seen": 2413821952 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027114427860696515, + "loss": 0.0682, + "theoretical_loss": 3.4064310058968306, + "tokens_seen": 2414084096 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027106403466538277, + "loss": 0.0637, + "theoretical_loss": 3.406401543409429, + "tokens_seen": 2414346240 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002709837907238004, + "loss": 0.0689, + "theoretical_loss": 3.4063720850164203, + "tokens_seen": 2414608384 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027090354678221794, + "loss": 0.0674, + "theoretical_loss": 3.406342630716792, + "tokens_seen": 2414870528 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": -0.00010549866419751197, + "objective/train/docs_used": 879191, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2191466093063354, + "objective/train/original_loss": 1.219146728515625, + "objective/train/theoretical_loss": 3.406327905101679, + "objective/train/tokens_used": 2435461600, + "objective/train/value_avg": -0.006084442138671875, + "objective/train/value_loss": 0.00020990363555029035, + "objective/train/value_max": -2.9981136322021484e-05, + "objective/train/value_min": -0.39013671875, + "objective/train/value_reward_corr": 0.6207213945178252, + "objective/train/value_std": 0.0113372802734375, + "objective/train/weight_avg": 0.999985933303833, + "objective/train/weighted_lm_loss": 1.2187398672103882, + "objective/train/weights_max": 1.2805371284484863, + "objective/train/weights_min": 0.37227270007133484, + "theoretical_loss": 3.406327905101679, + "tokens_seen": 2415001600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027082330284063555, + "loss": 0.068, + "theoretical_loss": 3.406313180509531, + "tokens_seen": 2415132672 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027074305889905316, + "loss": 0.0677, + "theoretical_loss": 3.406283734393624, + "tokens_seen": 2415394816 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027066281495747067, + "loss": 0.066, + "theoretical_loss": 3.4062542923680588, + "tokens_seen": 2415656960 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002705825710158883, + "loss": 0.0682, + "theoretical_loss": 3.406224854431824, + "tokens_seen": 2415919104 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002705023270743059, + "loss": 0.0685, + "theoretical_loss": 3.406195420583907, + "tokens_seen": 2416181248 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002704220831327235, + "loss": 0.0662, + "theoretical_loss": 3.4061659908232973, + "tokens_seen": 2416443392 + }, + { + "epoch": 0.73, + "learning_rate": 0.00027034183919114106, + "loss": 0.067, + "theoretical_loss": 3.4061365651489837, + "tokens_seen": 2416705536 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002702615952495587, + "loss": 0.0687, + "theoretical_loss": 3.406107143559956, + "tokens_seen": 2416967680 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002701813513079763, + "loss": 0.0678, + "theoretical_loss": 3.4060777260552038, + "tokens_seen": 2417229824 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002701011073663938, + "loss": 0.0686, + "theoretical_loss": 3.4060483126337173, + "tokens_seen": 2417491968 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002700208634248114, + "loss": 0.0694, + "theoretical_loss": 3.4060189032944868, + "tokens_seen": 2417754112 + }, + { + "epoch": 0.73, + "learning_rate": 0.000269940619483229, + "loss": 0.0661, + "theoretical_loss": 3.4059894980365035, + "tokens_seen": 2418016256 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": -0.00011731717677321285, + "objective/train/docs_used": 880852, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3383729457855225, + "objective/train/original_loss": 1.3383729457855225, + "objective/train/theoretical_loss": 3.4059600968587587, + "objective/train/tokens_used": 2438738400, + "objective/train/value_avg": -0.00749969482421875, + "objective/train/value_loss": 0.0005007804138585925, + "objective/train/value_max": -2.485513687133789e-05, + "objective/train/value_min": -0.61767578125, + "objective/train/value_reward_corr": 0.7091478847452439, + "objective/train/value_std": 0.0192413330078125, + "objective/train/weight_avg": 1.0000945329666138, + "objective/train/weighted_lm_loss": 1.3375656604766846, + "objective/train/weights_max": 1.3170450925827026, + "objective/train/weights_min": 0.3693002760410309, + "theoretical_loss": 3.4059600968587587, + "tokens_seen": 2418278400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026986037554164663, + "loss": 0.0676, + "theoretical_loss": 3.4059600968587587, + "tokens_seen": 2418278400 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002697801316000642, + "loss": 0.0665, + "theoretical_loss": 3.4059306997602437, + "tokens_seen": 2418540544 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002696998876584818, + "loss": 0.066, + "theoretical_loss": 3.405901306739951, + "tokens_seen": 2418802688 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002696196437168994, + "loss": 0.0658, + "theoretical_loss": 3.405871917796872, + "tokens_seen": 2419064832 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026953939977531697, + "loss": 0.0696, + "theoretical_loss": 3.405842532930001, + "tokens_seen": 2419326976 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026945915583373453, + "loss": 0.0652, + "theoretical_loss": 3.4058131521383292, + "tokens_seen": 2419589120 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026937891189215214, + "loss": 0.0652, + "theoretical_loss": 3.4057837754208515, + "tokens_seen": 2419851264 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026929866795056976, + "loss": 0.0677, + "theoretical_loss": 3.4057544027765605, + "tokens_seen": 2420113408 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002692184240089873, + "loss": 0.0701, + "theoretical_loss": 3.4057250342044516, + "tokens_seen": 2420375552 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002691381800674049, + "loss": 0.0651, + "theoretical_loss": 3.405695669703518, + "tokens_seen": 2420637696 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026905793612582254, + "loss": 0.0663, + "theoretical_loss": 3.405666309272755, + "tokens_seen": 2420899840 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002689776921842401, + "loss": 0.0683, + "theoretical_loss": 3.4056369529111583, + "tokens_seen": 2421161984 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026889744824265766, + "loss": 0.0678, + "theoretical_loss": 3.4056076006177234, + "tokens_seen": 2421424128 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.0005559303681366146, + "objective/train/docs_used": 881885, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2067722082138062, + "objective/train/original_loss": 1.2067723274230957, + "objective/train/theoretical_loss": 3.4055929259962525, + "objective/train/tokens_used": 2442015200, + "objective/train/value_avg": -0.004528045654296875, + "objective/train/value_loss": 7.930587162263691e-05, + "objective/train/value_max": -4.4345855712890625e-05, + "objective/train/value_min": -0.2314453125, + "objective/train/value_reward_corr": 0.7267003287627125, + "objective/train/value_std": 0.0087890625, + "objective/train/weight_avg": 1.0005950927734375, + "objective/train/weighted_lm_loss": 1.207557201385498, + "objective/train/weights_max": 1.1026510000228882, + "objective/train/weights_min": 0.8195127248764038, + "theoretical_loss": 3.4055929259962525, + "tokens_seen": 2421555200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026881720430107527, + "loss": 0.0661, + "theoretical_loss": 3.4055782523914453, + "tokens_seen": 2421686272 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002687369603594929, + "loss": 0.0679, + "theoretical_loss": 3.405548908231321, + "tokens_seen": 2421948416 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026865671641791044, + "loss": 0.0666, + "theoretical_loss": 3.405519568136347, + "tokens_seen": 2422210560 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026857647247632805, + "loss": 0.0714, + "theoretical_loss": 3.40549023210552, + "tokens_seen": 2422472704 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026849622853474566, + "loss": 0.0705, + "theoretical_loss": 3.4054609001378378, + "tokens_seen": 2422734848 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002684159845931632, + "loss": 0.0684, + "theoretical_loss": 3.4054315722322976, + "tokens_seen": 2422996992 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002683357406515808, + "loss": 0.0665, + "theoretical_loss": 3.4054022483878983, + "tokens_seen": 2423259136 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002682554967099984, + "loss": 0.0683, + "theoretical_loss": 3.4053729286036374, + "tokens_seen": 2423521280 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026817525276841595, + "loss": 0.0662, + "theoretical_loss": 3.405343612878514, + "tokens_seen": 2423783424 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026809500882683357, + "loss": 0.0652, + "theoretical_loss": 3.405314301211527, + "tokens_seen": 2424045568 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002680147648852512, + "loss": 0.0653, + "theoretical_loss": 3.4052849936016765, + "tokens_seen": 2424307712 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002679345209436688, + "loss": 0.0671, + "theoretical_loss": 3.4052556900479614, + "tokens_seen": 2424569856 + }, + { + "epoch": 0.73, + "objective/train/advantage_avg": 0.0001301930460613221, + "objective/train/docs_used": 882990, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4599616527557373, + "objective/train/original_loss": 1.4599618911743164, + "objective/train/theoretical_loss": 3.405226390549383, + "objective/train/tokens_used": 2445292000, + "objective/train/value_avg": -0.006389617919921875, + "objective/train/value_loss": 0.0001866083184722811, + "objective/train/value_max": -4.100799560546875e-05, + "objective/train/value_min": -0.35546875, + "objective/train/value_reward_corr": 0.6365628212502612, + "objective/train/value_std": 0.01094818115234375, + "objective/train/weight_avg": 1.0002139806747437, + "objective/train/weighted_lm_loss": 1.4597046375274658, + "objective/train/weights_max": 1.174597978591919, + "objective/train/weights_min": 0.3728525638580322, + "theoretical_loss": 3.405226390549383, + "tokens_seen": 2424832000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026785427700208635, + "loss": 0.0676, + "theoretical_loss": 3.405226390549383, + "tokens_seen": 2424832000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026777403306050396, + "loss": 0.0685, + "theoretical_loss": 3.4051970951049406, + "tokens_seen": 2425094144 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002676937891189215, + "loss": 0.0694, + "theoretical_loss": 3.4051678037136357, + "tokens_seen": 2425356288 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002676135451773391, + "loss": 0.0674, + "theoretical_loss": 3.40513851637447, + "tokens_seen": 2425618432 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002675333012357567, + "loss": 0.0679, + "theoretical_loss": 3.4051092330864443, + "tokens_seen": 2425880576 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002674530572941743, + "loss": 0.0674, + "theoretical_loss": 3.405079953848561, + "tokens_seen": 2426142720 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002673728133525919, + "loss": 0.068, + "theoretical_loss": 3.4050506786598222, + "tokens_seen": 2426404864 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002672925694110095, + "loss": 0.0687, + "theoretical_loss": 3.405021407519231, + "tokens_seen": 2426667008 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002672123254694271, + "loss": 0.068, + "theoretical_loss": 3.4049921404257897, + "tokens_seen": 2426929152 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026713208152784465, + "loss": 0.0652, + "theoretical_loss": 3.4049628773785017, + "tokens_seen": 2427191296 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002670518375862622, + "loss": 0.0648, + "theoretical_loss": 3.4049336183763717, + "tokens_seen": 2427453440 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002669715936446798, + "loss": 0.0677, + "theoretical_loss": 3.4049043634184026, + "tokens_seen": 2427715584 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026689134970309743, + "loss": 0.068, + "theoretical_loss": 3.4048751125035994, + "tokens_seen": 2427977728 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0006799577386118472, + "objective/train/docs_used": 884091, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.223225474357605, + "objective/train/original_loss": 1.2232253551483154, + "objective/train/theoretical_loss": 3.404860488562074, + "objective/train/tokens_used": 2448568800, + "objective/train/value_avg": -0.006168365478515625, + "objective/train/value_loss": 9.300230158260092e-05, + "objective/train/value_max": -3.0219554901123047e-05, + "objective/train/value_min": -0.19287109375, + "objective/train/value_reward_corr": 0.6994599916806957, + "objective/train/value_std": 0.0108184814453125, + "objective/train/weight_avg": 1.0007256269454956, + "objective/train/weighted_lm_loss": 1.2240463495254517, + "objective/train/weights_max": 1.1006338596343994, + "objective/train/weights_min": 0.6186822056770325, + "theoretical_loss": 3.404860488562074, + "tokens_seen": 2428108800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026681110576151504, + "loss": 0.0671, + "theoretical_loss": 3.404845865630967, + "tokens_seen": 2428239872 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002667308618199326, + "loss": 0.0674, + "theoretical_loss": 3.40481662279951, + "tokens_seen": 2428502016 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002666506178783502, + "loss": 0.0671, + "theoretical_loss": 3.4047873840082348, + "tokens_seen": 2428764160 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002665703739367678, + "loss": 0.0673, + "theoretical_loss": 3.404758149256146, + "tokens_seen": 2429026304 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026649012999518533, + "loss": 0.0671, + "theoretical_loss": 3.4047289185422507, + "tokens_seen": 2429288448 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026640988605360294, + "loss": 0.0653, + "theoretical_loss": 3.404699691865555, + "tokens_seen": 2429550592 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026632964211202055, + "loss": 0.068, + "theoretical_loss": 3.4046704692250658, + "tokens_seen": 2429812736 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002662493981704381, + "loss": 0.0663, + "theoretical_loss": 3.404641250619791, + "tokens_seen": 2430074880 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002661691542288557, + "loss": 0.0694, + "theoretical_loss": 3.404612036048737, + "tokens_seen": 2430337024 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026608891028727334, + "loss": 0.0683, + "theoretical_loss": 3.404582825510913, + "tokens_seen": 2430599168 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026600866634569095, + "loss": 0.0667, + "theoretical_loss": 3.404553619005326, + "tokens_seen": 2430861312 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026592842240410846, + "loss": 0.0672, + "theoretical_loss": 3.404524416530986, + "tokens_seen": 2431123456 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0008247354999184608, + "objective/train/docs_used": 885315, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3540970087051392, + "objective/train/original_loss": 1.3540968894958496, + "objective/train/theoretical_loss": 3.4044952180869013, + "objective/train/tokens_used": 2451845600, + "objective/train/value_avg": -0.00623321533203125, + "objective/train/value_loss": 0.00027920593856833875, + "objective/train/value_max": -4.13060188293457e-05, + "objective/train/value_min": -0.845703125, + "objective/train/value_reward_corr": 0.6324752214183904, + "objective/train/value_std": 0.0144500732421875, + "objective/train/weight_avg": 1.000954508781433, + "objective/train/weighted_lm_loss": 1.3552262783050537, + "objective/train/weights_max": 2.2237422466278076, + "objective/train/weights_min": 0.38558143377304077, + "theoretical_loss": 3.4044952180869013, + "tokens_seen": 2431385600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026584817846252607, + "loss": 0.0673, + "theoretical_loss": 3.4044952180869013, + "tokens_seen": 2431385600 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002657679345209437, + "loss": 0.0688, + "theoretical_loss": 3.4044660236720805, + "tokens_seen": 2431647744 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026568769057936124, + "loss": 0.0655, + "theoretical_loss": 3.404436833285535, + "tokens_seen": 2431909888 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026560744663777885, + "loss": 0.0671, + "theoretical_loss": 3.4044076469262734, + "tokens_seen": 2432172032 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026552720269619646, + "loss": 0.0652, + "theoretical_loss": 3.4043784645933064, + "tokens_seen": 2432434176 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002654469587546141, + "loss": 0.0671, + "theoretical_loss": 3.4043492862856457, + "tokens_seen": 2432696320 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002653667148130316, + "loss": 0.0685, + "theoretical_loss": 3.404320112002301, + "tokens_seen": 2432958464 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002652864708714492, + "loss": 0.0677, + "theoretical_loss": 3.4042909417422846, + "tokens_seen": 2433220608 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002652062269298668, + "loss": 0.0678, + "theoretical_loss": 3.404261775504608, + "tokens_seen": 2433482752 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026512598298828436, + "loss": 0.0657, + "theoretical_loss": 3.404232613288283, + "tokens_seen": 2433744896 + }, + { + "epoch": 0.74, + "learning_rate": 0.000265045739046702, + "loss": 0.0675, + "theoretical_loss": 3.404203455092323, + "tokens_seen": 2434007040 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002649654951051196, + "loss": 0.0651, + "theoretical_loss": 3.40417430091574, + "tokens_seen": 2434269184 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002648852511635372, + "loss": 0.0675, + "theoretical_loss": 3.404145150757548, + "tokens_seen": 2434531328 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0004898194456472993, + "objective/train/docs_used": 886508, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3692785501480103, + "objective/train/original_loss": 1.369278907775879, + "objective/train/theoretical_loss": 3.40413057718504, + "objective/train/tokens_used": 2455122400, + "objective/train/value_avg": -0.005901336669921875, + "objective/train/value_loss": 0.0002257556770928204, + "objective/train/value_max": -3.170967102050781e-05, + "objective/train/value_min": -0.65185546875, + "objective/train/value_reward_corr": 0.6215079431063899, + "objective/train/value_std": 0.01128387451171875, + "objective/train/weight_avg": 1.0005888938903809, + "objective/train/weighted_lm_loss": 1.3697773218154907, + "objective/train/weights_max": 1.3338719606399536, + "objective/train/weights_min": 0.3692946434020996, + "theoretical_loss": 3.40413057718504, + "tokens_seen": 2434662400 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026480500722195476, + "loss": 0.0683, + "theoretical_loss": 3.4041160046167596, + "tokens_seen": 2434793472 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002647247632803723, + "loss": 0.0665, + "theoretical_loss": 3.4040868624923895, + "tokens_seen": 2435055616 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026464451933878993, + "loss": 0.0687, + "theoretical_loss": 3.4040577243834513, + "tokens_seen": 2435317760 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002645642753972075, + "loss": 0.0649, + "theoretical_loss": 3.4040285902889598, + "tokens_seen": 2435579904 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002644840314556251, + "loss": 0.0682, + "theoretical_loss": 3.40399946020793, + "tokens_seen": 2435842048 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002644037875140427, + "loss": 0.0665, + "theoretical_loss": 3.403970334139377, + "tokens_seen": 2436104192 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026432354357246033, + "loss": 0.0655, + "theoretical_loss": 3.403941212082317, + "tokens_seen": 2436366336 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002642432996308779, + "loss": 0.0678, + "theoretical_loss": 3.4039120940357654, + "tokens_seen": 2436628480 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026416305568929544, + "loss": 0.0663, + "theoretical_loss": 3.4038829799987385, + "tokens_seen": 2436890624 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026408281174771306, + "loss": 0.0674, + "theoretical_loss": 3.4038538699702534, + "tokens_seen": 2437152768 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002640025678061306, + "loss": 0.0664, + "theoretical_loss": 3.403824763949326, + "tokens_seen": 2437414912 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026392232386454823, + "loss": 0.0673, + "theoretical_loss": 3.403795661934976, + "tokens_seen": 2437677056 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": -0.0008872678154148161, + "objective/train/docs_used": 887686, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3116480112075806, + "objective/train/original_loss": 1.3116480112075806, + "objective/train/theoretical_loss": 3.4037665639262182, + "objective/train/tokens_used": 2458399200, + "objective/train/value_avg": -0.007190704345703125, + "objective/train/value_loss": 0.000523583497852087, + "objective/train/value_max": -2.753734588623047e-05, + "objective/train/value_min": -0.70947265625, + "objective/train/value_reward_corr": 0.6604272960853255, + "objective/train/value_std": 0.0171356201171875, + "objective/train/weight_avg": 0.9993419051170349, + "objective/train/weighted_lm_loss": 1.309940218925476, + "objective/train/weights_max": 1.3254317045211792, + "objective/train/weights_min": 0.37620142102241516, + "theoretical_loss": 3.4037665639262182, + "tokens_seen": 2437939200 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026384207992296584, + "loss": 0.0643, + "theoretical_loss": 3.4037665639262182, + "tokens_seen": 2437939200 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002637618359813834, + "loss": 0.0654, + "theoretical_loss": 3.403737469922073, + "tokens_seen": 2438201344 + }, + { + "epoch": 0.74, + "learning_rate": 0.000263681592039801, + "loss": 0.0675, + "theoretical_loss": 3.4037083799215573, + "tokens_seen": 2438463488 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026360134809821857, + "loss": 0.0647, + "theoretical_loss": 3.40367929392369, + "tokens_seen": 2438725632 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002635211041566362, + "loss": 0.066, + "theoretical_loss": 3.4036502119274914, + "tokens_seen": 2438987776 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026344086021505374, + "loss": 0.0633, + "theoretical_loss": 3.4036211339319795, + "tokens_seen": 2439249920 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026336061627347135, + "loss": 0.0658, + "theoretical_loss": 3.4035920599361753, + "tokens_seen": 2439512064 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026328037233188897, + "loss": 0.0686, + "theoretical_loss": 3.403562989939098, + "tokens_seen": 2439774208 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002632001283903065, + "loss": 0.0677, + "theoretical_loss": 3.4035339239397686, + "tokens_seen": 2440036352 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026311988444872414, + "loss": 0.0687, + "theoretical_loss": 3.4035048619372072, + "tokens_seen": 2440298496 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026303964050714175, + "loss": 0.0664, + "theoretical_loss": 3.403475803930436, + "tokens_seen": 2440560640 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002629593965655593, + "loss": 0.0662, + "theoretical_loss": 3.4034467499184755, + "tokens_seen": 2440822784 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026287915262397687, + "loss": 0.0665, + "theoretical_loss": 3.4034176999003485, + "tokens_seen": 2441084928 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 8.833639731165022e-05, + "objective/train/docs_used": 888977, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1548739671707153, + "objective/train/original_loss": 1.1548740863800049, + "objective/train/theoretical_loss": 3.4034031763886667, + "objective/train/tokens_used": 2461676000, + "objective/train/value_avg": -0.00930023193359375, + "objective/train/value_loss": 0.00023279798915609717, + "objective/train/value_max": -6.973743438720703e-05, + "objective/train/value_min": -0.62548828125, + "objective/train/value_reward_corr": 0.76527345823869, + "objective/train/value_std": 0.016143798828125, + "objective/train/weight_avg": 1.0001966953277588, + "objective/train/weighted_lm_loss": 1.1562342643737793, + "objective/train/weights_max": 1.419206142425537, + "objective/train/weights_min": 0.4449949264526367, + "theoretical_loss": 3.4034031763886667, + "tokens_seen": 2441216000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002627989086823945, + "loss": 0.0649, + "theoretical_loss": 3.4033886538750764, + "tokens_seen": 2441347072 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002627186647408121, + "loss": 0.0667, + "theoretical_loss": 3.4033596118416822, + "tokens_seen": 2441609216 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026263842079922965, + "loss": 0.0632, + "theoretical_loss": 3.4033305737991886, + "tokens_seen": 2441871360 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026255817685764726, + "loss": 0.068, + "theoretical_loss": 3.403301539746619, + "tokens_seen": 2442133504 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002624779329160649, + "loss": 0.0666, + "theoretical_loss": 3.4032725096829965, + "tokens_seen": 2442395648 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002623976889744824, + "loss": 0.0669, + "theoretical_loss": 3.4032434836073455, + "tokens_seen": 2442657792 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002623174450329, + "loss": 0.0688, + "theoretical_loss": 3.40321446151869, + "tokens_seen": 2442919936 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002622372010913176, + "loss": 0.0678, + "theoretical_loss": 3.4031854434160556, + "tokens_seen": 2443182080 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002621569571497352, + "loss": 0.0681, + "theoretical_loss": 3.4031564292984653, + "tokens_seen": 2443444224 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002620767132081528, + "loss": 0.0667, + "theoretical_loss": 3.403127419164946, + "tokens_seen": 2443706368 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002619964692665704, + "loss": 0.0681, + "theoretical_loss": 3.4030984130145225, + "tokens_seen": 2443968512 + }, + { + "epoch": 0.74, + "learning_rate": 0.000261916225324988, + "loss": 0.0689, + "theoretical_loss": 3.4030694108462214, + "tokens_seen": 2444230656 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0005917379749007523, + "objective/train/docs_used": 890203, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4914954900741577, + "objective/train/original_loss": 1.4914956092834473, + "objective/train/theoretical_loss": 3.4030404126590685, + "objective/train/tokens_used": 2464952800, + "objective/train/value_avg": -0.00826263427734375, + "objective/train/value_loss": 0.00019185063138138503, + "objective/train/value_max": -5.066394805908203e-05, + "objective/train/value_min": -0.35595703125, + "objective/train/value_reward_corr": 0.7125985312722368, + "objective/train/value_std": 0.01407623291015625, + "objective/train/weight_avg": 1.0006788969039917, + "objective/train/weighted_lm_loss": 1.4919967651367188, + "objective/train/weights_max": 1.2016677856445312, + "objective/train/weights_min": 0.3696744740009308, + "theoretical_loss": 3.4030404126590685, + "tokens_seen": 2444492800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026183598138340556, + "loss": 0.0687, + "theoretical_loss": 3.4030404126590685, + "tokens_seen": 2444492800 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002617557374418231, + "loss": 0.0686, + "theoretical_loss": 3.403011418452091, + "tokens_seen": 2444754944 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026167549350024073, + "loss": 0.0682, + "theoretical_loss": 3.402982428224315, + "tokens_seen": 2445017088 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026159524955865834, + "loss": 0.0687, + "theoretical_loss": 3.4029534419747685, + "tokens_seen": 2445279232 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002615150056170759, + "loss": 0.0677, + "theoretical_loss": 3.4029244597024793, + "tokens_seen": 2445541376 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002614347616754935, + "loss": 0.0657, + "theoretical_loss": 3.402895481406475, + "tokens_seen": 2445803520 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026135451773391113, + "loss": 0.065, + "theoretical_loss": 3.4028665070857844, + "tokens_seen": 2446065664 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002612742737923287, + "loss": 0.0672, + "theoretical_loss": 3.402837536739436, + "tokens_seen": 2446327808 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026119402985074624, + "loss": 0.0671, + "theoretical_loss": 3.4028085703664583, + "tokens_seen": 2446589952 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026111378590916386, + "loss": 0.0672, + "theoretical_loss": 3.402779607965882, + "tokens_seen": 2446852096 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026103354196758147, + "loss": 0.0686, + "theoretical_loss": 3.4027506495367357, + "tokens_seen": 2447114240 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026095329802599903, + "loss": 0.0666, + "theoretical_loss": 3.4027216950780494, + "tokens_seen": 2447376384 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026087305408441664, + "loss": 0.0655, + "theoretical_loss": 3.4026927445888546, + "tokens_seen": 2447638528 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0009709555306471884, + "objective/train/docs_used": 890833, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2022894620895386, + "objective/train/original_loss": 1.2022895812988281, + "objective/train/theoretical_loss": 3.402678270832513, + "objective/train/tokens_used": 2468229600, + "objective/train/value_avg": -0.006378173828125, + "objective/train/value_loss": 0.00013867423695046455, + "objective/train/value_max": -7.140636444091797e-05, + "objective/train/value_min": -0.228515625, + "objective/train/value_reward_corr": 0.6964469034006823, + "objective/train/value_std": 0.0112457275390625, + "objective/train/weight_avg": 1.001035451889038, + "objective/train/weighted_lm_loss": 1.2034066915512085, + "objective/train/weights_max": 1.19721257686615, + "objective/train/weights_min": 0.37144145369529724, + "theoretical_loss": 3.402678270832513, + "tokens_seen": 2447769600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026079281014283425, + "loss": 0.061, + "theoretical_loss": 3.4026637980681813, + "tokens_seen": 2447900672 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002607125662012518, + "loss": 0.0663, + "theoretical_loss": 3.4026348555150605, + "tokens_seen": 2448162816 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026063232225966937, + "loss": 0.0668, + "theoretical_loss": 3.402605916928524, + "tokens_seen": 2448424960 + }, + { + "epoch": 0.74, + "learning_rate": 0.000260552078318087, + "loss": 0.0659, + "theoretical_loss": 3.4025769823076035, + "tokens_seen": 2448687104 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002604718343765046, + "loss": 0.0671, + "theoretical_loss": 3.402548051651331, + "tokens_seen": 2448949248 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026039159043492215, + "loss": 0.0689, + "theoretical_loss": 3.402519124958739, + "tokens_seen": 2449211392 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026031134649333977, + "loss": 0.0644, + "theoretical_loss": 3.4024902022288606, + "tokens_seen": 2449473536 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002602311025517574, + "loss": 0.0644, + "theoretical_loss": 3.4024612834607284, + "tokens_seen": 2449735680 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026015085861017494, + "loss": 0.0618, + "theoretical_loss": 3.402432368653376, + "tokens_seen": 2449997824 + }, + { + "epoch": 0.74, + "learning_rate": 0.00026007061466859255, + "loss": 0.0668, + "theoretical_loss": 3.4024034578058373, + "tokens_seen": 2450259968 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002599903707270101, + "loss": 0.0646, + "theoretical_loss": 3.4023745509171466, + "tokens_seen": 2450522112 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025991012678542767, + "loss": 0.0638, + "theoretical_loss": 3.4023456479863383, + "tokens_seen": 2450784256 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.000830281525850296, + "objective/train/docs_used": 891914, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.410335659980774, + "objective/train/original_loss": 1.4103357791900635, + "objective/train/theoretical_loss": 3.402316749012447, + "objective/train/tokens_used": 2471506400, + "objective/train/value_avg": -0.006683349609375, + "objective/train/value_loss": 0.00012804150173906237, + "objective/train/value_max": -2.4318695068359375e-05, + "objective/train/value_min": -0.2186279296875, + "objective/train/value_reward_corr": 0.6649692993754099, + "objective/train/value_std": 0.01036834716796875, + "objective/train/weight_avg": 1.0008869171142578, + "objective/train/weighted_lm_loss": 1.4112725257873535, + "objective/train/weights_max": 1.172834873199463, + "objective/train/weights_min": 0.3791404366493225, + "theoretical_loss": 3.402316749012447, + "tokens_seen": 2451046400 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002598298828438453, + "loss": 0.0649, + "theoretical_loss": 3.402316749012447, + "tokens_seen": 2451046400 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002597496389022629, + "loss": 0.0623, + "theoretical_loss": 3.4022878539945087, + "tokens_seen": 2451308544 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002596693949606805, + "loss": 0.0664, + "theoretical_loss": 3.402258962931558, + "tokens_seen": 2451570688 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025958915101909806, + "loss": 0.0644, + "theoretical_loss": 3.4022300758226307, + "tokens_seen": 2451832832 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002595089070775157, + "loss": 0.0658, + "theoretical_loss": 3.4022011926667637, + "tokens_seen": 2452094976 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025942866313593323, + "loss": 0.0637, + "theoretical_loss": 3.402172313462993, + "tokens_seen": 2452357120 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002593484191943508, + "loss": 0.0652, + "theoretical_loss": 3.4021434382103557, + "tokens_seen": 2452619264 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002592681752527684, + "loss": 0.0643, + "theoretical_loss": 3.4021145669078887, + "tokens_seen": 2452881408 + }, + { + "epoch": 0.74, + "learning_rate": 0.000259187931311186, + "loss": 0.0651, + "theoretical_loss": 3.40208569955463, + "tokens_seen": 2453143552 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025910768736960363, + "loss": 0.0651, + "theoretical_loss": 3.402056836149617, + "tokens_seen": 2453405696 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002590274434280212, + "loss": 0.0634, + "theoretical_loss": 3.4020279766918886, + "tokens_seen": 2453667840 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002589471994864388, + "loss": 0.0661, + "theoretical_loss": 3.401999121180482, + "tokens_seen": 2453929984 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025886695554485636, + "loss": 0.0604, + "theoretical_loss": 3.401970269614438, + "tokens_seen": 2454192128 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0005356064066290855, + "objective/train/docs_used": 893100, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3348721265792847, + "objective/train/original_loss": 1.3348722457885742, + "objective/train/theoretical_loss": 3.401955845310626, + "objective/train/tokens_used": 2474783200, + "objective/train/value_avg": -0.006534576416015625, + "objective/train/value_loss": 0.00014540940173901618, + "objective/train/value_max": -1.806020736694336e-05, + "objective/train/value_min": -0.311767578125, + "objective/train/value_reward_corr": 0.7150836623406631, + "objective/train/value_std": 0.0132293701171875, + "objective/train/weight_avg": 1.0006065368652344, + "objective/train/weighted_lm_loss": 1.335172414779663, + "objective/train/weights_max": 1.1883995532989502, + "objective/train/weights_min": 0.7188173532485962, + "theoretical_loss": 3.401955845310626, + "tokens_seen": 2454323200 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002587867116032739, + "loss": 0.065, + "theoretical_loss": 3.4019414219927944, + "tokens_seen": 2454454272 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025870646766169153, + "loss": 0.0622, + "theoretical_loss": 3.4019125783145907, + "tokens_seen": 2454716416 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025862622372010914, + "loss": 0.0629, + "theoretical_loss": 3.4018837385788676, + "tokens_seen": 2454978560 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025854597977852676, + "loss": 0.0666, + "theoretical_loss": 3.401854902784665, + "tokens_seen": 2455240704 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002584657358369443, + "loss": 0.0653, + "theoretical_loss": 3.4018260709310235, + "tokens_seen": 2455502848 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002583854918953619, + "loss": 0.0634, + "theoretical_loss": 3.4017972430169836, + "tokens_seen": 2455764992 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025830524795377954, + "loss": 0.0658, + "theoretical_loss": 3.4017684190415873, + "tokens_seen": 2456027136 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025822500401219704, + "loss": 0.0643, + "theoretical_loss": 3.4017395990038755, + "tokens_seen": 2456289280 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025814476007061466, + "loss": 0.0655, + "theoretical_loss": 3.4017107829028905, + "tokens_seen": 2456551424 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025806451612903227, + "loss": 0.0632, + "theoretical_loss": 3.401681970737674, + "tokens_seen": 2456813568 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002579842721874498, + "loss": 0.0653, + "theoretical_loss": 3.4016531625072695, + "tokens_seen": 2457075712 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025790402824586744, + "loss": 0.0672, + "theoretical_loss": 3.4016243582107193, + "tokens_seen": 2457337856 + }, + { + "epoch": 0.74, + "objective/train/advantage_avg": 0.0009694792679511011, + "objective/train/docs_used": 894282, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2399897575378418, + "objective/train/original_loss": 1.2399897575378418, + "objective/train/theoretical_loss": 3.4015955578470667, + "objective/train/tokens_used": 2478060000, + "objective/train/value_avg": -0.005939483642578125, + "objective/train/value_loss": 0.00014201925660017878, + "objective/train/value_max": -2.6702880859375e-05, + "objective/train/value_min": -0.2325439453125, + "objective/train/value_reward_corr": 0.7003671634648543, + "objective/train/value_std": 0.01181793212890625, + "objective/train/weight_avg": 1.0010325908660889, + "objective/train/weighted_lm_loss": 1.2412679195404053, + "objective/train/weights_max": 1.2071141004562378, + "objective/train/weights_min": 0.3796498477458954, + "theoretical_loss": 3.4015955578470667, + "tokens_seen": 2457600000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025782378430428505, + "loss": 0.063, + "theoretical_loss": 3.4015955578470667, + "tokens_seen": 2457600000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025774354036270266, + "loss": 0.0644, + "theoretical_loss": 3.4015667614153555, + "tokens_seen": 2457862144 + }, + { + "epoch": 0.74, + "learning_rate": 0.00025766329642112017, + "loss": 0.0632, + "theoretical_loss": 3.4015379689146292, + "tokens_seen": 2458124288 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002575830524795378, + "loss": 0.0661, + "theoretical_loss": 3.4015091803439326, + "tokens_seen": 2458386432 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002575028085379554, + "loss": 0.0669, + "theoretical_loss": 3.4014803957023094, + "tokens_seen": 2458648576 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025742256459637295, + "loss": 0.0644, + "theoretical_loss": 3.4014516149888054, + "tokens_seen": 2458910720 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025734232065479057, + "loss": 0.0626, + "theoretical_loss": 3.401422838202466, + "tokens_seen": 2459172864 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002572620767132082, + "loss": 0.0618, + "theoretical_loss": 3.401394065342336, + "tokens_seen": 2459435008 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002571818327716258, + "loss": 0.0662, + "theoretical_loss": 3.4013652964074614, + "tokens_seen": 2459697152 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025710158883004335, + "loss": 0.0656, + "theoretical_loss": 3.4013365313968893, + "tokens_seen": 2459959296 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002570213448884609, + "loss": 0.0656, + "theoretical_loss": 3.4013077703096655, + "tokens_seen": 2460221440 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002569411009468785, + "loss": 0.065, + "theoretical_loss": 3.4012790131448374, + "tokens_seen": 2460483584 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002568608570052961, + "loss": 0.061, + "theoretical_loss": 3.4012502599014516, + "tokens_seen": 2460745728 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": -0.00022634593187831342, + "objective/train/docs_used": 895477, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3499811887741089, + "objective/train/original_loss": 1.3499810695648193, + "objective/train/theoretical_loss": 3.401235884750002, + "objective/train/tokens_used": 2481336800, + "objective/train/value_avg": -0.006145477294921875, + "objective/train/value_loss": 0.00018551845278125256, + "objective/train/value_max": -8.094310760498047e-05, + "objective/train/value_min": -0.245849609375, + "objective/train/value_reward_corr": 0.6423102891155956, + "objective/train/value_std": 0.01097869873046875, + "objective/train/weight_avg": 0.99985671043396, + "objective/train/weighted_lm_loss": 1.350335955619812, + "objective/train/weights_max": 1.1451069116592407, + "objective/train/weights_min": 0.3721874952316284, + "theoretical_loss": 3.401235884750002, + "tokens_seen": 2460876800 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002567806130637137, + "loss": 0.0643, + "theoretical_loss": 3.4012215105785564, + "tokens_seen": 2461007872 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002567003691221313, + "loss": 0.0642, + "theoretical_loss": 3.401192765175199, + "tokens_seen": 2461270016 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002566201251805489, + "loss": 0.0656, + "theoretical_loss": 3.4011640236904284, + "tokens_seen": 2461532160 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002565398812389665, + "loss": 0.062, + "theoretical_loss": 3.401135286123293, + "tokens_seen": 2461794304 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025645963729738403, + "loss": 0.0647, + "theoretical_loss": 3.401106552472841, + "tokens_seen": 2462056448 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025637939335580165, + "loss": 0.0651, + "theoretical_loss": 3.4010778227381233, + "tokens_seen": 2462318592 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002562991494142192, + "loss": 0.0651, + "theoretical_loss": 3.4010490969181877, + "tokens_seen": 2462580736 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002562189054726368, + "loss": 0.0665, + "theoretical_loss": 3.401020375012085, + "tokens_seen": 2462842880 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025613866153105443, + "loss": 0.0659, + "theoretical_loss": 3.400991657018865, + "tokens_seen": 2463105024 + }, + { + "epoch": 0.75, + "learning_rate": 0.000256058417589472, + "loss": 0.0661, + "theoretical_loss": 3.400962942937579, + "tokens_seen": 2463367168 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002559781736478896, + "loss": 0.0644, + "theoretical_loss": 3.400934232767277, + "tokens_seen": 2463629312 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025589792970630716, + "loss": 0.0629, + "theoretical_loss": 3.400905526507011, + "tokens_seen": 2463891456 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": -0.00027899775886908174, + "objective/train/docs_used": 896700, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1354491710662842, + "objective/train/original_loss": 1.1354490518569946, + "objective/train/theoretical_loss": 3.4008768241558323, + "objective/train/tokens_used": 2484613600, + "objective/train/value_avg": -0.01045989990234375, + "objective/train/value_loss": 0.00030195803265087306, + "objective/train/value_max": -1.2993812561035156e-05, + "objective/train/value_min": -0.337890625, + "objective/train/value_reward_corr": 0.8496636557732059, + "objective/train/value_std": 0.027252197265625, + "objective/train/weight_avg": 0.9998582005500793, + "objective/train/weighted_lm_loss": 1.1343961954116821, + "objective/train/weights_max": 1.1410603523254395, + "objective/train/weights_min": 0.369240403175354, + "theoretical_loss": 3.4008768241558323, + "tokens_seen": 2464153600 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025581768576472477, + "loss": 0.0647, + "theoretical_loss": 3.4008768241558323, + "tokens_seen": 2464153600 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025573744182314233, + "loss": 0.066, + "theoretical_loss": 3.4008481257127925, + "tokens_seen": 2464415744 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025565719788155994, + "loss": 0.0632, + "theoretical_loss": 3.400819431176944, + "tokens_seen": 2464677888 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025557695393997755, + "loss": 0.0619, + "theoretical_loss": 3.4007907405473405, + "tokens_seen": 2464940032 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002554967099983951, + "loss": 0.0665, + "theoretical_loss": 3.400762053823033, + "tokens_seen": 2465202176 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002554164660568127, + "loss": 0.064, + "theoretical_loss": 3.400733371003076, + "tokens_seen": 2465464320 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025533622211523034, + "loss": 0.0665, + "theoretical_loss": 3.4007046920865225, + "tokens_seen": 2465726464 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002552559781736479, + "loss": 0.063, + "theoretical_loss": 3.4006760170724264, + "tokens_seen": 2465988608 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025517573423206546, + "loss": 0.0665, + "theoretical_loss": 3.4006473459598423, + "tokens_seen": 2466250752 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025509549029048307, + "loss": 0.0662, + "theoretical_loss": 3.4006186787478243, + "tokens_seen": 2466512896 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002550152463489007, + "loss": 0.0657, + "theoretical_loss": 3.4005900154354274, + "tokens_seen": 2466775040 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025493500240731824, + "loss": 0.0653, + "theoretical_loss": 3.400561356021707, + "tokens_seen": 2467037184 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025485475846573585, + "loss": 0.0663, + "theoretical_loss": 3.400532700505719, + "tokens_seen": 2467299328 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": -0.0005027269362471998, + "objective/train/docs_used": 897881, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.282994031906128, + "objective/train/original_loss": 1.282994031906128, + "objective/train/theoretical_loss": 3.4005183742090788, + "objective/train/tokens_used": 2487890400, + "objective/train/value_avg": -0.0072021484375, + "objective/train/value_loss": 0.0002903847489506006, + "objective/train/value_max": -3.707408905029297e-05, + "objective/train/value_min": -0.595703125, + "objective/train/value_reward_corr": 0.7093589137672446, + "objective/train/value_std": 0.0144195556640625, + "objective/train/weight_avg": 0.9996247887611389, + "objective/train/weighted_lm_loss": 1.2816740274429321, + "objective/train/weights_max": 1.3052014112472534, + "objective/train/weights_min": 0.3687020242214203, + "theoretical_loss": 3.4005183742090788, + "tokens_seen": 2467430400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025477451452415346, + "loss": 0.0669, + "theoretical_loss": 3.400504048886518, + "tokens_seen": 2467561472 + }, + { + "epoch": 0.75, + "learning_rate": 0.000254694270582571, + "loss": 0.0644, + "theoretical_loss": 3.4004754011631615, + "tokens_seen": 2467823616 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002546140266409886, + "loss": 0.0645, + "theoretical_loss": 3.4004467573347053, + "tokens_seen": 2468085760 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002545337826994062, + "loss": 0.0659, + "theoretical_loss": 3.400418117400206, + "tokens_seen": 2468347904 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002544535387578238, + "loss": 0.0639, + "theoretical_loss": 3.4003894813587223, + "tokens_seen": 2468610048 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025437329481624136, + "loss": 0.0644, + "theoretical_loss": 3.4003608492093096, + "tokens_seen": 2468872192 + }, + { + "epoch": 0.75, + "learning_rate": 0.000254293050874659, + "loss": 0.0652, + "theoretical_loss": 3.400332220951027, + "tokens_seen": 2469134336 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002542128069330766, + "loss": 0.065, + "theoretical_loss": 3.400303596582933, + "tokens_seen": 2469396480 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002541325629914941, + "loss": 0.063, + "theoretical_loss": 3.4002749761040847, + "tokens_seen": 2469658624 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002540523190499117, + "loss": 0.0644, + "theoretical_loss": 3.400246359513542, + "tokens_seen": 2469920768 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002539720751083293, + "loss": 0.0678, + "theoretical_loss": 3.4002177468103643, + "tokens_seen": 2470182912 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025389183116674693, + "loss": 0.0646, + "theoretical_loss": 3.40018913799361, + "tokens_seen": 2470445056 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": -0.0009792648488655686, + "objective/train/docs_used": 899059, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.327161431312561, + "objective/train/original_loss": 1.3271613121032715, + "objective/train/theoretical_loss": 3.4001605330623397, + "objective/train/tokens_used": 2491167200, + "objective/train/value_avg": -0.01041412353515625, + "objective/train/value_loss": 0.00031207365100272, + "objective/train/value_max": -2.5272369384765625e-05, + "objective/train/value_min": -0.96484375, + "objective/train/value_reward_corr": 0.8798099044091047, + "objective/train/value_std": 0.0277099609375, + "objective/train/weight_avg": 0.9991644620895386, + "objective/train/weighted_lm_loss": 1.3267914056777954, + "objective/train/weights_max": 1.9819515943527222, + "objective/train/weights_min": 0.3967057168483734, + "theoretical_loss": 3.4001605330623397, + "tokens_seen": 2470707200 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002538115872251645, + "loss": 0.0634, + "theoretical_loss": 3.4001605330623397, + "tokens_seen": 2470707200 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002537313432835821, + "loss": 0.0644, + "theoretical_loss": 3.400131932015613, + "tokens_seen": 2470969344 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002536510993419997, + "loss": 0.0634, + "theoretical_loss": 3.4001033348524907, + "tokens_seen": 2471231488 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002535708554004173, + "loss": 0.0662, + "theoretical_loss": 3.4000747415720336, + "tokens_seen": 2471493632 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025349061145883483, + "loss": 0.0673, + "theoretical_loss": 3.4000461521733025, + "tokens_seen": 2471755776 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025341036751725244, + "loss": 0.0655, + "theoretical_loss": 3.400017566655359, + "tokens_seen": 2472017920 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025333012357567006, + "loss": 0.0647, + "theoretical_loss": 3.3999889850172655, + "tokens_seen": 2472280064 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002532498796340876, + "loss": 0.0642, + "theoretical_loss": 3.399960407258083, + "tokens_seen": 2472542208 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025316963569250523, + "loss": 0.0647, + "theoretical_loss": 3.3999318333768747, + "tokens_seen": 2472804352 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025308939175092284, + "loss": 0.0619, + "theoretical_loss": 3.3999032633727024, + "tokens_seen": 2473066496 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002530091478093404, + "loss": 0.0675, + "theoretical_loss": 3.3998746972446305, + "tokens_seen": 2473328640 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025292890386775796, + "loss": 0.0673, + "theoretical_loss": 3.3998461349917215, + "tokens_seen": 2473590784 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025284865992617557, + "loss": 0.0644, + "theoretical_loss": 3.3998175766130396, + "tokens_seen": 2473852928 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.0003516378055792302, + "objective/train/docs_used": 900350, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.250746250152588, + "objective/train/original_loss": 1.250746250152588, + "objective/train/theoretical_loss": 3.399803298876241, + "objective/train/tokens_used": 2494444000, + "objective/train/value_avg": -0.00687408447265625, + "objective/train/value_loss": 0.00016971146396826953, + "objective/train/value_max": -4.1961669921875e-05, + "objective/train/value_min": -0.227783203125, + "objective/train/value_reward_corr": 0.6703260986924842, + "objective/train/value_std": 0.012237548828125, + "objective/train/weight_avg": 1.0004278421401978, + "objective/train/weighted_lm_loss": 1.2503207921981812, + "objective/train/weights_max": 1.2307718992233276, + "objective/train/weights_min": 0.40160250663757324, + "theoretical_loss": 3.399803298876241, + "tokens_seen": 2473984000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002527684159845932, + "loss": 0.0675, + "theoretical_loss": 3.399789022107648, + "tokens_seen": 2474115072 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025268817204301074, + "loss": 0.0657, + "theoretical_loss": 3.399760471474612, + "tokens_seen": 2474377216 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025260792810142835, + "loss": 0.0654, + "theoretical_loss": 3.399731924712996, + "tokens_seen": 2474639360 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025252768415984597, + "loss": 0.0645, + "theoretical_loss": 3.3997033818218645, + "tokens_seen": 2474901504 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002524474402182635, + "loss": 0.0676, + "theoretical_loss": 3.3996748428002834, + "tokens_seen": 2475163648 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025236719627668114, + "loss": 0.0666, + "theoretical_loss": 3.3996463076473185, + "tokens_seen": 2475425792 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002522869523350987, + "loss": 0.0659, + "theoretical_loss": 3.399617776362035, + "tokens_seen": 2475687936 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002522067083935163, + "loss": 0.0632, + "theoretical_loss": 3.3995892489435007, + "tokens_seen": 2475950080 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025212646445193387, + "loss": 0.065, + "theoretical_loss": 3.3995607253907805, + "tokens_seen": 2476212224 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002520462205103515, + "loss": 0.0658, + "theoretical_loss": 3.3995322057029425, + "tokens_seen": 2476474368 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002519659765687691, + "loss": 0.0639, + "theoretical_loss": 3.3995036898790536, + "tokens_seen": 2476736512 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025188573262718665, + "loss": 0.0641, + "theoretical_loss": 3.3994751779181813, + "tokens_seen": 2476998656 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.0007612823974341154, + "objective/train/docs_used": 901501, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2392642498016357, + "objective/train/original_loss": 1.2392642498016357, + "objective/train/theoretical_loss": 3.399446669819394, + "objective/train/tokens_used": 2497720800, + "objective/train/value_avg": -0.00814056396484375, + "objective/train/value_loss": 0.00025297680986113846, + "objective/train/value_max": -8.416175842285156e-05, + "objective/train/value_min": -0.34326171875, + "objective/train/value_reward_corr": 0.6215869301992545, + "objective/train/value_std": 0.012786865234375, + "objective/train/weight_avg": 1.000868558883667, + "objective/train/weighted_lm_loss": 1.240895390510559, + "objective/train/weights_max": 1.3520135879516602, + "objective/train/weights_min": 0.3843126595020294, + "theoretical_loss": 3.399446669819394, + "tokens_seen": 2477260800 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025180548868560426, + "loss": 0.0639, + "theoretical_loss": 3.399446669819394, + "tokens_seen": 2477260800 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002517252447440218, + "loss": 0.066, + "theoretical_loss": 3.3994181655817592, + "tokens_seen": 2477522944 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002516450008024394, + "loss": 0.0634, + "theoretical_loss": 3.3993896652043465, + "tokens_seen": 2477785088 + }, + { + "epoch": 0.75, + "learning_rate": 0.000251564756860857, + "loss": 0.0664, + "theoretical_loss": 3.399361168686224, + "tokens_seen": 2478047232 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002514845129192746, + "loss": 0.0654, + "theoretical_loss": 3.399332676026461, + "tokens_seen": 2478309376 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002514042689776922, + "loss": 0.0686, + "theoretical_loss": 3.399304187224127, + "tokens_seen": 2478571520 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002513240250361098, + "loss": 0.066, + "theoretical_loss": 3.3992757022782927, + "tokens_seen": 2478833664 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002512437810945274, + "loss": 0.0666, + "theoretical_loss": 3.3992472211880274, + "tokens_seen": 2479095808 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025116353715294495, + "loss": 0.0667, + "theoretical_loss": 3.3992187439524018, + "tokens_seen": 2479357952 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002510832932113625, + "loss": 0.0659, + "theoretical_loss": 3.399190270570487, + "tokens_seen": 2479620096 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002510030492697801, + "loss": 0.0666, + "theoretical_loss": 3.3991618010413536, + "tokens_seen": 2479882240 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025092280532819773, + "loss": 0.0676, + "theoretical_loss": 3.3991333353640742, + "tokens_seen": 2480144384 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025084256138661534, + "loss": 0.0689, + "theoretical_loss": 3.3991048735377194, + "tokens_seen": 2480406528 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": -0.00028219091473147273, + "objective/train/docs_used": 902720, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4025236368179321, + "objective/train/original_loss": 1.4025237560272217, + "objective/train/theoretical_loss": 3.399090644068349, + "objective/train/tokens_used": 2500997600, + "objective/train/value_avg": -0.00506591796875, + "objective/train/value_loss": 0.0002538026019465178, + "objective/train/value_max": -1.3828277587890625e-05, + "objective/train/value_min": -0.50634765625, + "objective/train/value_reward_corr": 0.6467943607471219, + "objective/train/value_std": 0.011993408203125, + "objective/train/weight_avg": 0.999829113483429, + "objective/train/weighted_lm_loss": 1.4015452861785889, + "objective/train/weights_max": 1.1559211015701294, + "objective/train/weights_min": 0.3705088496208191, + "theoretical_loss": 3.399090644068349, + "tokens_seen": 2480537600 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002507623174450329, + "loss": 0.0663, + "theoretical_loss": 3.399076415561362, + "tokens_seen": 2480668672 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002506820735034505, + "loss": 0.0669, + "theoretical_loss": 3.399047961434074, + "tokens_seen": 2480930816 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025060182956186813, + "loss": 0.0654, + "theoretical_loss": 3.399019511154929, + "tokens_seen": 2481192960 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025052158562028563, + "loss": 0.0647, + "theoretical_loss": 3.398991064722999, + "tokens_seen": 2481455104 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025044134167870324, + "loss": 0.0676, + "theoretical_loss": 3.3989626221373586, + "tokens_seen": 2481717248 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025036109773712086, + "loss": 0.0657, + "theoretical_loss": 3.3989341833970803, + "tokens_seen": 2481979392 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025028085379553847, + "loss": 0.0642, + "theoretical_loss": 3.398905748501239, + "tokens_seen": 2482241536 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025020060985395603, + "loss": 0.0657, + "theoretical_loss": 3.3988773174489086, + "tokens_seen": 2482503680 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025012036591237364, + "loss": 0.0667, + "theoretical_loss": 3.3988488902391643, + "tokens_seen": 2482765824 + }, + { + "epoch": 0.75, + "learning_rate": 0.00025004012197079125, + "loss": 0.0651, + "theoretical_loss": 3.3988204668710806, + "tokens_seen": 2483027968 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002499598780292088, + "loss": 0.0648, + "theoretical_loss": 3.398792047343733, + "tokens_seen": 2483290112 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002498796340876264, + "loss": 0.0669, + "theoretical_loss": 3.3987636316561978, + "tokens_seen": 2483552256 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.0003041817108169198, + "objective/train/docs_used": 903880, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3639494180679321, + "objective/train/original_loss": 1.3639495372772217, + "objective/train/theoretical_loss": 3.3987352198075502, + "objective/train/tokens_used": 2504274400, + "objective/train/value_avg": -0.005992889404296875, + "objective/train/value_loss": 0.00011486706353025511, + "objective/train/value_max": -3.3736228942871094e-05, + "objective/train/value_min": -0.3359375, + "objective/train/value_reward_corr": 0.7736587933929978, + "objective/train/value_std": 0.01282501220703125, + "objective/train/weight_avg": 1.0003571510314941, + "objective/train/weighted_lm_loss": 1.36470627784729, + "objective/train/weights_max": 1.13873291015625, + "objective/train/weights_min": 0.3690531253814697, + "theoretical_loss": 3.3987352198075502, + "tokens_seen": 2483814400 + }, + { + "epoch": 0.75, + "learning_rate": 0.000249799390146044, + "loss": 0.0663, + "theoretical_loss": 3.3987352198075502, + "tokens_seen": 2483814400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024971914620446154, + "loss": 0.0667, + "theoretical_loss": 3.3987068117968664, + "tokens_seen": 2484076544 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024963890226287915, + "loss": 0.0635, + "theoretical_loss": 3.3986784076232235, + "tokens_seen": 2484338688 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024955865832129677, + "loss": 0.0625, + "theoretical_loss": 3.3986500072856987, + "tokens_seen": 2484600832 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002494784143797143, + "loss": 0.0642, + "theoretical_loss": 3.398621610783368, + "tokens_seen": 2484862976 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024939817043813194, + "loss": 0.0666, + "theoretical_loss": 3.3985932181153107, + "tokens_seen": 2485125120 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024931792649654955, + "loss": 0.0654, + "theoretical_loss": 3.398564829280603, + "tokens_seen": 2485387264 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002492376825549671, + "loss": 0.0648, + "theoretical_loss": 3.3985364442783244, + "tokens_seen": 2485649408 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024915743861338467, + "loss": 0.0654, + "theoretical_loss": 3.398508063107553, + "tokens_seen": 2485911552 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002490771946718023, + "loss": 0.0661, + "theoretical_loss": 3.3984796857673674, + "tokens_seen": 2486173696 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002489969507302199, + "loss": 0.0653, + "theoretical_loss": 3.3984513122568467, + "tokens_seen": 2486435840 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024891670678863745, + "loss": 0.0677, + "theoretical_loss": 3.3984229425750714, + "tokens_seen": 2486697984 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024883646284705506, + "loss": 0.0654, + "theoretical_loss": 3.3983945767211203, + "tokens_seen": 2486960128 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": -8.536610403098166e-05, + "objective/train/docs_used": 905256, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3416714668273926, + "objective/train/original_loss": 1.3416712284088135, + "objective/train/theoretical_loss": 3.398380395229291, + "objective/train/tokens_used": 2507551200, + "objective/train/value_avg": -0.00720977783203125, + "objective/train/value_loss": 0.00013160381058696657, + "objective/train/value_max": -3.451108932495117e-05, + "objective/train/value_min": -0.469970703125, + "objective/train/value_reward_corr": 0.7648471489080964, + "objective/train/value_std": 0.01303863525390625, + "objective/train/weight_avg": 0.9999786019325256, + "objective/train/weighted_lm_loss": 1.3416770696640015, + "objective/train/weights_max": 1.2009410858154297, + "objective/train/weights_min": 0.5534672141075134, + "theoretical_loss": 3.398380395229291, + "tokens_seen": 2487091200 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002487562189054726, + "loss": 0.0661, + "theoretical_loss": 3.3983662146940734, + "tokens_seen": 2487222272 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024867597496389023, + "loss": 0.0661, + "theoretical_loss": 3.398337856493012, + "tokens_seen": 2487484416 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002485957310223078, + "loss": 0.0663, + "theoretical_loss": 3.3983095021170158, + "tokens_seen": 2487746560 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002485154870807254, + "loss": 0.0664, + "theoretical_loss": 3.398281151565167, + "tokens_seen": 2488008704 + }, + { + "epoch": 0.75, + "learning_rate": 0.000248435243139143, + "loss": 0.0695, + "theoretical_loss": 3.398252804836546, + "tokens_seen": 2488270848 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002483549991975606, + "loss": 0.0662, + "theoretical_loss": 3.3982244619302358, + "tokens_seen": 2488532992 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002482747552559782, + "loss": 0.0677, + "theoretical_loss": 3.398196122845317, + "tokens_seen": 2488795136 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024819451131439575, + "loss": 0.0673, + "theoretical_loss": 3.3981677875808725, + "tokens_seen": 2489057280 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024811426737281336, + "loss": 0.0649, + "theoretical_loss": 3.3981394561359854, + "tokens_seen": 2489319424 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002480340234312309, + "loss": 0.0682, + "theoretical_loss": 3.398111128509738, + "tokens_seen": 2489581568 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024795377948964853, + "loss": 0.0651, + "theoretical_loss": 3.3980828047012146, + "tokens_seen": 2489843712 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024787353554806614, + "loss": 0.0636, + "theoretical_loss": 3.3980544847094976, + "tokens_seen": 2490105856 + }, + { + "epoch": 0.75, + "objective/train/advantage_avg": 0.00030044748564250767, + "objective/train/docs_used": 906478, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2353644371032715, + "objective/train/original_loss": 1.2353644371032715, + "objective/train/theoretical_loss": 3.3980261685336712, + "objective/train/tokens_used": 2510828000, + "objective/train/value_avg": -0.0033092498779296875, + "objective/train/value_loss": 0.00019104246166534722, + "objective/train/value_max": -3.647804260253906e-05, + "objective/train/value_min": -0.28515625, + "objective/train/value_reward_corr": 0.4723460212474383, + "objective/train/value_std": 0.006641387939453125, + "objective/train/weight_avg": 1.0003799200057983, + "objective/train/weighted_lm_loss": 1.2356609106063843, + "objective/train/weights_max": 1.205199956893921, + "objective/train/weights_min": 0.3687315881252289, + "theoretical_loss": 3.3980261685336712, + "tokens_seen": 2490368000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002477932916064837, + "loss": 0.0656, + "theoretical_loss": 3.3980261685336712, + "tokens_seen": 2490368000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002477130476649013, + "loss": 0.0636, + "theoretical_loss": 3.39799785617282, + "tokens_seen": 2490630144 + }, + { + "epoch": 0.75, + "learning_rate": 0.00024763280372331887, + "loss": 0.0663, + "theoretical_loss": 3.3979695476260288, + "tokens_seen": 2490892288 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002475525597817365, + "loss": 0.0668, + "theoretical_loss": 3.397941242892382, + "tokens_seen": 2491154432 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024747231584015404, + "loss": 0.0696, + "theoretical_loss": 3.397912941970965, + "tokens_seen": 2491416576 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024739207189857166, + "loss": 0.0677, + "theoretical_loss": 3.3978846448608633, + "tokens_seen": 2491678720 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024731182795698927, + "loss": 0.0668, + "theoretical_loss": 3.3978563515611624, + "tokens_seen": 2491940864 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024723158401540683, + "loss": 0.0676, + "theoretical_loss": 3.397828062070949, + "tokens_seen": 2492203008 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024715134007382444, + "loss": 0.0638, + "theoretical_loss": 3.3977997763893093, + "tokens_seen": 2492465152 + }, + { + "epoch": 0.76, + "learning_rate": 0.000247071096132242, + "loss": 0.069, + "theoretical_loss": 3.3977714945153297, + "tokens_seen": 2492727296 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002469908521906596, + "loss": 0.0652, + "theoretical_loss": 3.397743216448098, + "tokens_seen": 2492989440 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002469106082490772, + "loss": 0.0668, + "theoretical_loss": 3.3977149421867012, + "tokens_seen": 2493251584 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002468303643074948, + "loss": 0.0684, + "theoretical_loss": 3.397686671730227, + "tokens_seen": 2493513728 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.0004943067324347794, + "objective/train/docs_used": 907814, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2799855470657349, + "objective/train/original_loss": 1.2799855470657349, + "objective/train/theoretical_loss": 3.3976725379285506, + "objective/train/tokens_used": 2514104800, + "objective/train/value_avg": -0.0073394775390625, + "objective/train/value_loss": 0.00021812194609083235, + "objective/train/value_max": -1.4722347259521484e-05, + "objective/train/value_min": -0.232177734375, + "objective/train/value_reward_corr": 0.6116615900831515, + "objective/train/value_std": 0.01178741455078125, + "objective/train/weight_avg": 1.0005911588668823, + "objective/train/weighted_lm_loss": 1.2801631689071655, + "objective/train/weights_max": 1.1481863260269165, + "objective/train/weights_min": 0.3702644109725952, + "theoretical_loss": 3.3976725379285506, + "tokens_seen": 2493644800 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002467501203659124, + "loss": 0.0656, + "theoretical_loss": 3.3976584050777636, + "tokens_seen": 2493775872 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024666987642432995, + "loss": 0.0645, + "theoretical_loss": 3.397630142228399, + "tokens_seen": 2494038016 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024658963248274757, + "loss": 0.0657, + "theoretical_loss": 3.3976018831812222, + "tokens_seen": 2494300160 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002465093885411651, + "loss": 0.0661, + "theoretical_loss": 3.3975736279353224, + "tokens_seen": 2494562304 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024642914459958274, + "loss": 0.063, + "theoretical_loss": 3.3975453764897883, + "tokens_seen": 2494824448 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024634890065800035, + "loss": 0.0651, + "theoretical_loss": 3.3975171288437096, + "tokens_seen": 2495086592 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002462686567164179, + "loss": 0.0644, + "theoretical_loss": 3.397488884996177, + "tokens_seen": 2495348736 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002461884127748355, + "loss": 0.0657, + "theoretical_loss": 3.3974606449462796, + "tokens_seen": 2495610880 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002461081688332531, + "loss": 0.0655, + "theoretical_loss": 3.3974324086931085, + "tokens_seen": 2495873024 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002460279248916707, + "loss": 0.0662, + "theoretical_loss": 3.3974041762357547, + "tokens_seen": 2496135168 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024594768095008825, + "loss": 0.0658, + "theoretical_loss": 3.397375947573309, + "tokens_seen": 2496397312 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024586743700850586, + "loss": 0.0647, + "theoretical_loss": 3.397347722704863, + "tokens_seen": 2496659456 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.0017170101637020707, + "objective/train/docs_used": 908925, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3087087869644165, + "objective/train/original_loss": 1.308708667755127, + "objective/train/theoretical_loss": 3.397319501629509, + "objective/train/tokens_used": 2517381600, + "objective/train/value_avg": -0.011444091796875, + "objective/train/value_loss": 0.00022468052338808775, + "objective/train/value_max": -0.00010311603546142578, + "objective/train/value_min": -0.313720703125, + "objective/train/value_reward_corr": 0.7452039655829534, + "objective/train/value_std": 0.0175933837890625, + "objective/train/weight_avg": 1.0018271207809448, + "objective/train/weighted_lm_loss": 1.3122689723968506, + "objective/train/weights_max": 1.1817450523376465, + "objective/train/weights_min": 0.718687117099762, + "theoretical_loss": 3.397319501629509, + "tokens_seen": 2496921600 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002457871930669235, + "loss": 0.0665, + "theoretical_loss": 3.397319501629509, + "tokens_seen": 2496921600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024570694912534103, + "loss": 0.0645, + "theoretical_loss": 3.397291284346338, + "tokens_seen": 2497183744 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024562670518375865, + "loss": 0.0652, + "theoretical_loss": 3.397263070854444, + "tokens_seen": 2497445888 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002455464612421762, + "loss": 0.066, + "theoretical_loss": 3.397234861152918, + "tokens_seen": 2497708032 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002454662173005938, + "loss": 0.0661, + "theoretical_loss": 3.397206655240854, + "tokens_seen": 2497970176 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002453859733590114, + "loss": 0.0638, + "theoretical_loss": 3.3971784531173457, + "tokens_seen": 2498232320 + }, + { + "epoch": 0.76, + "learning_rate": 0.000245305729417429, + "loss": 0.0638, + "theoretical_loss": 3.397150254781486, + "tokens_seen": 2498494464 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002452254854758466, + "loss": 0.0672, + "theoretical_loss": 3.3971220602323693, + "tokens_seen": 2498756608 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024514524153426416, + "loss": 0.0666, + "theoretical_loss": 3.3970938694690895, + "tokens_seen": 2499018752 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024506499759268177, + "loss": 0.0636, + "theoretical_loss": 3.3970656824907417, + "tokens_seen": 2499280896 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024498475365109933, + "loss": 0.0647, + "theoretical_loss": 3.3970374992964207, + "tokens_seen": 2499543040 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024490450970951694, + "loss": 0.0664, + "theoretical_loss": 3.3970093198852216, + "tokens_seen": 2499805184 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024482426576793456, + "loss": 0.0669, + "theoretical_loss": 3.3969811442562396, + "tokens_seen": 2500067328 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 3.514278068905696e-05, + "objective/train/docs_used": 910166, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2338531017303467, + "objective/train/original_loss": 1.233852744102478, + "objective/train/theoretical_loss": 3.396967057859798, + "objective/train/tokens_used": 2520658400, + "objective/train/value_avg": -0.00606536865234375, + "objective/train/value_loss": 0.00012258593051228672, + "objective/train/value_max": -2.8192996978759766e-05, + "objective/train/value_min": -0.167236328125, + "objective/train/value_reward_corr": 0.6512071554286463, + "objective/train/value_std": 0.00945281982421875, + "objective/train/weight_avg": 1.0000883340835571, + "objective/train/weighted_lm_loss": 1.234293818473816, + "objective/train/weights_max": 1.0883424282073975, + "objective/train/weights_min": 0.37021923065185547, + "theoretical_loss": 3.396967057859798, + "tokens_seen": 2500198400 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002447440218263521, + "loss": 0.0627, + "theoretical_loss": 3.396952972408571, + "tokens_seen": 2500329472 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002446637778847697, + "loss": 0.0656, + "theoretical_loss": 3.3969248043413125, + "tokens_seen": 2500591616 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002445835339431873, + "loss": 0.0641, + "theoretical_loss": 3.3968966400535594, + "tokens_seen": 2500853760 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002445032900016049, + "loss": 0.0647, + "theoretical_loss": 3.396868479544409, + "tokens_seen": 2501115904 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024442304606002246, + "loss": 0.0629, + "theoretical_loss": 3.3968403228129587, + "tokens_seen": 2501378048 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024434280211844007, + "loss": 0.0613, + "theoretical_loss": 3.396812169858306, + "tokens_seen": 2501640192 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002442625581768577, + "loss": 0.0648, + "theoretical_loss": 3.396784020679547, + "tokens_seen": 2501902336 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024418231423527524, + "loss": 0.0662, + "theoretical_loss": 3.3967558752757823, + "tokens_seen": 2502164480 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024410207029369282, + "loss": 0.0682, + "theoretical_loss": 3.396727733646108, + "tokens_seen": 2502426624 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024402182635211044, + "loss": 0.0652, + "theoretical_loss": 3.396699595789624, + "tokens_seen": 2502688768 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024394158241052802, + "loss": 0.0667, + "theoretical_loss": 3.396671461705429, + "tokens_seen": 2502950912 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024386133846894558, + "loss": 0.0652, + "theoretical_loss": 3.396643331392622, + "tokens_seen": 2503213056 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": -0.0003344157012179494, + "objective/train/docs_used": 911421, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.414215326309204, + "objective/train/original_loss": 1.414215326309204, + "objective/train/theoretical_loss": 3.3966152048503027, + "objective/train/tokens_used": 2523935200, + "objective/train/value_avg": -0.010101318359375, + "objective/train/value_loss": 0.0001809104287531227, + "objective/train/value_max": -5.739927291870117e-05, + "objective/train/value_min": -0.25341796875, + "objective/train/value_reward_corr": 0.7323826354438097, + "objective/train/value_std": 0.01412200927734375, + "objective/train/weight_avg": 0.999748945236206, + "objective/train/weighted_lm_loss": 1.4127529859542847, + "objective/train/weights_max": 1.0985801219940186, + "objective/train/weights_min": 0.38993605971336365, + "theoretical_loss": 3.3966152048503027, + "tokens_seen": 2503475200 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002437810945273632, + "loss": 0.0665, + "theoretical_loss": 3.3966152048503027, + "tokens_seen": 2503475200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024370085058578078, + "loss": 0.0629, + "theoretical_loss": 3.396587082077571, + "tokens_seen": 2503737344 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024362060664419836, + "loss": 0.0672, + "theoretical_loss": 3.3965589630735273, + "tokens_seen": 2503999488 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024354036270261595, + "loss": 0.0623, + "theoretical_loss": 3.3965308478372718, + "tokens_seen": 2504261632 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024346011876103356, + "loss": 0.0652, + "theoretical_loss": 3.3965027363679052, + "tokens_seen": 2504523776 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024337987481945115, + "loss": 0.0652, + "theoretical_loss": 3.3964746286645293, + "tokens_seen": 2504785920 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002432996308778687, + "loss": 0.063, + "theoretical_loss": 3.3964465247262448, + "tokens_seen": 2505048064 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024321938693628632, + "loss": 0.0649, + "theoretical_loss": 3.3964184245521536, + "tokens_seen": 2505310208 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002431391429947039, + "loss": 0.0674, + "theoretical_loss": 3.396390328141358, + "tokens_seen": 2505572352 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024305889905312152, + "loss": 0.0655, + "theoretical_loss": 3.3963622354929597, + "tokens_seen": 2505834496 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024297865511153908, + "loss": 0.0653, + "theoretical_loss": 3.396334146606062, + "tokens_seen": 2506096640 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024289841116995666, + "loss": 0.0632, + "theoretical_loss": 3.396306061479768, + "tokens_seen": 2506358784 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024281816722837427, + "loss": 0.0671, + "theoretical_loss": 3.3962779801131804, + "tokens_seen": 2506620928 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": -7.992433529580012e-05, + "objective/train/docs_used": 912553, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.374509572982788, + "objective/train/original_loss": 1.374509334564209, + "objective/train/theoretical_loss": 3.396263940839497, + "objective/train/tokens_used": 2527212000, + "objective/train/value_avg": -0.0071868896484375, + "objective/train/value_loss": 0.0002604563778731972, + "objective/train/value_max": -2.5451183319091797e-05, + "objective/train/value_min": -0.70751953125, + "objective/train/value_reward_corr": 0.7040844074409639, + "objective/train/value_std": 0.017181396484375, + "objective/train/weight_avg": 1.0000439882278442, + "objective/train/weighted_lm_loss": 1.3741766214370728, + "objective/train/weights_max": 1.8518978357315063, + "objective/train/weights_min": 0.4019213020801544, + "theoretical_loss": 3.396263940839497, + "tokens_seen": 2506752000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024273792328679183, + "loss": 0.0667, + "theoretical_loss": 3.3962499025054034, + "tokens_seen": 2506883072 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024265767934520945, + "loss": 0.0638, + "theoretical_loss": 3.3962218286555403, + "tokens_seen": 2507145216 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024257743540362703, + "loss": 0.0623, + "theoretical_loss": 3.3961937585626956, + "tokens_seen": 2507407360 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024249719146204464, + "loss": 0.0656, + "theoretical_loss": 3.3961656922259738, + "tokens_seen": 2507669504 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002424169475204622, + "loss": 0.0665, + "theoretical_loss": 3.3961376296444796, + "tokens_seen": 2507931648 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002423367035788798, + "loss": 0.0664, + "theoretical_loss": 3.396109570817318, + "tokens_seen": 2508193792 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002422564596372974, + "loss": 0.0654, + "theoretical_loss": 3.3960815157435946, + "tokens_seen": 2508455936 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024217621569571499, + "loss": 0.0652, + "theoretical_loss": 3.3960534644224154, + "tokens_seen": 2508718080 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024209597175413257, + "loss": 0.0671, + "theoretical_loss": 3.3960254168528863, + "tokens_seen": 2508980224 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024201572781255016, + "loss": 0.0622, + "theoretical_loss": 3.395997373034113, + "tokens_seen": 2509242368 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024193548387096774, + "loss": 0.0686, + "theoretical_loss": 3.395969332965203, + "tokens_seen": 2509504512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024185523992938533, + "loss": 0.065, + "theoretical_loss": 3.3959412966452627, + "tokens_seen": 2509766656 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": -0.0007175038335844874, + "objective/train/docs_used": 913305, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4248614311218262, + "objective/train/original_loss": 1.4248614311218262, + "objective/train/theoretical_loss": 3.3959132640733998, + "objective/train/tokens_used": 2530488800, + "objective/train/value_avg": -0.00928497314453125, + "objective/train/value_loss": 0.000361848360626027, + "objective/train/value_max": -5.4776668548583984e-05, + "objective/train/value_min": -0.84033203125, + "objective/train/value_reward_corr": 0.7510871829901664, + "objective/train/value_std": 0.0186309814453125, + "objective/train/weight_avg": 0.9994414448738098, + "objective/train/weighted_lm_loss": 1.4235568046569824, + "objective/train/weights_max": 1.756426453590393, + "objective/train/weights_min": 0.37235790491104126, + "theoretical_loss": 3.3959132640733998, + "tokens_seen": 2510028800 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002417749959878029, + "loss": 0.0658, + "theoretical_loss": 3.3959132640733998, + "tokens_seen": 2510028800 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024169475204622053, + "loss": 0.0667, + "theoretical_loss": 3.395885235248721, + "tokens_seen": 2510290944 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002416145081046381, + "loss": 0.0672, + "theoretical_loss": 3.3958572101703353, + "tokens_seen": 2510553088 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002415342641630557, + "loss": 0.0627, + "theoretical_loss": 3.3958291888373506, + "tokens_seen": 2510815232 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024145402022147328, + "loss": 0.0654, + "theoretical_loss": 3.3958011712488747, + "tokens_seen": 2511077376 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024137377627989087, + "loss": 0.0667, + "theoretical_loss": 3.395773157404017, + "tokens_seen": 2511339520 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024129353233830848, + "loss": 0.069, + "theoretical_loss": 3.395745147301887, + "tokens_seen": 2511601664 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024121328839672604, + "loss": 0.066, + "theoretical_loss": 3.395717140941593, + "tokens_seen": 2511863808 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024113304445514365, + "loss": 0.0667, + "theoretical_loss": 3.395689138322245, + "tokens_seen": 2512125952 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024105280051356124, + "loss": 0.0656, + "theoretical_loss": 3.3956611394429537, + "tokens_seen": 2512388096 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002409725565719788, + "loss": 0.0639, + "theoretical_loss": 3.395633144302829, + "tokens_seen": 2512650240 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002408923126303964, + "loss": 0.0654, + "theoretical_loss": 3.3956051529009814, + "tokens_seen": 2512912384 + }, + { + "epoch": 0.76, + "learning_rate": 0.000240812068688814, + "loss": 0.0654, + "theoretical_loss": 3.3955771652365216, + "tokens_seen": 2513174528 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.0003988731768913567, + "objective/train/docs_used": 914593, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4558669328689575, + "objective/train/original_loss": 1.4558669328689575, + "objective/train/theoretical_loss": 3.395563172805535, + "objective/train/tokens_used": 2533765600, + "objective/train/value_avg": -0.004772186279296875, + "objective/train/value_loss": 8.095076191239059e-05, + "objective/train/value_max": -1.722574234008789e-05, + "objective/train/value_min": -0.1739501953125, + "objective/train/value_reward_corr": 0.6699456283607889, + "objective/train/value_std": 0.00800323486328125, + "objective/train/weight_avg": 1.0004359483718872, + "objective/train/weighted_lm_loss": 1.4565863609313965, + "objective/train/weights_max": 1.1899962425231934, + "objective/train/weights_min": 0.4030267000198364, + "theoretical_loss": 3.395563172805535, + "tokens_seen": 2513305600 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002407318247472316, + "loss": 0.0668, + "theoretical_loss": 3.395549181308562, + "tokens_seen": 2513436672 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024065158080564916, + "loss": 0.0684, + "theoretical_loss": 3.3955212011162126, + "tokens_seen": 2513698816 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024057133686406678, + "loss": 0.0654, + "theoretical_loss": 3.395493224658586, + "tokens_seen": 2513960960 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024049109292248436, + "loss": 0.0628, + "theoretical_loss": 3.395465251934794, + "tokens_seen": 2514223104 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024041084898090195, + "loss": 0.0636, + "theoretical_loss": 3.39543728294395, + "tokens_seen": 2514485248 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024033060503931953, + "loss": 0.0631, + "theoretical_loss": 3.395409317685165, + "tokens_seen": 2514747392 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024025036109773712, + "loss": 0.066, + "theoretical_loss": 3.395381356157554, + "tokens_seen": 2515009536 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024017011715615473, + "loss": 0.0675, + "theoretical_loss": 3.3953533983602293, + "tokens_seen": 2515271680 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024008987321457232, + "loss": 0.0661, + "theoretical_loss": 3.3953254442923044, + "tokens_seen": 2515533824 + }, + { + "epoch": 0.76, + "learning_rate": 0.00024000962927298988, + "loss": 0.0672, + "theoretical_loss": 3.3952974939528935, + "tokens_seen": 2515795968 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002399293853314075, + "loss": 0.0639, + "theoretical_loss": 3.395269547341111, + "tokens_seen": 2516058112 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023984914138982507, + "loss": 0.0644, + "theoretical_loss": 3.3952416044560714, + "tokens_seen": 2516320256 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": -0.0004222899442538619, + "objective/train/docs_used": 916228, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4156543016433716, + "objective/train/original_loss": 1.4156540632247925, + "objective/train/theoretical_loss": 3.3952136652968896, + "objective/train/tokens_used": 2537042400, + "objective/train/value_avg": -0.0073394775390625, + "objective/train/value_loss": 0.00018496091070119292, + "objective/train/value_max": -5.02467155456543e-05, + "objective/train/value_min": -0.493408203125, + "objective/train/value_reward_corr": 0.6832515692663174, + "objective/train/value_std": 0.01263427734375, + "objective/train/weight_avg": 0.9996618032455444, + "objective/train/weighted_lm_loss": 1.4154560565948486, + "objective/train/weights_max": 1.4198497533798218, + "objective/train/weights_min": 0.3722045421600342, + "theoretical_loss": 3.3952136652968896, + "tokens_seen": 2516582400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023976889744824266, + "loss": 0.0687, + "theoretical_loss": 3.3952136652968896, + "tokens_seen": 2516582400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023968865350666024, + "loss": 0.065, + "theoretical_loss": 3.3951857298626806, + "tokens_seen": 2516844544 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023960840956507786, + "loss": 0.0648, + "theoretical_loss": 3.3951577981525602, + "tokens_seen": 2517106688 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023952816562349544, + "loss": 0.0655, + "theoretical_loss": 3.395129870165644, + "tokens_seen": 2517368832 + }, + { + "epoch": 0.76, + "learning_rate": 0.000239447921681913, + "loss": 0.0666, + "theoretical_loss": 3.3951019459010476, + "tokens_seen": 2517630976 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002393676777403306, + "loss": 0.0636, + "theoretical_loss": 3.3950740253578875, + "tokens_seen": 2517893120 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002392874337987482, + "loss": 0.065, + "theoretical_loss": 3.3950461085352814, + "tokens_seen": 2518155264 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002392071898571658, + "loss": 0.0659, + "theoretical_loss": 3.3950181954323453, + "tokens_seen": 2518417408 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023912694591558337, + "loss": 0.0654, + "theoretical_loss": 3.3949902860481966, + "tokens_seen": 2518679552 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023904670197400096, + "loss": 0.0653, + "theoretical_loss": 3.394962380381953, + "tokens_seen": 2518941696 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023896645803241857, + "loss": 0.066, + "theoretical_loss": 3.3949344784327327, + "tokens_seen": 2519203840 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023888621409083613, + "loss": 0.0664, + "theoretical_loss": 3.3949065801996534, + "tokens_seen": 2519465984 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023880597014925374, + "loss": 0.0631, + "theoretical_loss": 3.3948786856818334, + "tokens_seen": 2519728128 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.0014712244737893343, + "objective/train/docs_used": 917453, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2721487283706665, + "objective/train/original_loss": 1.2721487283706665, + "objective/train/theoretical_loss": 3.394864739815871, + "objective/train/tokens_used": 2540319200, + "objective/train/value_avg": -0.0135040283203125, + "objective/train/value_loss": 0.0003676135966088623, + "objective/train/value_max": -2.7120113372802734e-05, + "objective/train/value_min": -0.6787109375, + "objective/train/value_reward_corr": 0.78297990649481, + "objective/train/value_std": 0.025665283203125, + "objective/train/weight_avg": 1.0016452074050903, + "objective/train/weighted_lm_loss": 1.2733814716339111, + "objective/train/weights_max": 1.8618712425231934, + "objective/train/weights_min": 0.4023630917072296, + "theoretical_loss": 3.394864739815871, + "tokens_seen": 2519859200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023872572620767132, + "loss": 0.0645, + "theoretical_loss": 3.3948507948783924, + "tokens_seen": 2519990272 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023864548226608894, + "loss": 0.0634, + "theoretical_loss": 3.3948229077884484, + "tokens_seen": 2520252416 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002385652383245065, + "loss": 0.0659, + "theoretical_loss": 3.394795024411122, + "tokens_seen": 2520514560 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023848499438292408, + "loss": 0.0665, + "theoretical_loss": 3.394767144745532, + "tokens_seen": 2520776704 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002384047504413417, + "loss": 0.0691, + "theoretical_loss": 3.394739268790798, + "tokens_seen": 2521038848 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023832450649975928, + "loss": 0.0644, + "theoretical_loss": 3.3947113965460414, + "tokens_seen": 2521300992 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023824426255817686, + "loss": 0.0675, + "theoretical_loss": 3.3946835280103826, + "tokens_seen": 2521563136 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023816401861659445, + "loss": 0.066, + "theoretical_loss": 3.3946556631829417, + "tokens_seen": 2521825280 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023808377467501204, + "loss": 0.0669, + "theoretical_loss": 3.3946278020628404, + "tokens_seen": 2522087424 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023800353073342962, + "loss": 0.0642, + "theoretical_loss": 3.3945999446492, + "tokens_seen": 2522349568 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002379232867918472, + "loss": 0.0653, + "theoretical_loss": 3.3945720909411428, + "tokens_seen": 2522611712 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023784304285026482, + "loss": 0.0654, + "theoretical_loss": 3.3945442409377904, + "tokens_seen": 2522873856 + }, + { + "epoch": 0.76, + "objective/train/advantage_avg": 0.0011498106177896261, + "objective/train/docs_used": 918647, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3918640613555908, + "objective/train/original_loss": 1.3918638229370117, + "objective/train/theoretical_loss": 3.394516394638265, + "objective/train/tokens_used": 2543596000, + "objective/train/value_avg": -0.00826263427734375, + "objective/train/value_loss": 0.0001964410621440038, + "objective/train/value_max": -8.547306060791016e-05, + "objective/train/value_min": -0.39306640625, + "objective/train/value_reward_corr": 0.7096400416725087, + "objective/train/value_std": 0.01409912109375, + "objective/train/weight_avg": 1.0012379884719849, + "objective/train/weighted_lm_loss": 1.3931630849838257, + "objective/train/weights_max": 1.191449761390686, + "objective/train/weights_min": 0.3797193765640259, + "theoretical_loss": 3.394516394638265, + "tokens_seen": 2523136000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002377627989086824, + "loss": 0.0674, + "theoretical_loss": 3.394516394638265, + "tokens_seen": 2523136000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002376825549671, + "loss": 0.0643, + "theoretical_loss": 3.3944885520416896, + "tokens_seen": 2523398144 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023760231102551758, + "loss": 0.0681, + "theoretical_loss": 3.3944607131471876, + "tokens_seen": 2523660288 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023752206708393516, + "loss": 0.0675, + "theoretical_loss": 3.3944328779538813, + "tokens_seen": 2523922432 + }, + { + "epoch": 0.76, + "learning_rate": 0.00023744182314235277, + "loss": 0.0638, + "theoretical_loss": 3.3944050464608955, + "tokens_seen": 2524184576 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023736157920077033, + "loss": 0.0652, + "theoretical_loss": 3.394377218667353, + "tokens_seen": 2524446720 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023728133525918795, + "loss": 0.0628, + "theoretical_loss": 3.3943493945723784, + "tokens_seen": 2524708864 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023720109131760553, + "loss": 0.0677, + "theoretical_loss": 3.394321574175096, + "tokens_seen": 2524971008 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002371208473760231, + "loss": 0.0657, + "theoretical_loss": 3.394293757474631, + "tokens_seen": 2525233152 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002370406034344407, + "loss": 0.0682, + "theoretical_loss": 3.394265944470108, + "tokens_seen": 2525495296 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002369603594928583, + "loss": 0.063, + "theoretical_loss": 3.3942381351606525, + "tokens_seen": 2525757440 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002368801155512759, + "loss": 0.0669, + "theoretical_loss": 3.3942103295453903, + "tokens_seen": 2526019584 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023679987160969346, + "loss": 0.0635, + "theoretical_loss": 3.394182527623448, + "tokens_seen": 2526281728 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.0006156957242637873, + "objective/train/docs_used": 919806, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.17476487159729, + "objective/train/original_loss": 1.17476487159729, + "objective/train/theoretical_loss": 3.394168628047198, + "objective/train/tokens_used": 2546872800, + "objective/train/value_avg": -0.01107025146484375, + "objective/train/value_loss": 0.0007344778859987855, + "objective/train/value_max": -5.519390106201172e-05, + "objective/train/value_min": -0.95849609375, + "objective/train/value_reward_corr": 0.7828262709025253, + "objective/train/value_std": 0.035430908203125, + "objective/train/weight_avg": 1.0009746551513672, + "objective/train/weighted_lm_loss": 1.1768461465835571, + "objective/train/weights_max": 2.2943544387817383, + "objective/train/weights_min": 0.38488778471946716, + "theoretical_loss": 3.394168628047198, + "tokens_seen": 2526412800 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023671962766811107, + "loss": 0.0677, + "theoretical_loss": 3.39415472939395, + "tokens_seen": 2526543872 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023663938372652866, + "loss": 0.0645, + "theoretical_loss": 3.3941269348560246, + "tokens_seen": 2526806016 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023655913978494624, + "loss": 0.0664, + "theoretical_loss": 3.3940991440087984, + "tokens_seen": 2527068160 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023647889584336383, + "loss": 0.0682, + "theoretical_loss": 3.3940713568513976, + "tokens_seen": 2527330304 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002363986519017814, + "loss": 0.0647, + "theoretical_loss": 3.394043573382951, + "tokens_seen": 2527592448 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023631840796019903, + "loss": 0.0654, + "theoretical_loss": 3.394015793602585, + "tokens_seen": 2527854592 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023623816401861658, + "loss": 0.0654, + "theoretical_loss": 3.393988017509429, + "tokens_seen": 2528116736 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002361579200770342, + "loss": 0.0666, + "theoretical_loss": 3.3939602451026096, + "tokens_seen": 2528378880 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023607767613545178, + "loss": 0.0646, + "theoretical_loss": 3.393932476381257, + "tokens_seen": 2528641024 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023599743219386937, + "loss": 0.066, + "theoretical_loss": 3.3939047113445, + "tokens_seen": 2528903168 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023591718825228695, + "loss": 0.0658, + "theoretical_loss": 3.393876949991467, + "tokens_seen": 2529165312 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023583694431070454, + "loss": 0.0659, + "theoretical_loss": 3.393849192321288, + "tokens_seen": 2529427456 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.00048636639257892966, + "objective/train/docs_used": 920541, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2705464363098145, + "objective/train/original_loss": 1.270546555519104, + "objective/train/theoretical_loss": 3.3938214383330925, + "objective/train/tokens_used": 2550149600, + "objective/train/value_avg": -0.00516510009765625, + "objective/train/value_loss": 0.00013838338782079518, + "objective/train/value_max": -5.346536636352539e-05, + "objective/train/value_min": -0.260498046875, + "objective/train/value_reward_corr": 0.5679147804729487, + "objective/train/value_std": 0.0089569091796875, + "objective/train/weight_avg": 1.0005491971969604, + "objective/train/weighted_lm_loss": 1.2707606554031372, + "objective/train/weights_max": 1.1276289224624634, + "objective/train/weights_min": 0.3921318054199219, + "theoretical_loss": 3.3938214383330925, + "tokens_seen": 2529689600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023575670036912215, + "loss": 0.0677, + "theoretical_loss": 3.3938214383330925, + "tokens_seen": 2529689600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023567645642753974, + "loss": 0.0665, + "theoretical_loss": 3.3937936880260113, + "tokens_seen": 2529951744 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002355962124859573, + "loss": 0.0675, + "theoretical_loss": 3.3937659413991743, + "tokens_seen": 2530213888 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002355159685443749, + "loss": 0.066, + "theoretical_loss": 3.3937381984517123, + "tokens_seen": 2530476032 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002354357246027925, + "loss": 0.0634, + "theoretical_loss": 3.3937104591827563, + "tokens_seen": 2530738176 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002353554806612101, + "loss": 0.0658, + "theoretical_loss": 3.3936827235914375, + "tokens_seen": 2531000320 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023527523671962766, + "loss": 0.0665, + "theoretical_loss": 3.3936549916768874, + "tokens_seen": 2531262464 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023519499277804528, + "loss": 0.0683, + "theoretical_loss": 3.3936272634382387, + "tokens_seen": 2531524608 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023511474883646286, + "loss": 0.0663, + "theoretical_loss": 3.3935995388746227, + "tokens_seen": 2531786752 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023503450489488042, + "loss": 0.0633, + "theoretical_loss": 3.393571817985172, + "tokens_seen": 2532048896 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023495426095329803, + "loss": 0.0673, + "theoretical_loss": 3.3935441007690197, + "tokens_seen": 2532311040 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023487401701171562, + "loss": 0.0654, + "theoretical_loss": 3.393516387225299, + "tokens_seen": 2532573184 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023479377307013323, + "loss": 0.0653, + "theoretical_loss": 3.393488677353142, + "tokens_seen": 2532835328 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": -0.0022209736052900553, + "objective/train/docs_used": 921695, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3328808546066284, + "objective/train/original_loss": 1.3328807353973389, + "objective/train/theoretical_loss": 3.39347482379363, + "objective/train/tokens_used": 2553426400, + "objective/train/value_avg": -0.00995635986328125, + "objective/train/value_loss": 0.00040688071749173105, + "objective/train/value_max": -3.147125244140625e-05, + "objective/train/value_min": -0.552734375, + "objective/train/value_reward_corr": 0.7400871618874371, + "objective/train/value_std": 0.0183563232421875, + "objective/train/weight_avg": 0.9979639649391174, + "objective/train/weighted_lm_loss": 1.330152988433838, + "objective/train/weights_max": 1.2169888019561768, + "objective/train/weights_min": 0.3680844008922577, + "theoretical_loss": 3.39347482379363, + "tokens_seen": 2532966400 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002347135291285508, + "loss": 0.0655, + "theoretical_loss": 3.3934609711516845, + "tokens_seen": 2533097472 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023463328518696838, + "loss": 0.0638, + "theoretical_loss": 3.3934332686200586, + "tokens_seen": 2533359616 + }, + { + "epoch": 0.77, + "learning_rate": 0.000234553041245386, + "loss": 0.0665, + "theoretical_loss": 3.3934055697573995, + "tokens_seen": 2533621760 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023447279730380357, + "loss": 0.0663, + "theoretical_loss": 3.393377874562841, + "tokens_seen": 2533883904 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023439255336222116, + "loss": 0.0656, + "theoretical_loss": 3.393350183035519, + "tokens_seen": 2534146048 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023431230942063874, + "loss": 0.0645, + "theoretical_loss": 3.3933224951745675, + "tokens_seen": 2534408192 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023423206547905636, + "loss": 0.0656, + "theoretical_loss": 3.3932948109791226, + "tokens_seen": 2534670336 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023415182153747392, + "loss": 0.0666, + "theoretical_loss": 3.39326713044832, + "tokens_seen": 2534932480 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002340715775958915, + "loss": 0.0669, + "theoretical_loss": 3.393239453581295, + "tokens_seen": 2535194624 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002339913336543091, + "loss": 0.067, + "theoretical_loss": 3.393211780377185, + "tokens_seen": 2535456768 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002339110897127267, + "loss": 0.0645, + "theoretical_loss": 3.3931841108351257, + "tokens_seen": 2535718912 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023383084577114428, + "loss": 0.0646, + "theoretical_loss": 3.393156444954255, + "tokens_seen": 2535981056 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.00019107981643173844, + "objective/train/docs_used": 922895, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.402829885482788, + "objective/train/original_loss": 1.402829647064209, + "objective/train/theoretical_loss": 3.393128782733709, + "objective/train/tokens_used": 2556703200, + "objective/train/value_avg": -0.007198333740234375, + "objective/train/value_loss": 0.0003119884931948036, + "objective/train/value_max": -3.6776065826416016e-05, + "objective/train/value_min": -0.7333984375, + "objective/train/value_reward_corr": 0.7111351334598468, + "objective/train/value_std": 0.0170745849609375, + "objective/train/weight_avg": 1.000330924987793, + "objective/train/weighted_lm_loss": 1.4031528234481812, + "objective/train/weights_max": 1.3954987525939941, + "objective/train/weights_min": 0.3701740503311157, + "theoretical_loss": 3.393128782733709, + "tokens_seen": 2536243200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023375060182956187, + "loss": 0.0671, + "theoretical_loss": 3.393128782733709, + "tokens_seen": 2536243200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023367035788797946, + "loss": 0.0677, + "theoretical_loss": 3.3931011241726248, + "tokens_seen": 2536505344 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023359011394639707, + "loss": 0.0642, + "theoretical_loss": 3.393073469270142, + "tokens_seen": 2536767488 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023350987000481463, + "loss": 0.0653, + "theoretical_loss": 3.393045818025397, + "tokens_seen": 2537029632 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023342962606323224, + "loss": 0.0664, + "theoretical_loss": 3.393018170437529, + "tokens_seen": 2537291776 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023334938212164982, + "loss": 0.0671, + "theoretical_loss": 3.392990526505676, + "tokens_seen": 2537553920 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002332691381800674, + "loss": 0.0659, + "theoretical_loss": 3.3929628862289776, + "tokens_seen": 2537816064 + }, + { + "epoch": 0.77, + "learning_rate": 0.000233188894238485, + "loss": 0.0662, + "theoretical_loss": 3.392935249606573, + "tokens_seen": 2538078208 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023310865029690258, + "loss": 0.0673, + "theoretical_loss": 3.3929076166376007, + "tokens_seen": 2538340352 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002330284063553202, + "loss": 0.0633, + "theoretical_loss": 3.3928799873212014, + "tokens_seen": 2538602496 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023294816241373775, + "loss": 0.065, + "theoretical_loss": 3.3928523616565154, + "tokens_seen": 2538864640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023286791847215536, + "loss": 0.0637, + "theoretical_loss": 3.3928247396426827, + "tokens_seen": 2539126784 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023278767453057295, + "loss": 0.0629, + "theoretical_loss": 3.3927971212788437, + "tokens_seen": 2539388928 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": -3.936679786420427e-05, + "objective/train/docs_used": 924078, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2824890613555908, + "objective/train/original_loss": 1.2824890613555908, + "objective/train/theoretical_loss": 3.3927833134654035, + "objective/train/tokens_used": 2559980000, + "objective/train/value_avg": -0.00939178466796875, + "objective/train/value_loss": 0.000269672047579661, + "objective/train/value_max": -4.267692565917969e-05, + "objective/train/value_min": -0.66162109375, + "objective/train/value_reward_corr": 0.7390932968979256, + "objective/train/value_std": 0.0201263427734375, + "objective/train/weight_avg": 1.0000852346420288, + "objective/train/weighted_lm_loss": 1.2810968160629272, + "objective/train/weights_max": 1.523683786392212, + "objective/train/weights_min": 0.3727017939090729, + "theoretical_loss": 3.3927833134654035, + "tokens_seen": 2539520000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023270743058899054, + "loss": 0.0658, + "theoretical_loss": 3.3927695065641394, + "tokens_seen": 2539651072 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023262718664740812, + "loss": 0.0643, + "theoretical_loss": 3.392741895497712, + "tokens_seen": 2539913216 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002325469427058257, + "loss": 0.0628, + "theoretical_loss": 3.392714288078702, + "tokens_seen": 2540175360 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023246669876424332, + "loss": 0.0647, + "theoretical_loss": 3.3926866843062515, + "tokens_seen": 2540437504 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023238645482266088, + "loss": 0.0643, + "theoretical_loss": 3.392659084179503, + "tokens_seen": 2540699648 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002323062108810785, + "loss": 0.0633, + "theoretical_loss": 3.3926314876975985, + "tokens_seen": 2540961792 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023222596693949608, + "loss": 0.0643, + "theoretical_loss": 3.3926038948596813, + "tokens_seen": 2541223936 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023214572299791366, + "loss": 0.0648, + "theoretical_loss": 3.3925763056648934, + "tokens_seen": 2541486080 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023206547905633125, + "loss": 0.0648, + "theoretical_loss": 3.3925487201123787, + "tokens_seen": 2541748224 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023198523511474883, + "loss": 0.0671, + "theoretical_loss": 3.392521138201281, + "tokens_seen": 2542010368 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023190499117316645, + "loss": 0.065, + "theoretical_loss": 3.392493559930744, + "tokens_seen": 2542272512 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023182474723158403, + "loss": 0.0668, + "theoretical_loss": 3.3924659852999115, + "tokens_seen": 2542534656 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": -0.0005489328177645802, + "objective/train/docs_used": 925229, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1024796962738037, + "objective/train/original_loss": 1.1024794578552246, + "objective/train/theoretical_loss": 3.3924384143079283, + "objective/train/tokens_used": 2563256800, + "objective/train/value_avg": -0.00791168212890625, + "objective/train/value_loss": 0.00014381870278157294, + "objective/train/value_max": -3.24249267578125e-05, + "objective/train/value_min": -0.222900390625, + "objective/train/value_reward_corr": 0.77419179227656, + "objective/train/value_std": 0.013671875, + "objective/train/weight_avg": 0.999518632888794, + "objective/train/weighted_lm_loss": 1.1019738912582397, + "objective/train/weights_max": 1.1265925168991089, + "objective/train/weights_min": 0.37364134192466736, + "theoretical_loss": 3.3924384143079283, + "tokens_seen": 2542796800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002317445032900016, + "loss": 0.0647, + "theoretical_loss": 3.3924384143079283, + "tokens_seen": 2542796800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002316642593484192, + "loss": 0.0684, + "theoretical_loss": 3.392410846953939, + "tokens_seen": 2543058944 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002315840154068368, + "loss": 0.0661, + "theoretical_loss": 3.3923832832370886, + "tokens_seen": 2543321088 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023150377146525437, + "loss": 0.0672, + "theoretical_loss": 3.392355723156523, + "tokens_seen": 2543583232 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023142352752367196, + "loss": 0.0663, + "theoretical_loss": 3.392328166711387, + "tokens_seen": 2543845376 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023134328358208957, + "loss": 0.0652, + "theoretical_loss": 3.3923006139008267, + "tokens_seen": 2544107520 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023126303964050716, + "loss": 0.0642, + "theoretical_loss": 3.3922730647239887, + "tokens_seen": 2544369664 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023118279569892471, + "loss": 0.0632, + "theoretical_loss": 3.392245519180019, + "tokens_seen": 2544631808 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023110255175734233, + "loss": 0.0642, + "theoretical_loss": 3.3922179772680643, + "tokens_seen": 2544893952 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002310223078157599, + "loss": 0.0658, + "theoretical_loss": 3.392190438987272, + "tokens_seen": 2545156096 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023094206387417753, + "loss": 0.0667, + "theoretical_loss": 3.39216290433679, + "tokens_seen": 2545418240 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023086181993259508, + "loss": 0.0641, + "theoretical_loss": 3.392135373315764, + "tokens_seen": 2545680384 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023078157599101267, + "loss": 0.0634, + "theoretical_loss": 3.392107845923344, + "tokens_seen": 2545942528 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.001058534486219287, + "objective/train/docs_used": 926243, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2137850522994995, + "objective/train/original_loss": 1.21378493309021, + "objective/train/theoretical_loss": 3.392094083587595, + "objective/train/tokens_used": 2566533600, + "objective/train/value_avg": -0.007965087890625, + "objective/train/value_loss": 0.0001501610822742805, + "objective/train/value_max": -2.2649765014648438e-05, + "objective/train/value_min": -0.30517578125, + "objective/train/value_reward_corr": 0.6901371006082144, + "objective/train/value_std": 0.01177215576171875, + "objective/train/weight_avg": 1.0011311769485474, + "objective/train/weighted_lm_loss": 1.2146296501159668, + "objective/train/weights_max": 1.2128745317459106, + "objective/train/weights_min": 0.6144814491271973, + "theoretical_loss": 3.392094083587595, + "tokens_seen": 2546073600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023070133204943028, + "loss": 0.0637, + "theoretical_loss": 3.3920803221586775, + "tokens_seen": 2546204672 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023062108810784784, + "loss": 0.0636, + "theoretical_loss": 3.3920528020209124, + "tokens_seen": 2546466816 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023054084416626545, + "loss": 0.0617, + "theoretical_loss": 3.392025285509198, + "tokens_seen": 2546728960 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023046060022468304, + "loss": 0.0632, + "theoretical_loss": 3.391997772622683, + "tokens_seen": 2546991104 + }, + { + "epoch": 0.77, + "learning_rate": 0.00023038035628310065, + "loss": 0.0621, + "theoretical_loss": 3.3919702633605175, + "tokens_seen": 2547253248 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002303001123415182, + "loss": 0.0622, + "theoretical_loss": 3.391942757721851, + "tokens_seen": 2547515392 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002302198683999358, + "loss": 0.065, + "theoretical_loss": 3.3919152557058325, + "tokens_seen": 2547777536 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002301396244583534, + "loss": 0.0638, + "theoretical_loss": 3.3918877573116126, + "tokens_seen": 2548039680 + }, + { + "epoch": 0.77, + "learning_rate": 0.000230059380516771, + "loss": 0.066, + "theoretical_loss": 3.3918602625383425, + "tokens_seen": 2548301824 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022997913657518858, + "loss": 0.0647, + "theoretical_loss": 3.391832771385172, + "tokens_seen": 2548563968 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022989889263360616, + "loss": 0.0633, + "theoretical_loss": 3.3918052838512533, + "tokens_seen": 2548826112 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022981864869202375, + "loss": 0.0642, + "theoretical_loss": 3.3917777999357366, + "tokens_seen": 2549088256 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": -0.0001033979351632297, + "objective/train/docs_used": 927515, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2924425601959229, + "objective/train/original_loss": 1.2924423217773438, + "objective/train/theoretical_loss": 3.3917503196377745, + "objective/train/tokens_used": 2569810400, + "objective/train/value_avg": -0.00595855712890625, + "objective/train/value_loss": 0.0001676402462180704, + "objective/train/value_max": -4.00543212890625e-05, + "objective/train/value_min": -0.206787109375, + "objective/train/value_reward_corr": 0.6296488844205017, + "objective/train/value_std": 0.00970458984375, + "objective/train/weight_avg": 0.9999687671661377, + "objective/train/weighted_lm_loss": 1.292542815208435, + "objective/train/weights_max": 1.1142876148223877, + "objective/train/weights_min": 0.36936861276626587, + "theoretical_loss": 3.3917503196377745, + "tokens_seen": 2549350400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022973840475044136, + "loss": 0.0661, + "theoretical_loss": 3.3917503196377745, + "tokens_seen": 2549350400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022965816080885892, + "loss": 0.0647, + "theoretical_loss": 3.391722842956518, + "tokens_seen": 2549612544 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022957791686727653, + "loss": 0.0627, + "theoretical_loss": 3.3916953698911203, + "tokens_seen": 2549874688 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022949767292569412, + "loss": 0.0639, + "theoretical_loss": 3.391667900440733, + "tokens_seen": 2550136832 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002294174289841117, + "loss": 0.0647, + "theoretical_loss": 3.3916404346045095, + "tokens_seen": 2550398976 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002293371850425293, + "loss": 0.0641, + "theoretical_loss": 3.3916129723816026, + "tokens_seen": 2550661120 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022925694110094688, + "loss": 0.0654, + "theoretical_loss": 3.3915855137711657, + "tokens_seen": 2550923264 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002291766971593645, + "loss": 0.0619, + "theoretical_loss": 3.3915580587723526, + "tokens_seen": 2551185408 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022909645321778205, + "loss": 0.0623, + "theoretical_loss": 3.391530607384317, + "tokens_seen": 2551447552 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022901620927619966, + "loss": 0.0633, + "theoretical_loss": 3.3915031596062133, + "tokens_seen": 2551709696 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022893596533461724, + "loss": 0.0634, + "theoretical_loss": 3.391475715437196, + "tokens_seen": 2551971840 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022885572139303486, + "loss": 0.0623, + "theoretical_loss": 3.39144827487642, + "tokens_seen": 2552233984 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022877547745145242, + "loss": 0.0657, + "theoretical_loss": 3.3914208379230395, + "tokens_seen": 2552496128 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.0006462376331910491, + "objective/train/docs_used": 928605, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2336928844451904, + "objective/train/original_loss": 1.2336928844451904, + "objective/train/theoretical_loss": 3.391407120798859, + "objective/train/tokens_used": 2573087200, + "objective/train/value_avg": -0.01276397705078125, + "objective/train/value_loss": 0.00046865135664120317, + "objective/train/value_max": -6.204843521118164e-05, + "objective/train/value_min": -0.7060546875, + "objective/train/value_reward_corr": 0.6902025219331749, + "objective/train/value_std": 0.0211639404296875, + "objective/train/weight_avg": 1.000848650932312, + "objective/train/weighted_lm_loss": 1.2347334623336792, + "objective/train/weights_max": 1.2664811611175537, + "objective/train/weights_min": 0.36937421560287476, + "theoretical_loss": 3.391407120798859, + "tokens_seen": 2552627200 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022869523350987, + "loss": 0.0656, + "theoretical_loss": 3.3913934045762106, + "tokens_seen": 2552758272 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022861498956828761, + "loss": 0.0624, + "theoretical_loss": 3.3913659748350895, + "tokens_seen": 2553020416 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022853474562670517, + "loss": 0.0659, + "theoretical_loss": 3.391338548698831, + "tokens_seen": 2553282560 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022845450168512278, + "loss": 0.0644, + "theoretical_loss": 3.391311126166592, + "tokens_seen": 2553544704 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022837425774354037, + "loss": 0.0623, + "theoretical_loss": 3.3912837072375286, + "tokens_seen": 2553806848 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022829401380195796, + "loss": 0.0648, + "theoretical_loss": 3.391256291910798, + "tokens_seen": 2554068992 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022821376986037554, + "loss": 0.0642, + "theoretical_loss": 3.391228880185557, + "tokens_seen": 2554331136 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022813352591879313, + "loss": 0.0625, + "theoretical_loss": 3.3912014720609625, + "tokens_seen": 2554593280 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022805328197721074, + "loss": 0.0659, + "theoretical_loss": 3.391174067536173, + "tokens_seen": 2554855424 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022797303803562832, + "loss": 0.0665, + "theoretical_loss": 3.391146666610346, + "tokens_seen": 2555117568 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022789279409404588, + "loss": 0.0665, + "theoretical_loss": 3.3911192692826395, + "tokens_seen": 2555379712 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002278125501524635, + "loss": 0.0647, + "theoretical_loss": 3.3910918755522124, + "tokens_seen": 2555641856 + }, + { + "epoch": 0.77, + "objective/train/advantage_avg": 0.0010113369207829237, + "objective/train/docs_used": 929757, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3162387609481812, + "objective/train/original_loss": 1.3162386417388916, + "objective/train/theoretical_loss": 3.391064485418223, + "objective/train/tokens_used": 2576364000, + "objective/train/value_avg": -0.005802154541015625, + "objective/train/value_loss": 0.0001210497721331194, + "objective/train/value_max": -5.829334259033203e-05, + "objective/train/value_min": -0.424072265625, + "objective/train/value_reward_corr": 0.7426082308917776, + "objective/train/value_std": 0.01232147216796875, + "objective/train/weight_avg": 1.0010701417922974, + "objective/train/weighted_lm_loss": 1.3174530267715454, + "objective/train/weights_max": 1.1952372789382935, + "objective/train/weights_min": 0.7097556591033936, + "theoretical_loss": 3.391064485418223, + "tokens_seen": 2555904000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022773230621088108, + "loss": 0.0644, + "theoretical_loss": 3.391064485418223, + "tokens_seen": 2555904000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022765206226929867, + "loss": 0.0667, + "theoretical_loss": 3.3910370988798304, + "tokens_seen": 2556166144 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022757181832771625, + "loss": 0.0666, + "theoretical_loss": 3.3910097159361943, + "tokens_seen": 2556428288 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022749157438613386, + "loss": 0.0635, + "theoretical_loss": 3.3909823365864744, + "tokens_seen": 2556690432 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022741133044455145, + "loss": 0.0646, + "theoretical_loss": 3.3909549608298297, + "tokens_seen": 2556952576 + }, + { + "epoch": 0.77, + "learning_rate": 0.000227331086502969, + "loss": 0.0646, + "theoretical_loss": 3.3909275886654213, + "tokens_seen": 2557214720 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022725084256138662, + "loss": 0.0653, + "theoretical_loss": 3.3909002200924094, + "tokens_seen": 2557476864 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002271705986198042, + "loss": 0.0655, + "theoretical_loss": 3.3908728551099543, + "tokens_seen": 2557739008 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022709035467822182, + "loss": 0.0656, + "theoretical_loss": 3.390845493717218, + "tokens_seen": 2558001152 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022701011073663938, + "loss": 0.067, + "theoretical_loss": 3.3908181359133605, + "tokens_seen": 2558263296 + }, + { + "epoch": 0.78, + "learning_rate": 0.000226929866795057, + "loss": 0.0642, + "theoretical_loss": 3.390790781697544, + "tokens_seen": 2558525440 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022684962285347458, + "loss": 0.0665, + "theoretical_loss": 3.390763431068931, + "tokens_seen": 2558787584 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022676937891189213, + "loss": 0.0641, + "theoretical_loss": 3.390736084026683, + "tokens_seen": 2559049728 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 1.652149876463227e-06, + "objective/train/docs_used": 930868, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.139220952987671, + "objective/train/original_loss": 1.139220952987671, + "objective/train/theoretical_loss": 3.390722411850184, + "objective/train/tokens_used": 2579640800, + "objective/train/value_avg": -0.0068511962890625, + "objective/train/value_loss": 0.0003664409159682691, + "objective/train/value_max": -4.035234451293945e-05, + "objective/train/value_min": -0.71240234375, + "objective/train/value_reward_corr": 0.6045150013411714, + "objective/train/value_std": 0.0150909423828125, + "objective/train/weight_avg": 1.0001568794250488, + "objective/train/weighted_lm_loss": 1.1387513875961304, + "objective/train/weights_max": 1.5181612968444824, + "objective/train/weights_min": 0.3759317100048065, + "theoretical_loss": 3.390722411850184, + "tokens_seen": 2559180800 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022668913497030975, + "loss": 0.0609, + "theoretical_loss": 3.390708740569962, + "tokens_seen": 2559311872 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022660889102872733, + "loss": 0.0665, + "theoretical_loss": 3.3906814006979316, + "tokens_seen": 2559574016 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022652864708714495, + "loss": 0.0641, + "theoretical_loss": 3.390654064409754, + "tokens_seen": 2559836160 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002264484031455625, + "loss": 0.0602, + "theoretical_loss": 3.3906267317045935, + "tokens_seen": 2560098304 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002263681592039801, + "loss": 0.064, + "theoretical_loss": 3.3905994025816124, + "tokens_seen": 2560360448 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002262879152623977, + "loss": 0.0643, + "theoretical_loss": 3.3905720770399754, + "tokens_seen": 2560622592 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002262076713208153, + "loss": 0.0616, + "theoretical_loss": 3.3905447550788463, + "tokens_seen": 2560884736 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022612742737923287, + "loss": 0.0636, + "theoretical_loss": 3.39051743669739, + "tokens_seen": 2561146880 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022604718343765046, + "loss": 0.0641, + "theoretical_loss": 3.39049012189477, + "tokens_seen": 2561409024 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022596693949606807, + "loss": 0.0626, + "theoretical_loss": 3.3904628106701526, + "tokens_seen": 2561671168 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022588669555448563, + "loss": 0.066, + "theoretical_loss": 3.3904355030227022, + "tokens_seen": 2561933312 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022580645161290321, + "loss": 0.0643, + "theoretical_loss": 3.3904081989515844, + "tokens_seen": 2562195456 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.0004914909368380904, + "objective/train/docs_used": 931999, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2367290258407593, + "objective/train/original_loss": 1.2367291450500488, + "objective/train/theoretical_loss": 3.390380898455965, + "objective/train/tokens_used": 2582917600, + "objective/train/value_avg": -0.006000518798828125, + "objective/train/value_loss": 8.66991831571795e-05, + "objective/train/value_max": -4.267692565917969e-05, + "objective/train/value_min": -0.195556640625, + "objective/train/value_reward_corr": 0.74524198172894, + "objective/train/value_std": 0.01154327392578125, + "objective/train/weight_avg": 1.0005344152450562, + "objective/train/weighted_lm_loss": 1.2368214130401611, + "objective/train/weights_max": 1.1457269191741943, + "objective/train/weights_min": 0.822311520576477, + "theoretical_loss": 3.390380898455965, + "tokens_seen": 2562457600 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022572620767132083, + "loss": 0.0631, + "theoretical_loss": 3.390380898455965, + "tokens_seen": 2562457600 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002256459637297384, + "loss": 0.0665, + "theoretical_loss": 3.390353601535011, + "tokens_seen": 2562719744 + }, + { + "epoch": 0.78, + "learning_rate": 0.000225565719788156, + "loss": 0.0642, + "theoretical_loss": 3.3903263081878876, + "tokens_seen": 2562981888 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022548547584657358, + "loss": 0.0635, + "theoretical_loss": 3.390299018413762, + "tokens_seen": 2563244032 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022540523190499117, + "loss": 0.0636, + "theoretical_loss": 3.3902717322118, + "tokens_seen": 2563506176 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022532498796340878, + "loss": 0.0659, + "theoretical_loss": 3.3902444495811705, + "tokens_seen": 2563768320 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022524474402182634, + "loss": 0.065, + "theoretical_loss": 3.3902171705210398, + "tokens_seen": 2564030464 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022516450008024395, + "loss": 0.0626, + "theoretical_loss": 3.390189895030576, + "tokens_seen": 2564292608 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022508425613866154, + "loss": 0.0636, + "theoretical_loss": 3.390162623108948, + "tokens_seen": 2564554752 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022500401219707915, + "loss": 0.0652, + "theoretical_loss": 3.390135354755323, + "tokens_seen": 2564816896 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002249237682554967, + "loss": 0.0643, + "theoretical_loss": 3.3901080899688694, + "tokens_seen": 2565079040 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002248435243139143, + "loss": 0.0638, + "theoretical_loss": 3.390080828748757, + "tokens_seen": 2565341184 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002247632803723319, + "loss": 0.0644, + "theoretical_loss": 3.390053571094154, + "tokens_seen": 2565603328 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 6.488848612207221e-06, + "objective/train/docs_used": 933221, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.4858256578445435, + "objective/train/original_loss": 1.485825538635254, + "objective/train/theoretical_loss": 3.3900399436036595, + "objective/train/tokens_used": 2586194400, + "objective/train/value_avg": -0.00689697265625, + "objective/train/value_loss": 0.00017969694454222918, + "objective/train/value_max": -3.170967102050781e-05, + "objective/train/value_min": -0.291748046875, + "objective/train/value_reward_corr": 0.7551056464542931, + "objective/train/value_std": 0.01503753662109375, + "objective/train/weight_avg": 1.0000879764556885, + "objective/train/weighted_lm_loss": 1.4855372905731201, + "objective/train/weights_max": 1.12247896194458, + "objective/train/weights_min": 0.3718355596065521, + "theoretical_loss": 3.3900399436036595, + "tokens_seen": 2565734400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022468303643074947, + "loss": 0.0655, + "theoretical_loss": 3.3900263170042306, + "tokens_seen": 2565865472 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022460279248916708, + "loss": 0.0633, + "theoretical_loss": 3.3899990664781563, + "tokens_seen": 2566127616 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022452254854758466, + "loss": 0.0659, + "theoretical_loss": 3.389971819515101, + "tokens_seen": 2566389760 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022444230460600225, + "loss": 0.0661, + "theoretical_loss": 3.389944576114235, + "tokens_seen": 2566651904 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022436206066441984, + "loss": 0.0624, + "theoretical_loss": 3.3899173362747286, + "tokens_seen": 2566914048 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022428181672283742, + "loss": 0.0648, + "theoretical_loss": 3.389890099995753, + "tokens_seen": 2567176192 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022420157278125503, + "loss": 0.0661, + "theoretical_loss": 3.389862867276479, + "tokens_seen": 2567438336 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022412132883967262, + "loss": 0.0628, + "theoretical_loss": 3.3898356381160784, + "tokens_seen": 2567700480 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002240410848980902, + "loss": 0.0661, + "theoretical_loss": 3.3898084125137222, + "tokens_seen": 2567962624 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002239608409565078, + "loss": 0.0645, + "theoretical_loss": 3.389781190468583, + "tokens_seen": 2568224768 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022388059701492538, + "loss": 0.0651, + "theoretical_loss": 3.3897539719798324, + "tokens_seen": 2568486912 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022380035307334296, + "loss": 0.0657, + "theoretical_loss": 3.389726757046643, + "tokens_seen": 2568749056 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.0008476021466776729, + "objective/train/docs_used": 934338, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1809134483337402, + "objective/train/original_loss": 1.1809133291244507, + "objective/train/theoretical_loss": 3.3896995456681878, + "objective/train/tokens_used": 2589471200, + "objective/train/value_avg": -0.00931549072265625, + "objective/train/value_loss": 0.00018791698676068336, + "objective/train/value_max": -3.045797348022461e-05, + "objective/train/value_min": -0.2313232421875, + "objective/train/value_reward_corr": 0.8049078212678962, + "objective/train/value_std": 0.018310546875, + "objective/train/weight_avg": 1.0009363889694214, + "objective/train/weighted_lm_loss": 1.1823976039886475, + "objective/train/weights_max": 1.1745542287826538, + "objective/train/weights_min": 0.37418052554130554, + "theoretical_loss": 3.3896995456681878, + "tokens_seen": 2569011200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022372010913176055, + "loss": 0.0633, + "theoretical_loss": 3.3896995456681878, + "tokens_seen": 2569011200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022363986519017816, + "loss": 0.069, + "theoretical_loss": 3.38967233784364, + "tokens_seen": 2569273344 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022355962124859574, + "loss": 0.0636, + "theoretical_loss": 3.3896451335721727, + "tokens_seen": 2569535488 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002234793773070133, + "loss": 0.0646, + "theoretical_loss": 3.3896179328529588, + "tokens_seen": 2569797632 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022339913336543092, + "loss": 0.0665, + "theoretical_loss": 3.3895907356851733, + "tokens_seen": 2570059776 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002233188894238485, + "loss": 0.0652, + "theoretical_loss": 3.38956354206799, + "tokens_seen": 2570321920 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022323864548226611, + "loss": 0.0667, + "theoretical_loss": 3.389536352000583, + "tokens_seen": 2570584064 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022315840154068367, + "loss": 0.0686, + "theoretical_loss": 3.389509165482127, + "tokens_seen": 2570846208 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022307815759910128, + "loss": 0.0663, + "theoretical_loss": 3.389481982511797, + "tokens_seen": 2571108352 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022299791365751887, + "loss": 0.0669, + "theoretical_loss": 3.3894548030887686, + "tokens_seen": 2571370496 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022291766971593643, + "loss": 0.0644, + "theoretical_loss": 3.3894276272122172, + "tokens_seen": 2571632640 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022283742577435404, + "loss": 0.0641, + "theoretical_loss": 3.389400454881318, + "tokens_seen": 2571894784 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022275718183277163, + "loss": 0.0669, + "theoretical_loss": 3.389373286095248, + "tokens_seen": 2572156928 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.0004840648907702416, + "objective/train/docs_used": 935480, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2403749227523804, + "objective/train/original_loss": 1.24037504196167, + "objective/train/theoretical_loss": 3.3893597030312663, + "objective/train/tokens_used": 2592748000, + "objective/train/value_avg": -0.007007598876953125, + "objective/train/value_loss": 0.0001475304743507877, + "objective/train/value_max": -4.506111145019531e-05, + "objective/train/value_min": -0.305908203125, + "objective/train/value_reward_corr": 0.7053234026415041, + "objective/train/value_std": 0.0128021240234375, + "objective/train/weight_avg": 1.0005536079406738, + "objective/train/weighted_lm_loss": 1.2408409118652344, + "objective/train/weights_max": 1.186949610710144, + "objective/train/weights_min": 0.39292243123054504, + "theoretical_loss": 3.3893597030312663, + "tokens_seen": 2572288000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022267693789118924, + "loss": 0.0707, + "theoretical_loss": 3.389346120853183, + "tokens_seen": 2572419072 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002225966939496068, + "loss": 0.0681, + "theoretical_loss": 3.3893189591543, + "tokens_seen": 2572681216 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022251645000802438, + "loss": 0.0686, + "theoretical_loss": 3.3892918009977753, + "tokens_seen": 2572943360 + }, + { + "epoch": 0.78, + "learning_rate": 0.000222436206066442, + "loss": 0.0642, + "theoretical_loss": 3.3892646463827862, + "tokens_seen": 2573205504 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022235596212485958, + "loss": 0.0634, + "theoretical_loss": 3.3892374953085107, + "tokens_seen": 2573467648 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022227571818327717, + "loss": 0.0701, + "theoretical_loss": 3.389210347774126, + "tokens_seen": 2573729792 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022219547424169475, + "loss": 0.0646, + "theoretical_loss": 3.3891832037788103, + "tokens_seen": 2573991936 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022211523030011236, + "loss": 0.0631, + "theoretical_loss": 3.389156063321742, + "tokens_seen": 2574254080 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022203498635852992, + "loss": 0.065, + "theoretical_loss": 3.3891289264020994, + "tokens_seen": 2574516224 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002219547424169475, + "loss": 0.066, + "theoretical_loss": 3.3891017930190612, + "tokens_seen": 2574778368 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022187449847536512, + "loss": 0.0673, + "theoretical_loss": 3.3890746631718067, + "tokens_seen": 2575040512 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002217942545337827, + "loss": 0.0611, + "theoretical_loss": 3.3890475368595157, + "tokens_seen": 2575302656 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.000912030111066997, + "objective/train/docs_used": 936685, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3602075576782227, + "objective/train/original_loss": 1.360207438468933, + "objective/train/theoretical_loss": 3.389020414081367, + "objective/train/tokens_used": 2596024800, + "objective/train/value_avg": -0.00836181640625, + "objective/train/value_loss": 0.0001833103597164154, + "objective/train/value_max": -6.759166717529297e-05, + "objective/train/value_min": -0.2305908203125, + "objective/train/value_reward_corr": 0.6511028326974286, + "objective/train/value_std": 0.01253509521484375, + "objective/train/weight_avg": 1.0009958744049072, + "objective/train/weighted_lm_loss": 1.3608472347259521, + "objective/train/weights_max": 1.1629977226257324, + "objective/train/weights_min": 0.3708355128765106, + "theoretical_loss": 3.389020414081367, + "tokens_seen": 2575564800 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002217140105922003, + "loss": 0.0665, + "theoretical_loss": 3.389020414081367, + "tokens_seen": 2575564800 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022163376665061788, + "loss": 0.066, + "theoretical_loss": 3.3889932948365407, + "tokens_seen": 2575826944 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022155352270903546, + "loss": 0.0632, + "theoretical_loss": 3.3889661791242176, + "tokens_seen": 2576089088 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022147327876745308, + "loss": 0.0678, + "theoretical_loss": 3.3889390669435775, + "tokens_seen": 2576351232 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022139303482587063, + "loss": 0.0681, + "theoretical_loss": 3.388911958293802, + "tokens_seen": 2576613376 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022131279088428825, + "loss": 0.0615, + "theoretical_loss": 3.388884853174071, + "tokens_seen": 2576875520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022123254694270583, + "loss": 0.0688, + "theoretical_loss": 3.3888577515835663, + "tokens_seen": 2577137664 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022115230300112342, + "loss": 0.0615, + "theoretical_loss": 3.3888306535214694, + "tokens_seen": 2577399808 + }, + { + "epoch": 0.78, + "learning_rate": 0.000221072059059541, + "loss": 0.0657, + "theoretical_loss": 3.3888035589869627, + "tokens_seen": 2577661952 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002209918151179586, + "loss": 0.0653, + "theoretical_loss": 3.3887764679792274, + "tokens_seen": 2577924096 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002209115711763762, + "loss": 0.0656, + "theoretical_loss": 3.3887493804974462, + "tokens_seen": 2578186240 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022083132723479376, + "loss": 0.0691, + "theoretical_loss": 3.388722296540802, + "tokens_seen": 2578448384 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022075108329321137, + "loss": 0.0673, + "theoretical_loss": 3.3886952161084776, + "tokens_seen": 2578710528 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.00034838804276660085, + "objective/train/docs_used": 937774, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.201945185661316, + "objective/train/original_loss": 1.2019450664520264, + "objective/train/theoretical_loss": 3.38868167721368, + "objective/train/tokens_used": 2599301600, + "objective/train/value_avg": -0.00756072998046875, + "objective/train/value_loss": 0.00030759855872020125, + "objective/train/value_max": -3.218650817871094e-05, + "objective/train/value_min": -0.5537109375, + "objective/train/value_reward_corr": 0.70862187347612, + "objective/train/value_std": 0.01560211181640625, + "objective/train/weight_avg": 1.0004799365997314, + "objective/train/weighted_lm_loss": 1.2030587196350098, + "objective/train/weights_max": 1.176781177520752, + "objective/train/weights_min": 0.3896505832672119, + "theoretical_loss": 3.38868167721368, + "tokens_seen": 2578841600 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022067083935162896, + "loss": 0.065, + "theoretical_loss": 3.3886681391996563, + "tokens_seen": 2578972672 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022059059541004654, + "loss": 0.0677, + "theoretical_loss": 3.3886410658135206, + "tokens_seen": 2579234816 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022051035146846413, + "loss": 0.068, + "theoretical_loss": 3.388613995949256, + "tokens_seen": 2579496960 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022043010752688171, + "loss": 0.0675, + "theoretical_loss": 3.388586929606045, + "tokens_seen": 2579759104 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022034986358529933, + "loss": 0.0643, + "theoretical_loss": 3.3885598667830727, + "tokens_seen": 2580021248 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002202696196437169, + "loss": 0.0619, + "theoretical_loss": 3.3885328074795233, + "tokens_seen": 2580283392 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002201893757021345, + "loss": 0.0647, + "theoretical_loss": 3.3885057516945816, + "tokens_seen": 2580545536 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022010913176055208, + "loss": 0.0673, + "theoretical_loss": 3.388478699427433, + "tokens_seen": 2580807680 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022002888781896967, + "loss": 0.0639, + "theoretical_loss": 3.388451650677262, + "tokens_seen": 2581069824 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021994864387738725, + "loss": 0.0669, + "theoretical_loss": 3.388424605443256, + "tokens_seen": 2581331968 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021986839993580484, + "loss": 0.0655, + "theoretical_loss": 3.3883975637245993, + "tokens_seen": 2581594112 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021978815599422245, + "loss": 0.0639, + "theoretical_loss": 3.388370525520479, + "tokens_seen": 2581856256 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 1.741459891491104e-05, + "objective/train/docs_used": 938961, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.39012610912323, + "objective/train/original_loss": 1.3901259899139404, + "objective/train/theoretical_loss": 3.3883434908300805, + "objective/train/tokens_used": 2602578400, + "objective/train/value_avg": -0.01126861572265625, + "objective/train/value_loss": 0.0005996196996420622, + "objective/train/value_max": -3.975629806518555e-05, + "objective/train/value_min": -0.6435546875, + "objective/train/value_reward_corr": 0.7908500869694258, + "objective/train/value_std": 0.028778076171875, + "objective/train/weight_avg": 1.0002272129058838, + "objective/train/weighted_lm_loss": 1.3898470401763916, + "objective/train/weights_max": 1.3806082010269165, + "objective/train/weights_min": 0.05692806467413902, + "theoretical_loss": 3.3883434908300805, + "tokens_seen": 2582118400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021970791205264004, + "loss": 0.0654, + "theoretical_loss": 3.3883434908300805, + "tokens_seen": 2582118400 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002196276681110576, + "loss": 0.0675, + "theoretical_loss": 3.388316459652591, + "tokens_seen": 2582380544 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002195474241694752, + "loss": 0.0639, + "theoretical_loss": 3.3882894319871983, + "tokens_seen": 2582642688 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002194671802278928, + "loss": 0.0654, + "theoretical_loss": 3.388262407833089, + "tokens_seen": 2582904832 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002193869362863104, + "loss": 0.0673, + "theoretical_loss": 3.3882353871894506, + "tokens_seen": 2583166976 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021930669234472797, + "loss": 0.0657, + "theoretical_loss": 3.388208370055471, + "tokens_seen": 2583429120 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021922644840314558, + "loss": 0.065, + "theoretical_loss": 3.388181356430338, + "tokens_seen": 2583691264 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021914620446156316, + "loss": 0.0641, + "theoretical_loss": 3.3881543463132404, + "tokens_seen": 2583953408 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021906596051998072, + "loss": 0.0644, + "theoretical_loss": 3.3881273397033667, + "tokens_seen": 2584215552 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021898571657839834, + "loss": 0.0657, + "theoretical_loss": 3.388100336599906, + "tokens_seen": 2584477696 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021890547263681592, + "loss": 0.0649, + "theoretical_loss": 3.388073337002047, + "tokens_seen": 2584739840 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021882522869523353, + "loss": 0.065, + "theoretical_loss": 3.388046340908979, + "tokens_seen": 2585001984 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002187449847536511, + "loss": 0.0653, + "theoretical_loss": 3.388019348319892, + "tokens_seen": 2585264128 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": 0.0002849264128599316, + "objective/train/docs_used": 939988, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.224392056465149, + "objective/train/original_loss": 1.224392056465149, + "objective/train/theoretical_loss": 3.3880058533390884, + "objective/train/tokens_used": 2605855200, + "objective/train/value_avg": -0.006206512451171875, + "objective/train/value_loss": 0.0002516733657103032, + "objective/train/value_max": -3.2961368560791016e-05, + "objective/train/value_min": -0.38623046875, + "objective/train/value_reward_corr": 0.6863537063306748, + "objective/train/value_std": 0.01476287841796875, + "objective/train/weight_avg": 1.0003920793533325, + "objective/train/weighted_lm_loss": 1.2246785163879395, + "objective/train/weights_max": 1.432533860206604, + "objective/train/weights_min": 0.38275596499443054, + "theoretical_loss": 3.3880058533390884, + "tokens_seen": 2585395200 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002186647408120687, + "loss": 0.0624, + "theoretical_loss": 3.3879923592339765, + "tokens_seen": 2585526272 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002185844968704863, + "loss": 0.0641, + "theoretical_loss": 3.3879653736504216, + "tokens_seen": 2585788416 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021850425292890388, + "loss": 0.0626, + "theoretical_loss": 3.3879383915684187, + "tokens_seen": 2586050560 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021842400898732146, + "loss": 0.0696, + "theoretical_loss": 3.387911412987158, + "tokens_seen": 2586312704 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021834376504573905, + "loss": 0.0653, + "theoretical_loss": 3.3878844379058313, + "tokens_seen": 2586574848 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021826352110415666, + "loss": 0.0677, + "theoretical_loss": 3.387857466323629, + "tokens_seen": 2586836992 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021818327716257422, + "loss": 0.0676, + "theoretical_loss": 3.3878304982397434, + "tokens_seen": 2587099136 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002181030332209918, + "loss": 0.0646, + "theoretical_loss": 3.387803533653366, + "tokens_seen": 2587361280 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021802278927940942, + "loss": 0.0683, + "theoretical_loss": 3.3877765725636886, + "tokens_seen": 2587623424 + }, + { + "epoch": 0.78, + "learning_rate": 0.000217942545337827, + "loss": 0.0657, + "theoretical_loss": 3.3877496149699047, + "tokens_seen": 2587885568 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002178623013962446, + "loss": 0.0638, + "theoretical_loss": 3.387722660871206, + "tokens_seen": 2588147712 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021778205745466217, + "loss": 0.0662, + "theoretical_loss": 3.387695710266785, + "tokens_seen": 2588409856 + }, + { + "epoch": 0.78, + "objective/train/advantage_avg": -8.60083382576704e-05, + "objective/train/docs_used": 941211, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.307102918624878, + "objective/train/original_loss": 1.3071026802062988, + "objective/train/theoretical_loss": 3.387668763155836, + "objective/train/tokens_used": 2609132000, + "objective/train/value_avg": -0.007694244384765625, + "objective/train/value_loss": 0.00019316507678013295, + "objective/train/value_max": -4.7206878662109375e-05, + "objective/train/value_min": -0.380126953125, + "objective/train/value_reward_corr": 0.6662835411165384, + "objective/train/value_std": 0.0128936767578125, + "objective/train/weight_avg": 1.000002145767212, + "objective/train/weighted_lm_loss": 1.3065834045410156, + "objective/train/weights_max": 1.1861990690231323, + "objective/train/weights_min": 0.3745060861110687, + "theoretical_loss": 3.387668763155836, + "tokens_seen": 2588672000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021770181351307978, + "loss": 0.0681, + "theoretical_loss": 3.387668763155836, + "tokens_seen": 2588672000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021762156957149737, + "loss": 0.066, + "theoretical_loss": 3.387641819537552, + "tokens_seen": 2588934144 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021754132562991493, + "loss": 0.0682, + "theoretical_loss": 3.3876148794111267, + "tokens_seen": 2589196288 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021746108168833254, + "loss": 0.064, + "theoretical_loss": 3.3875879427757543, + "tokens_seen": 2589458432 + }, + { + "epoch": 0.78, + "learning_rate": 0.00021738083774675013, + "loss": 0.0653, + "theoretical_loss": 3.3875610096306286, + "tokens_seen": 2589720576 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002173005938051677, + "loss": 0.0659, + "theoretical_loss": 3.3875340799749445, + "tokens_seen": 2589982720 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002172203498635853, + "loss": 0.0612, + "theoretical_loss": 3.3875071538078965, + "tokens_seen": 2590244864 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021714010592200288, + "loss": 0.0623, + "theoretical_loss": 3.38748023112868, + "tokens_seen": 2590507008 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002170598619804205, + "loss": 0.0641, + "theoretical_loss": 3.3874533119364902, + "tokens_seen": 2590769152 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021697961803883805, + "loss": 0.0669, + "theoretical_loss": 3.3874263962305227, + "tokens_seen": 2591031296 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021689937409725567, + "loss": 0.0635, + "theoretical_loss": 3.3873994840099733, + "tokens_seen": 2591293440 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021681913015567325, + "loss": 0.0663, + "theoretical_loss": 3.3873725752740382, + "tokens_seen": 2591555584 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021673888621409087, + "loss": 0.0637, + "theoretical_loss": 3.387345670021914, + "tokens_seen": 2591817728 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.000786102085839957, + "objective/train/docs_used": 942495, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2474442720413208, + "objective/train/original_loss": 1.2474443912506104, + "objective/train/theoretical_loss": 3.3873322187020296, + "objective/train/tokens_used": 2612408800, + "objective/train/value_avg": -0.006381988525390625, + "objective/train/value_loss": 0.00012210979184601456, + "objective/train/value_max": -3.11732292175293e-05, + "objective/train/value_min": -0.37255859375, + "objective/train/value_reward_corr": 0.750205768261117, + "objective/train/value_std": 0.0131072998046875, + "objective/train/weight_avg": 1.000842809677124, + "objective/train/weighted_lm_loss": 1.248508095741272, + "objective/train/weights_max": 1.166409969329834, + "objective/train/weights_min": 0.3774433732032776, + "theoretical_loss": 3.3873322187020296, + "tokens_seen": 2591948800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021665864227250842, + "loss": 0.0657, + "theoretical_loss": 3.387318768252797, + "tokens_seen": 2592079872 + }, + { + "epoch": 0.79, + "learning_rate": 0.000216578398330926, + "loss": 0.0652, + "theoretical_loss": 3.387291869965884, + "tokens_seen": 2592342016 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021649815438934362, + "loss": 0.0676, + "theoretical_loss": 3.3872649751603725, + "tokens_seen": 2592604160 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021641791044776118, + "loss": 0.0663, + "theoretical_loss": 3.3872380838354603, + "tokens_seen": 2592866304 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002163376665061788, + "loss": 0.0618, + "theoretical_loss": 3.3872111959903446, + "tokens_seen": 2593128448 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021625742256459638, + "loss": 0.0633, + "theoretical_loss": 3.3871843116242237, + "tokens_seen": 2593390592 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021617717862301396, + "loss": 0.0654, + "theoretical_loss": 3.3871574307362957, + "tokens_seen": 2593652736 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021609693468143155, + "loss": 0.0651, + "theoretical_loss": 3.387130553325759, + "tokens_seen": 2593914880 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021601669073984913, + "loss": 0.0659, + "theoretical_loss": 3.3871036793918123, + "tokens_seen": 2594177024 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021593644679826675, + "loss": 0.0659, + "theoretical_loss": 3.3870768089336556, + "tokens_seen": 2594439168 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021585620285668433, + "loss": 0.0663, + "theoretical_loss": 3.3870499419504867, + "tokens_seen": 2594701312 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021577595891510192, + "loss": 0.0659, + "theoretical_loss": 3.3870230784415067, + "tokens_seen": 2594963456 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": -0.0007075011963024735, + "objective/train/docs_used": 943650, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3366597890853882, + "objective/train/original_loss": 1.3366597890853882, + "objective/train/theoretical_loss": 3.3869962184059146, + "objective/train/tokens_used": 2615685600, + "objective/train/value_avg": -0.00949859619140625, + "objective/train/value_loss": 0.0006140853511169553, + "objective/train/value_max": -1.9252300262451172e-05, + "objective/train/value_min": -0.75830078125, + "objective/train/value_reward_corr": 0.7244639267372566, + "objective/train/value_std": 0.0239410400390625, + "objective/train/weight_avg": 0.9995501637458801, + "objective/train/weighted_lm_loss": 1.3356695175170898, + "objective/train/weights_max": 1.4631335735321045, + "objective/train/weights_min": 0.23861059546470642, + "theoretical_loss": 3.3869962184059146, + "tokens_seen": 2595225600 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002156957149735195, + "loss": 0.0685, + "theoretical_loss": 3.3869962184059146, + "tokens_seen": 2595225600 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002156154710319371, + "loss": 0.0695, + "theoretical_loss": 3.386969361842911, + "tokens_seen": 2595487744 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002155352270903547, + "loss": 0.0671, + "theoretical_loss": 3.3869425087516953, + "tokens_seen": 2595749888 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021545498314877226, + "loss": 0.0673, + "theoretical_loss": 3.386915659131469, + "tokens_seen": 2596012032 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021537473920718987, + "loss": 0.0681, + "theoretical_loss": 3.386888812981433, + "tokens_seen": 2596274176 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021529449526560746, + "loss": 0.0642, + "theoretical_loss": 3.3868619703007883, + "tokens_seen": 2596536320 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021521425132402502, + "loss": 0.0641, + "theoretical_loss": 3.386835131088737, + "tokens_seen": 2596798464 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021513400738244263, + "loss": 0.0648, + "theoretical_loss": 3.386808295344479, + "tokens_seen": 2597060608 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021505376344086021, + "loss": 0.0654, + "theoretical_loss": 3.386781463067218, + "tokens_seen": 2597322752 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021497351949927783, + "loss": 0.0655, + "theoretical_loss": 3.386754634256156, + "tokens_seen": 2597584896 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021489327555769539, + "loss": 0.0644, + "theoretical_loss": 3.3867278089104946, + "tokens_seen": 2597847040 + }, + { + "epoch": 0.79, + "learning_rate": 0.000214813031616113, + "loss": 0.0661, + "theoretical_loss": 3.3867009870294376, + "tokens_seen": 2598109184 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021473278767453058, + "loss": 0.0686, + "theoretical_loss": 3.3866741686121875, + "tokens_seen": 2598371328 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": -0.00011098467803094536, + "objective/train/docs_used": 944930, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2284715175628662, + "objective/train/original_loss": 1.2284715175628662, + "objective/train/theoretical_loss": 3.3866607607022408, + "objective/train/tokens_used": 2618962400, + "objective/train/value_avg": -0.00836181640625, + "objective/train/value_loss": 0.0007939133793115616, + "objective/train/value_max": -3.916025161743164e-05, + "objective/train/value_min": -0.68017578125, + "objective/train/value_reward_corr": 0.5531434795339926, + "objective/train/value_std": 0.0196075439453125, + "objective/train/weight_avg": 1.0001929998397827, + "objective/train/weighted_lm_loss": 1.2284572124481201, + "objective/train/weights_max": 1.8591458797454834, + "objective/train/weights_min": 0.06395908445119858, + "theoretical_loss": 3.3866607607022408, + "tokens_seen": 2598502400 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021465254373294817, + "loss": 0.0646, + "theoretical_loss": 3.3866473536579473, + "tokens_seen": 2598633472 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021457229979136576, + "loss": 0.0684, + "theoretical_loss": 3.3866205421659217, + "tokens_seen": 2598895616 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021449205584978334, + "loss": 0.0674, + "theoretical_loss": 3.386593734135313, + "tokens_seen": 2599157760 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021441181190820095, + "loss": 0.0674, + "theoretical_loss": 3.3865669295653262, + "tokens_seen": 2599419904 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002143315679666185, + "loss": 0.0626, + "theoretical_loss": 3.3865401284551657, + "tokens_seen": 2599682048 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002142513240250361, + "loss": 0.0662, + "theoretical_loss": 3.3865133308040356, + "tokens_seen": 2599944192 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002141710800834537, + "loss": 0.0674, + "theoretical_loss": 3.3864865366111414, + "tokens_seen": 2600206336 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002140908361418713, + "loss": 0.0626, + "theoretical_loss": 3.3864597458756878, + "tokens_seen": 2600468480 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021401059220028888, + "loss": 0.0675, + "theoretical_loss": 3.38643295859688, + "tokens_seen": 2600730624 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021393034825870647, + "loss": 0.0695, + "theoretical_loss": 3.3864061747739242, + "tokens_seen": 2600992768 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021385010431712408, + "loss": 0.0696, + "theoretical_loss": 3.386379394406026, + "tokens_seen": 2601254912 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021376986037554166, + "loss": 0.0649, + "theoretical_loss": 3.3863526174923915, + "tokens_seen": 2601517056 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.000880547275301069, + "objective/train/docs_used": 946148, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.5105973482131958, + "objective/train/original_loss": 1.5105971097946167, + "objective/train/theoretical_loss": 3.3863258440322275, + "objective/train/tokens_used": 2622239200, + "objective/train/value_avg": -0.006900787353515625, + "objective/train/value_loss": 0.00013182000839151442, + "objective/train/value_max": -2.956390380859375e-05, + "objective/train/value_min": -0.343017578125, + "objective/train/value_reward_corr": 0.8042078950290413, + "objective/train/value_std": 0.01509857177734375, + "objective/train/weight_avg": 1.0009452104568481, + "objective/train/weighted_lm_loss": 1.5125845670700073, + "objective/train/weights_max": 1.2113949060440063, + "objective/train/weights_min": 0.6516265273094177, + "theoretical_loss": 3.3863258440322275, + "tokens_seen": 2601779200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021368961643395922, + "loss": 0.0669, + "theoretical_loss": 3.3863258440322275, + "tokens_seen": 2601779200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021360937249237684, + "loss": 0.0677, + "theoretical_loss": 3.3862990740247403, + "tokens_seen": 2602041344 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021352912855079442, + "loss": 0.0653, + "theoretical_loss": 3.3862723074691377, + "tokens_seen": 2602303488 + }, + { + "epoch": 0.79, + "learning_rate": 0.000213448884609212, + "loss": 0.0705, + "theoretical_loss": 3.3862455443646255, + "tokens_seen": 2602565632 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002133686406676296, + "loss": 0.0683, + "theoretical_loss": 3.3862187847104126, + "tokens_seen": 2602827776 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021328839672604718, + "loss": 0.0652, + "theoretical_loss": 3.3861920285057057, + "tokens_seen": 2603089920 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002132081527844648, + "loss": 0.0661, + "theoretical_loss": 3.386165275749714, + "tokens_seen": 2603352064 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021312790884288235, + "loss": 0.0652, + "theoretical_loss": 3.3861385264416444, + "tokens_seen": 2603614208 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021304766490129996, + "loss": 0.0661, + "theoretical_loss": 3.3861117805807064, + "tokens_seen": 2603876352 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021296742095971755, + "loss": 0.0663, + "theoretical_loss": 3.3860850381661085, + "tokens_seen": 2604138496 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021288717701813516, + "loss": 0.0674, + "theoretical_loss": 3.3860582991970594, + "tokens_seen": 2604400640 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021280693307655272, + "loss": 0.0668, + "theoretical_loss": 3.3860315636727694, + "tokens_seen": 2604662784 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002127266891349703, + "loss": 0.0667, + "theoretical_loss": 3.386004831592447, + "tokens_seen": 2604924928 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": -0.00013600836973637342, + "objective/train/docs_used": 947237, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.1746832132339478, + "objective/train/original_loss": 1.1746830940246582, + "objective/train/theoretical_loss": 3.385991466843527, + "objective/train/tokens_used": 2625516000, + "objective/train/value_avg": -0.00673675537109375, + "objective/train/value_loss": 0.00014905710122548044, + "objective/train/value_max": -3.534555435180664e-05, + "objective/train/value_min": -0.218994140625, + "objective/train/value_reward_corr": 0.7247144381162496, + "objective/train/value_std": 0.0120391845703125, + "objective/train/weight_avg": 0.9999330639839172, + "objective/train/weighted_lm_loss": 1.1741994619369507, + "objective/train/weights_max": 1.1198102235794067, + "objective/train/weights_min": 0.3691932260990143, + "theoretical_loss": 3.385991466843527, + "tokens_seen": 2605056000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021264644519338792, + "loss": 0.0638, + "theoretical_loss": 3.385978102955303, + "tokens_seen": 2605187072 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021256620125180547, + "loss": 0.0647, + "theoretical_loss": 3.3859513777605468, + "tokens_seen": 2605449216 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002124859573102231, + "loss": 0.065, + "theoretical_loss": 3.385924656007389, + "tokens_seen": 2605711360 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021240571336864067, + "loss": 0.0658, + "theoretical_loss": 3.38589793769504, + "tokens_seen": 2605973504 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021232546942705826, + "loss": 0.0668, + "theoretical_loss": 3.3858712228227117, + "tokens_seen": 2606235648 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021224522548547584, + "loss": 0.0632, + "theoretical_loss": 3.385844511389614, + "tokens_seen": 2606497792 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021216498154389343, + "loss": 0.0693, + "theoretical_loss": 3.385817803394959, + "tokens_seen": 2606759936 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021208473760231104, + "loss": 0.0651, + "theoretical_loss": 3.3857910988379576, + "tokens_seen": 2607022080 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021200449366072863, + "loss": 0.0654, + "theoretical_loss": 3.385764397717823, + "tokens_seen": 2607284224 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002119242497191462, + "loss": 0.0659, + "theoretical_loss": 3.3857377000337663, + "tokens_seen": 2607546368 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002118440057775638, + "loss": 0.0631, + "theoretical_loss": 3.3857110057850006, + "tokens_seen": 2607808512 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021176376183598138, + "loss": 0.0662, + "theoretical_loss": 3.3856843149707383, + "tokens_seen": 2608070656 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.001057982794009149, + "objective/train/docs_used": 948381, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.307920217514038, + "objective/train/original_loss": 1.307920217514038, + "objective/train/theoretical_loss": 3.3856576275901924, + "objective/train/tokens_used": 2628792800, + "objective/train/value_avg": -0.0081939697265625, + "objective/train/value_loss": 0.0002661271137185395, + "objective/train/value_max": -2.7120113372802734e-05, + "objective/train/value_min": -0.9365234375, + "objective/train/value_reward_corr": 0.7017694738472946, + "objective/train/value_std": 0.018798828125, + "objective/train/weight_avg": 1.001183032989502, + "objective/train/weighted_lm_loss": 1.3090044260025024, + "objective/train/weights_max": 1.9032342433929443, + "objective/train/weights_min": 0.37690240144729614, + "theoretical_loss": 3.3856576275901924, + "tokens_seen": 2608332800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021168351789439897, + "loss": 0.0635, + "theoretical_loss": 3.3856576275901924, + "tokens_seen": 2608332800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021160327395281655, + "loss": 0.0665, + "theoretical_loss": 3.385630943642576, + "tokens_seen": 2608594944 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021152303001123417, + "loss": 0.0646, + "theoretical_loss": 3.3856042631271026, + "tokens_seen": 2608857088 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021144278606965175, + "loss": 0.0659, + "theoretical_loss": 3.385577586042986, + "tokens_seen": 2609119232 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002113625421280693, + "loss": 0.0661, + "theoretical_loss": 3.385550912389441, + "tokens_seen": 2609381376 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021128229818648692, + "loss": 0.0671, + "theoretical_loss": 3.385524242165681, + "tokens_seen": 2609643520 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002112020542449045, + "loss": 0.0635, + "theoretical_loss": 3.3854975753709207, + "tokens_seen": 2609905664 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021112181030332212, + "loss": 0.0672, + "theoretical_loss": 3.3854709120043744, + "tokens_seen": 2610167808 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021104156636173968, + "loss": 0.0653, + "theoretical_loss": 3.3854442520652577, + "tokens_seen": 2610429952 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002109613224201573, + "loss": 0.0653, + "theoretical_loss": 3.385417595552786, + "tokens_seen": 2610692096 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021088107847857488, + "loss": 0.0674, + "theoretical_loss": 3.3853909424661746, + "tokens_seen": 2610954240 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021080083453699246, + "loss": 0.0675, + "theoretical_loss": 3.3853642928046397, + "tokens_seen": 2611216384 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021072059059541005, + "loss": 0.0668, + "theoretical_loss": 3.3853376465673968, + "tokens_seen": 2611478528 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.0015518314903602004, + "objective/train/docs_used": 949550, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.2171560525894165, + "objective/train/original_loss": 1.217156171798706, + "objective/train/theoretical_loss": 3.38532432473264, + "objective/train/tokens_used": 2632069600, + "objective/train/value_avg": -0.007183074951171875, + "objective/train/value_loss": 0.00020713287813123316, + "objective/train/value_max": -3.24249267578125e-05, + "objective/train/value_min": -0.37255859375, + "objective/train/value_reward_corr": 0.6996092584226039, + "objective/train/value_std": 0.01509857177734375, + "objective/train/weight_avg": 1.0016437768936157, + "objective/train/weighted_lm_loss": 1.218762993812561, + "objective/train/weights_max": 1.308711290359497, + "objective/train/weights_min": 0.37102511525154114, + "theoretical_loss": 3.38532432473264, + "tokens_seen": 2611609600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021064034665382763, + "loss": 0.07, + "theoretical_loss": 3.3853110037536625, + "tokens_seen": 2611740672 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021056010271224525, + "loss": 0.0681, + "theoretical_loss": 3.3852843643626533, + "tokens_seen": 2612002816 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002104798587706628, + "loss": 0.069, + "theoretical_loss": 3.3852577283935865, + "tokens_seen": 2612264960 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002103996148290804, + "loss": 0.0681, + "theoretical_loss": 3.385231095845678, + "tokens_seen": 2612527104 + }, + { + "epoch": 0.79, + "learning_rate": 0.000210319370887498, + "loss": 0.0624, + "theoretical_loss": 3.385204466718147, + "tokens_seen": 2612789248 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002102391269459156, + "loss": 0.0661, + "theoretical_loss": 3.3851778410102096, + "tokens_seen": 2613051392 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021015888300433317, + "loss": 0.0673, + "theoretical_loss": 3.3851512187210844, + "tokens_seen": 2613313536 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021007863906275076, + "loss": 0.066, + "theoretical_loss": 3.3851245998499895, + "tokens_seen": 2613575680 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020999839512116837, + "loss": 0.0669, + "theoretical_loss": 3.385097984396143, + "tokens_seen": 2613837824 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020991815117958596, + "loss": 0.0656, + "theoretical_loss": 3.3850713723587633, + "tokens_seen": 2614099968 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020983790723800352, + "loss": 0.065, + "theoretical_loss": 3.38504476373707, + "tokens_seen": 2614362112 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020975766329642113, + "loss": 0.0652, + "theoretical_loss": 3.3850181585302823, + "tokens_seen": 2614624256 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 2.870370735763572e-05, + "objective/train/docs_used": 950739, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3297094106674194, + "objective/train/original_loss": 1.3297092914581299, + "objective/train/theoretical_loss": 3.384991556737619, + "objective/train/tokens_used": 2635346400, + "objective/train/value_avg": -0.00719451904296875, + "objective/train/value_loss": 0.00031871211831457913, + "objective/train/value_max": -4.7206878662109375e-05, + "objective/train/value_min": -0.94091796875, + "objective/train/value_reward_corr": 0.7650113923665571, + "objective/train/value_std": 0.0180816650390625, + "objective/train/weight_avg": 1.0001702308654785, + "objective/train/weighted_lm_loss": 1.3302415609359741, + "objective/train/weights_max": 1.8773905038833618, + "objective/train/weights_min": 0.3694460988044739, + "theoretical_loss": 3.384991556737619, + "tokens_seen": 2614886400 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020967741935483871, + "loss": 0.0634, + "theoretical_loss": 3.384991556737619, + "tokens_seen": 2614886400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002095971754132563, + "loss": 0.0666, + "theoretical_loss": 3.3849649583583004, + "tokens_seen": 2615148544 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020951693147167389, + "loss": 0.0657, + "theoretical_loss": 3.3849383633915457, + "tokens_seen": 2615410688 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002094366875300915, + "loss": 0.065, + "theoretical_loss": 3.3849117718365758, + "tokens_seen": 2615672832 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020935644358850908, + "loss": 0.067, + "theoretical_loss": 3.38488518369261, + "tokens_seen": 2615934976 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020927619964692664, + "loss": 0.0653, + "theoretical_loss": 3.384858598958871, + "tokens_seen": 2616197120 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020919595570534426, + "loss": 0.0655, + "theoretical_loss": 3.384832017634578, + "tokens_seen": 2616459264 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020911571176376184, + "loss": 0.0651, + "theoretical_loss": 3.384805439718953, + "tokens_seen": 2616721408 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020903546782217945, + "loss": 0.0656, + "theoretical_loss": 3.384778865211217, + "tokens_seen": 2616983552 + }, + { + "epoch": 0.79, + "learning_rate": 0.000208955223880597, + "loss": 0.0661, + "theoretical_loss": 3.3847522941105925, + "tokens_seen": 2617245696 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002088749799390146, + "loss": 0.0643, + "theoretical_loss": 3.384725726416301, + "tokens_seen": 2617507840 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002087947359974322, + "loss": 0.0646, + "theoretical_loss": 3.384699162127564, + "tokens_seen": 2617769984 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020871449205584977, + "loss": 0.0673, + "theoretical_loss": 3.3846726012436057, + "tokens_seen": 2618032128 + }, + { + "epoch": 0.79, + "objective/train/advantage_avg": 0.00038211545324884355, + "objective/train/docs_used": 951960, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.40494704246521, + "objective/train/original_loss": 1.40494704246521, + "objective/train/theoretical_loss": 3.384659322078175, + "objective/train/tokens_used": 2638623200, + "objective/train/value_avg": -0.01016998291015625, + "objective/train/value_loss": 0.00022523697407450527, + "objective/train/value_max": -4.297494888305664e-05, + "objective/train/value_min": -0.306640625, + "objective/train/value_reward_corr": 0.7207486398264816, + "objective/train/value_std": 0.0157012939453125, + "objective/train/weight_avg": 1.0004863739013672, + "objective/train/weighted_lm_loss": 1.4053072929382324, + "objective/train/weights_max": 1.181889295578003, + "objective/train/weights_min": 0.38233569264411926, + "theoretical_loss": 3.384659322078175, + "tokens_seen": 2618163200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020863424811426738, + "loss": 0.0636, + "theoretical_loss": 3.3846460437636474, + "tokens_seen": 2618294272 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020855400417268497, + "loss": 0.0635, + "theoretical_loss": 3.3846194896869126, + "tokens_seen": 2618556416 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020847376023110258, + "loss": 0.0683, + "theoretical_loss": 3.3845929390126246, + "tokens_seen": 2618818560 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020839351628952014, + "loss": 0.0672, + "theoretical_loss": 3.384566391740007, + "tokens_seen": 2619080704 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020831327234793772, + "loss": 0.0674, + "theoretical_loss": 3.3845398478682833, + "tokens_seen": 2619342848 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020823302840635534, + "loss": 0.0637, + "theoretical_loss": 3.384513307396678, + "tokens_seen": 2619604992 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020815278446477292, + "loss": 0.0647, + "theoretical_loss": 3.384486770324415, + "tokens_seen": 2619867136 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002080725405231905, + "loss": 0.0657, + "theoretical_loss": 3.3844602366507184, + "tokens_seen": 2620129280 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002079922965816081, + "loss": 0.0667, + "theoretical_loss": 3.3844337063748138, + "tokens_seen": 2620391424 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020791205264002568, + "loss": 0.0683, + "theoretical_loss": 3.384407179495926, + "tokens_seen": 2620653568 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020783180869844326, + "loss": 0.0634, + "theoretical_loss": 3.38438065601328, + "tokens_seen": 2620915712 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020775156475686085, + "loss": 0.0627, + "theoretical_loss": 3.3843541359261016, + "tokens_seen": 2621177856 + }, + { + "debugging/Compilability": 1.0, + "debugging/distinct-1-grams": 0.7183788677974512, + "debugging/entropy-1-grams": 5.311092611595205, + "debugging/length": 475.35714285714283, + "debugging/num_segments": 14, + "debugging/raw_token_scores_avg": 0.007209094241261482, + "debugging/raw_token_scores_std": 0.01626458764076233, + "debugging/score": 0.005309415865714915, + "debugging/score_std": 0.0067471201632161425, + "epoch": 0.79, + "objective/train/advantage_avg": -0.00040069257374852896, + "objective/train/docs_used": 953088, + "objective/train/instantaneous_batch_size": 32, + "objective/train/instantaneous_microbatch_size": 32768, + "objective/train/lm_loss": 1.3306788206100464, + "objective/train/original_loss": 1.3306788206100464, + "objective/train/theoretical_loss": 3.3843276192336167, + "objective/train/tokens_used": 2641900000, + "objective/train/value_avg": -0.006809234619140625, + "objective/train/value_loss": 0.00010478802141733468, + "objective/train/value_max": -2.86102294921875e-05, + "objective/train/value_min": -0.298095703125, + "objective/train/value_reward_corr": 0.778743762677434, + "objective/train/value_std": 0.01197052001953125, + "objective/train/weight_avg": 0.9996510744094849, + "objective/train/weighted_lm_loss": 1.3306902647018433, + "objective/train/weights_max": 1.1428725719451904, + "objective/train/weights_min": 0.8198684453964233, + "theoretical_loss": 3.3843276192336167, + "tokens_seen": 2621440000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00020767132081527846, + "loss": 0.0664, + "theoretical_loss": 3.3843276192336167, + "tokens_seen": 2621440000 + } + ], + "max_steps": 12588, + "num_train_epochs": 9223372036854775807, + "total_flos": 1.33782728343552e+18, + "trial_name": null, + "trial_params": null +}