{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3639164690398064, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6e-08, "loss": 2.8922, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.6e-08, "loss": 2.6572, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.199999999999999e-08, "loss": 2.6816, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.32e-07, "loss": 2.6259, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.7199999999999998e-07, "loss": 2.4734, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.12e-07, "loss": 2.4194, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.52e-07, "loss": 2.3645, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.9199999999999997e-07, "loss": 2.287, "step": 80 }, { "epoch": 0.02, "learning_rate": 3.32e-07, "loss": 2.2451, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.72e-07, "loss": 2.1339, "step": 100 }, { "epoch": 0.02, "learning_rate": 4.12e-07, "loss": 2.0578, "step": 110 }, { "epoch": 0.02, "learning_rate": 4.5199999999999997e-07, "loss": 2.0854, "step": 120 }, { "epoch": 0.02, "learning_rate": 4.92e-07, "loss": 1.9981, "step": 130 }, { "epoch": 0.03, "learning_rate": 5.32e-07, "loss": 1.927, "step": 140 }, { "epoch": 0.03, "learning_rate": 5.719999999999999e-07, "loss": 1.8661, "step": 150 }, { "epoch": 0.03, "learning_rate": 6.119999999999999e-07, "loss": 1.8546, "step": 160 }, { "epoch": 0.03, "learning_rate": 6.52e-07, "loss": 1.7949, "step": 170 }, { "epoch": 0.03, "learning_rate": 6.919999999999999e-07, "loss": 1.7926, "step": 180 }, { "epoch": 0.03, "learning_rate": 7.319999999999999e-07, "loss": 1.7737, "step": 190 }, { "epoch": 0.04, "learning_rate": 7.72e-07, "loss": 1.7178, "step": 200 }, { "epoch": 0.04, "learning_rate": 8.12e-07, "loss": 1.6872, "step": 210 }, { "epoch": 0.04, "learning_rate": 8.52e-07, "loss": 1.6504, "step": 220 }, { "epoch": 0.04, "learning_rate": 8.92e-07, "loss": 1.6955, "step": 230 }, { "epoch": 0.04, "learning_rate": 9.32e-07, "loss": 1.6822, "step": 240 }, { "epoch": 0.05, "learning_rate": 9.72e-07, "loss": 1.647, "step": 250 }, { "epoch": 0.05, "learning_rate": 1.012e-06, "loss": 1.6131, "step": 260 }, { "epoch": 0.05, "learning_rate": 1.052e-06, "loss": 1.6433, "step": 270 }, { "epoch": 0.05, "learning_rate": 1.092e-06, "loss": 1.596, "step": 280 }, { "epoch": 0.05, "learning_rate": 1.132e-06, "loss": 1.5824, "step": 290 }, { "epoch": 0.05, "learning_rate": 1.172e-06, "loss": 1.5606, "step": 300 }, { "epoch": 0.06, "learning_rate": 1.212e-06, "loss": 1.5886, "step": 310 }, { "epoch": 0.06, "learning_rate": 1.252e-06, "loss": 1.5905, "step": 320 }, { "epoch": 0.06, "learning_rate": 1.292e-06, "loss": 1.5493, "step": 330 }, { "epoch": 0.06, "learning_rate": 1.332e-06, "loss": 1.5734, "step": 340 }, { "epoch": 0.06, "learning_rate": 1.372e-06, "loss": 1.5435, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.4119999999999998e-06, "loss": 1.5359, "step": 360 }, { "epoch": 0.07, "learning_rate": 1.4519999999999998e-06, "loss": 1.5579, "step": 370 }, { "epoch": 0.07, "learning_rate": 1.4919999999999999e-06, "loss": 1.5358, "step": 380 }, { "epoch": 0.07, "learning_rate": 1.532e-06, "loss": 1.5509, "step": 390 }, { "epoch": 0.07, "learning_rate": 1.572e-06, "loss": 1.5116, "step": 400 }, { "epoch": 0.07, "learning_rate": 1.612e-06, "loss": 1.5251, "step": 410 }, { "epoch": 0.08, "learning_rate": 1.6519999999999998e-06, "loss": 1.5107, "step": 420 }, { "epoch": 0.08, "learning_rate": 1.6919999999999999e-06, "loss": 1.4805, "step": 430 }, { "epoch": 0.08, "learning_rate": 1.7319999999999999e-06, "loss": 1.4819, "step": 440 }, { "epoch": 0.08, "learning_rate": 1.772e-06, "loss": 1.4897, "step": 450 }, { "epoch": 0.08, "learning_rate": 1.812e-06, "loss": 1.4824, "step": 460 }, { "epoch": 0.09, "learning_rate": 1.852e-06, "loss": 1.4822, "step": 470 }, { "epoch": 0.09, "learning_rate": 1.8919999999999998e-06, "loss": 1.5, "step": 480 }, { "epoch": 0.09, "learning_rate": 1.932e-06, "loss": 1.4595, "step": 490 }, { "epoch": 0.09, "learning_rate": 1.972e-06, "loss": 1.4163, "step": 500 }, { "epoch": 0.09, "eval_wmt2019_zh-en_accuracy": 0.5762586867111048, "eval_wmt2019_zh-en_loss": 2.037109375, "eval_wmt2019_zh-en_runtime": 77.5891, "eval_wmt2019_zh-en_samples_per_second": 12.888, "eval_wmt2019_zh-en_steps_per_second": 3.222, "step": 500 }, { "epoch": 0.09, "eval_ted_trans_en-ja_accuracy": 0.49833809864188705, "eval_ted_trans_en-ja_loss": 2.359375, "eval_ted_trans_en-ja_runtime": 67.0162, "eval_ted_trans_en-ja_samples_per_second": 11.952, "eval_ted_trans_en-ja_steps_per_second": 2.999, "step": 500 }, { "epoch": 0.09, "eval_ted_trans_zh-ja_accuracy": 0.41924741924741926, "eval_ted_trans_zh-ja_loss": 3.099609375, "eval_ted_trans_zh-ja_runtime": 4.122, "eval_ted_trans_zh-ja_samples_per_second": 10.189, "eval_ted_trans_zh-ja_steps_per_second": 2.669, "step": 500 }, { "epoch": 0.09, "eval_sharegpt_accuracy": 0.6638255086604158, "eval_sharegpt_loss": 1.4677734375, "eval_sharegpt_runtime": 754.4011, "eval_sharegpt_samples_per_second": 4.438, "eval_sharegpt_steps_per_second": 1.109, "step": 500 }, { "epoch": 0.09, "eval_dolly15k_accuracy": 0.5514953610117659, "eval_dolly15k_loss": 1.9169921875, "eval_dolly15k_runtime": 63.6309, "eval_dolly15k_samples_per_second": 11.802, "eval_dolly15k_steps_per_second": 2.955, "step": 500 }, { "epoch": 0.09, "eval_ikala_accuracy": 0.6825314578991759, "eval_ikala_loss": 1.2626953125, "eval_ikala_runtime": 1328.8914, "eval_ikala_samples_per_second": 10.328, "eval_ikala_steps_per_second": 2.583, "step": 500 }, { "epoch": 0.09, "eval_oasst_export_accuracy": 0.5725428280687767, "eval_oasst_export_loss": 2.181640625, "eval_oasst_export_runtime": 206.1134, "eval_oasst_export_samples_per_second": 10.184, "eval_oasst_export_steps_per_second": 2.547, "step": 500 }, { "epoch": 0.09, "learning_rate": 2.012e-06, "loss": 1.4116, "step": 510 }, { "epoch": 0.09, "learning_rate": 2.052e-06, "loss": 1.4304, "step": 520 }, { "epoch": 0.1, "learning_rate": 2.092e-06, "loss": 1.4102, "step": 530 }, { "epoch": 0.1, "learning_rate": 2.132e-06, "loss": 1.3838, "step": 540 }, { "epoch": 0.1, "learning_rate": 2.172e-06, "loss": 1.4179, "step": 550 }, { "epoch": 0.1, "learning_rate": 2.212e-06, "loss": 1.4286, "step": 560 }, { "epoch": 0.1, "learning_rate": 2.2519999999999998e-06, "loss": 1.426, "step": 570 }, { "epoch": 0.11, "learning_rate": 2.292e-06, "loss": 1.4257, "step": 580 }, { "epoch": 0.11, "learning_rate": 2.332e-06, "loss": 1.4342, "step": 590 }, { "epoch": 0.11, "learning_rate": 2.372e-06, "loss": 1.4015, "step": 600 }, { "epoch": 0.11, "learning_rate": 2.412e-06, "loss": 1.3999, "step": 610 }, { "epoch": 0.11, "learning_rate": 2.452e-06, "loss": 1.4062, "step": 620 }, { "epoch": 0.11, "learning_rate": 2.492e-06, "loss": 1.427, "step": 630 }, { "epoch": 0.12, "learning_rate": 2.532e-06, "loss": 1.4171, "step": 640 }, { "epoch": 0.12, "learning_rate": 2.572e-06, "loss": 1.4221, "step": 650 }, { "epoch": 0.12, "learning_rate": 2.612e-06, "loss": 1.3547, "step": 660 }, { "epoch": 0.12, "learning_rate": 2.652e-06, "loss": 1.3783, "step": 670 }, { "epoch": 0.12, "learning_rate": 2.6920000000000002e-06, "loss": 1.3574, "step": 680 }, { "epoch": 0.13, "learning_rate": 2.7320000000000003e-06, "loss": 1.3604, "step": 690 }, { "epoch": 0.13, "learning_rate": 2.7719999999999995e-06, "loss": 1.3978, "step": 700 }, { "epoch": 0.13, "learning_rate": 2.8119999999999995e-06, "loss": 1.3879, "step": 710 }, { "epoch": 0.13, "learning_rate": 2.8519999999999995e-06, "loss": 1.3731, "step": 720 }, { "epoch": 0.13, "learning_rate": 2.8919999999999996e-06, "loss": 1.3732, "step": 730 }, { "epoch": 0.13, "learning_rate": 2.9319999999999996e-06, "loss": 1.3787, "step": 740 }, { "epoch": 0.14, "learning_rate": 2.9719999999999997e-06, "loss": 1.3842, "step": 750 }, { "epoch": 0.14, "learning_rate": 3.0119999999999997e-06, "loss": 1.3952, "step": 760 }, { "epoch": 0.14, "learning_rate": 3.0519999999999997e-06, "loss": 1.3117, "step": 770 }, { "epoch": 0.14, "learning_rate": 3.0919999999999998e-06, "loss": 1.3531, "step": 780 }, { "epoch": 0.14, "learning_rate": 3.132e-06, "loss": 1.3287, "step": 790 }, { "epoch": 0.15, "learning_rate": 3.172e-06, "loss": 1.3417, "step": 800 }, { "epoch": 0.15, "learning_rate": 3.212e-06, "loss": 1.3341, "step": 810 }, { "epoch": 0.15, "learning_rate": 3.2519999999999995e-06, "loss": 1.364, "step": 820 }, { "epoch": 0.15, "learning_rate": 3.2919999999999996e-06, "loss": 1.3201, "step": 830 }, { "epoch": 0.15, "learning_rate": 3.3319999999999996e-06, "loss": 1.3714, "step": 840 }, { "epoch": 0.15, "learning_rate": 3.3719999999999996e-06, "loss": 1.3098, "step": 850 }, { "epoch": 0.16, "learning_rate": 3.4119999999999997e-06, "loss": 1.3222, "step": 860 }, { "epoch": 0.16, "learning_rate": 3.4519999999999997e-06, "loss": 1.3451, "step": 870 }, { "epoch": 0.16, "learning_rate": 3.4919999999999998e-06, "loss": 1.3502, "step": 880 }, { "epoch": 0.16, "learning_rate": 3.532e-06, "loss": 1.3445, "step": 890 }, { "epoch": 0.16, "learning_rate": 3.572e-06, "loss": 1.3304, "step": 900 }, { "epoch": 0.17, "learning_rate": 3.612e-06, "loss": 1.3081, "step": 910 }, { "epoch": 0.17, "learning_rate": 3.652e-06, "loss": 1.3106, "step": 920 }, { "epoch": 0.17, "learning_rate": 3.692e-06, "loss": 1.3357, "step": 930 }, { "epoch": 0.17, "learning_rate": 3.732e-06, "loss": 1.3243, "step": 940 }, { "epoch": 0.17, "learning_rate": 3.7719999999999996e-06, "loss": 1.3164, "step": 950 }, { "epoch": 0.17, "learning_rate": 3.8119999999999997e-06, "loss": 1.3124, "step": 960 }, { "epoch": 0.18, "learning_rate": 3.852e-06, "loss": 1.3162, "step": 970 }, { "epoch": 0.18, "learning_rate": 3.891999999999999e-06, "loss": 1.3124, "step": 980 }, { "epoch": 0.18, "learning_rate": 3.932e-06, "loss": 1.3347, "step": 990 }, { "epoch": 0.18, "learning_rate": 3.971999999999999e-06, "loss": 1.3174, "step": 1000 }, { "epoch": 0.18, "eval_wmt2019_zh-en_accuracy": 0.5819691791089524, "eval_wmt2019_zh-en_loss": 1.9326171875, "eval_wmt2019_zh-en_runtime": 76.631, "eval_wmt2019_zh-en_samples_per_second": 13.05, "eval_wmt2019_zh-en_steps_per_second": 3.262, "step": 1000 }, { "epoch": 0.18, "eval_ted_trans_en-ja_accuracy": 0.5248655214298109, "eval_ted_trans_en-ja_loss": 2.15625, "eval_ted_trans_en-ja_runtime": 67.1457, "eval_ted_trans_en-ja_samples_per_second": 11.929, "eval_ted_trans_en-ja_steps_per_second": 2.993, "step": 1000 }, { "epoch": 0.18, "eval_ted_trans_zh-ja_accuracy": 0.46262002743484226, "eval_ted_trans_zh-ja_loss": 2.8671875, "eval_ted_trans_zh-ja_runtime": 3.9271, "eval_ted_trans_zh-ja_samples_per_second": 10.695, "eval_ted_trans_zh-ja_steps_per_second": 2.801, "step": 1000 }, { "epoch": 0.18, "eval_sharegpt_accuracy": 0.6835767342429696, "eval_sharegpt_loss": 1.326171875, "eval_sharegpt_runtime": 751.6419, "eval_sharegpt_samples_per_second": 4.454, "eval_sharegpt_steps_per_second": 1.114, "step": 1000 }, { "epoch": 0.18, "eval_dolly15k_accuracy": 0.5566094783945817, "eval_dolly15k_loss": 1.84765625, "eval_dolly15k_runtime": 63.1655, "eval_dolly15k_samples_per_second": 11.889, "eval_dolly15k_steps_per_second": 2.976, "step": 1000 }, { "epoch": 0.18, "eval_ikala_accuracy": 0.6979014199391581, "eval_ikala_loss": 1.1513671875, "eval_ikala_runtime": 1328.524, "eval_ikala_samples_per_second": 10.331, "eval_ikala_steps_per_second": 2.583, "step": 1000 }, { "epoch": 0.18, "eval_oasst_export_accuracy": 0.5836786504830943, "eval_oasst_export_loss": 2.068359375, "eval_oasst_export_runtime": 206.6429, "eval_oasst_export_samples_per_second": 10.158, "eval_oasst_export_steps_per_second": 2.541, "step": 1000 }, { "epoch": 0.18, "learning_rate": 4.011999999999999e-06, "loss": 1.2867, "step": 1010 }, { "epoch": 0.19, "learning_rate": 4.0519999999999995e-06, "loss": 1.2933, "step": 1020 }, { "epoch": 0.19, "learning_rate": 4.091999999999999e-06, "loss": 1.2778, "step": 1030 }, { "epoch": 0.19, "learning_rate": 4.1319999999999996e-06, "loss": 1.3085, "step": 1040 }, { "epoch": 0.19, "learning_rate": 4.171999999999999e-06, "loss": 1.2772, "step": 1050 }, { "epoch": 0.19, "learning_rate": 4.212e-06, "loss": 1.3461, "step": 1060 }, { "epoch": 0.19, "learning_rate": 4.251999999999999e-06, "loss": 1.3247, "step": 1070 }, { "epoch": 0.2, "learning_rate": 4.292e-06, "loss": 1.2988, "step": 1080 }, { "epoch": 0.2, "learning_rate": 4.331999999999999e-06, "loss": 1.3175, "step": 1090 }, { "epoch": 0.2, "learning_rate": 4.372e-06, "loss": 1.3061, "step": 1100 }, { "epoch": 0.2, "learning_rate": 4.4119999999999994e-06, "loss": 1.3001, "step": 1110 }, { "epoch": 0.2, "learning_rate": 4.452e-06, "loss": 1.3325, "step": 1120 }, { "epoch": 0.21, "learning_rate": 4.4919999999999995e-06, "loss": 1.3177, "step": 1130 }, { "epoch": 0.21, "learning_rate": 4.532e-06, "loss": 1.3128, "step": 1140 }, { "epoch": 0.21, "learning_rate": 4.572e-06, "loss": 1.302, "step": 1150 }, { "epoch": 0.21, "learning_rate": 4.612e-06, "loss": 1.3233, "step": 1160 }, { "epoch": 0.21, "learning_rate": 4.652e-06, "loss": 1.3075, "step": 1170 }, { "epoch": 0.21, "learning_rate": 4.692e-06, "loss": 1.3044, "step": 1180 }, { "epoch": 0.22, "learning_rate": 4.732e-06, "loss": 1.2686, "step": 1190 }, { "epoch": 0.22, "learning_rate": 4.772e-06, "loss": 1.3169, "step": 1200 }, { "epoch": 0.22, "learning_rate": 4.812e-06, "loss": 1.3075, "step": 1210 }, { "epoch": 0.22, "learning_rate": 4.852e-06, "loss": 1.2911, "step": 1220 }, { "epoch": 0.22, "learning_rate": 4.892e-06, "loss": 1.289, "step": 1230 }, { "epoch": 0.23, "learning_rate": 4.932e-06, "loss": 1.2944, "step": 1240 }, { "epoch": 0.23, "learning_rate": 4.972e-06, "loss": 1.2753, "step": 1250 }, { "epoch": 0.23, "learning_rate": 5.012e-06, "loss": 1.2949, "step": 1260 }, { "epoch": 0.23, "learning_rate": 5.051999999999999e-06, "loss": 1.2816, "step": 1270 }, { "epoch": 0.23, "learning_rate": 5.092e-06, "loss": 1.3104, "step": 1280 }, { "epoch": 0.23, "learning_rate": 5.131999999999999e-06, "loss": 1.274, "step": 1290 }, { "epoch": 0.24, "learning_rate": 5.172e-06, "loss": 1.296, "step": 1300 }, { "epoch": 0.24, "learning_rate": 5.211999999999999e-06, "loss": 1.29, "step": 1310 }, { "epoch": 0.24, "learning_rate": 5.252e-06, "loss": 1.2643, "step": 1320 }, { "epoch": 0.24, "learning_rate": 5.2919999999999995e-06, "loss": 1.2882, "step": 1330 }, { "epoch": 0.24, "learning_rate": 5.332e-06, "loss": 1.295, "step": 1340 }, { "epoch": 0.25, "learning_rate": 5.3719999999999996e-06, "loss": 1.248, "step": 1350 }, { "epoch": 0.25, "learning_rate": 5.412e-06, "loss": 1.3236, "step": 1360 }, { "epoch": 0.25, "learning_rate": 5.452e-06, "loss": 1.2925, "step": 1370 }, { "epoch": 0.25, "learning_rate": 5.492e-06, "loss": 1.2991, "step": 1380 }, { "epoch": 0.25, "learning_rate": 5.532e-06, "loss": 1.2853, "step": 1390 }, { "epoch": 0.25, "learning_rate": 5.572e-06, "loss": 1.2835, "step": 1400 }, { "epoch": 0.26, "learning_rate": 5.612e-06, "loss": 1.2687, "step": 1410 }, { "epoch": 0.26, "learning_rate": 5.652e-06, "loss": 1.254, "step": 1420 }, { "epoch": 0.26, "learning_rate": 5.692e-06, "loss": 1.3045, "step": 1430 }, { "epoch": 0.26, "learning_rate": 5.732e-06, "loss": 1.2598, "step": 1440 }, { "epoch": 0.26, "learning_rate": 5.772e-06, "loss": 1.2628, "step": 1450 }, { "epoch": 0.27, "learning_rate": 5.8120000000000004e-06, "loss": 1.2519, "step": 1460 }, { "epoch": 0.27, "learning_rate": 5.852e-06, "loss": 1.2902, "step": 1470 }, { "epoch": 0.27, "learning_rate": 5.892e-06, "loss": 1.2999, "step": 1480 }, { "epoch": 0.27, "learning_rate": 5.932e-06, "loss": 1.283, "step": 1490 }, { "epoch": 0.27, "learning_rate": 5.972e-06, "loss": 1.2697, "step": 1500 }, { "epoch": 0.27, "eval_wmt2019_zh-en_accuracy": 0.587438249823571, "eval_wmt2019_zh-en_loss": 1.9453125, "eval_wmt2019_zh-en_runtime": 77.3466, "eval_wmt2019_zh-en_samples_per_second": 12.929, "eval_wmt2019_zh-en_steps_per_second": 3.232, "step": 1500 }, { "epoch": 0.27, "eval_ted_trans_en-ja_accuracy": 0.5393802273612374, "eval_ted_trans_en-ja_loss": 2.05078125, "eval_ted_trans_en-ja_runtime": 66.8272, "eval_ted_trans_en-ja_samples_per_second": 11.986, "eval_ted_trans_en-ja_steps_per_second": 3.008, "step": 1500 }, { "epoch": 0.27, "eval_ted_trans_zh-ja_accuracy": 0.46426092990978485, "eval_ted_trans_zh-ja_loss": 2.8203125, "eval_ted_trans_zh-ja_runtime": 3.6203, "eval_ted_trans_zh-ja_samples_per_second": 11.601, "eval_ted_trans_zh-ja_steps_per_second": 3.038, "step": 1500 }, { "epoch": 0.27, "eval_sharegpt_accuracy": 0.6912774549792565, "eval_sharegpt_loss": 1.2744140625, "eval_sharegpt_runtime": 755.8288, "eval_sharegpt_samples_per_second": 4.43, "eval_sharegpt_steps_per_second": 1.107, "step": 1500 }, { "epoch": 0.27, "eval_dolly15k_accuracy": 0.5594429758634393, "eval_dolly15k_loss": 1.8212890625, "eval_dolly15k_runtime": 63.0527, "eval_dolly15k_samples_per_second": 11.911, "eval_dolly15k_steps_per_second": 2.982, "step": 1500 }, { "epoch": 0.27, "eval_ikala_accuracy": 0.7042301907582296, "eval_ikala_loss": 1.115234375, "eval_ikala_runtime": 1329.6879, "eval_ikala_samples_per_second": 10.322, "eval_ikala_steps_per_second": 2.581, "step": 1500 }, { "epoch": 0.27, "eval_oasst_export_accuracy": 0.5868625620259539, "eval_oasst_export_loss": 2.0390625, "eval_oasst_export_runtime": 206.437, "eval_oasst_export_samples_per_second": 10.168, "eval_oasst_export_steps_per_second": 2.543, "step": 1500 }, { "epoch": 0.27, "learning_rate": 6.011999999999999e-06, "loss": 1.3179, "step": 1510 }, { "epoch": 0.28, "learning_rate": 6.051999999999999e-06, "loss": 1.2781, "step": 1520 }, { "epoch": 0.28, "learning_rate": 6.0919999999999994e-06, "loss": 1.2717, "step": 1530 }, { "epoch": 0.28, "learning_rate": 6.131999999999999e-06, "loss": 1.2661, "step": 1540 }, { "epoch": 0.28, "learning_rate": 6.1719999999999995e-06, "loss": 1.287, "step": 1550 }, { "epoch": 0.28, "learning_rate": 6.211999999999999e-06, "loss": 1.2784, "step": 1560 }, { "epoch": 0.29, "learning_rate": 6.252e-06, "loss": 1.2767, "step": 1570 }, { "epoch": 0.29, "learning_rate": 6.291999999999999e-06, "loss": 1.2657, "step": 1580 }, { "epoch": 0.29, "learning_rate": 6.332e-06, "loss": 1.2957, "step": 1590 }, { "epoch": 0.29, "learning_rate": 6.371999999999999e-06, "loss": 1.3181, "step": 1600 }, { "epoch": 0.29, "learning_rate": 6.412e-06, "loss": 1.2688, "step": 1610 }, { "epoch": 0.29, "learning_rate": 6.451999999999999e-06, "loss": 1.2598, "step": 1620 }, { "epoch": 0.3, "learning_rate": 6.492e-06, "loss": 1.2875, "step": 1630 }, { "epoch": 0.3, "learning_rate": 6.5319999999999995e-06, "loss": 1.2573, "step": 1640 }, { "epoch": 0.3, "learning_rate": 6.572e-06, "loss": 1.2698, "step": 1650 }, { "epoch": 0.3, "learning_rate": 6.6119999999999995e-06, "loss": 1.271, "step": 1660 }, { "epoch": 0.3, "learning_rate": 6.652e-06, "loss": 1.2724, "step": 1670 }, { "epoch": 0.31, "learning_rate": 6.692e-06, "loss": 1.2527, "step": 1680 }, { "epoch": 0.31, "learning_rate": 6.732e-06, "loss": 1.2659, "step": 1690 }, { "epoch": 0.31, "learning_rate": 6.772e-06, "loss": 1.2298, "step": 1700 }, { "epoch": 0.31, "learning_rate": 6.812e-06, "loss": 1.2217, "step": 1710 }, { "epoch": 0.31, "learning_rate": 6.852e-06, "loss": 1.2695, "step": 1720 }, { "epoch": 0.31, "learning_rate": 6.892e-06, "loss": 1.2339, "step": 1730 }, { "epoch": 0.32, "learning_rate": 6.932e-06, "loss": 1.2342, "step": 1740 }, { "epoch": 0.32, "learning_rate": 6.972e-06, "loss": 1.2652, "step": 1750 }, { "epoch": 0.32, "learning_rate": 7.011999999999999e-06, "loss": 1.2411, "step": 1760 }, { "epoch": 0.32, "learning_rate": 7.0519999999999996e-06, "loss": 1.2478, "step": 1770 }, { "epoch": 0.32, "learning_rate": 7.091999999999999e-06, "loss": 1.2379, "step": 1780 }, { "epoch": 0.33, "learning_rate": 7.132e-06, "loss": 1.2847, "step": 1790 }, { "epoch": 0.33, "learning_rate": 7.171999999999999e-06, "loss": 1.2378, "step": 1800 }, { "epoch": 0.33, "learning_rate": 7.212e-06, "loss": 1.2901, "step": 1810 }, { "epoch": 0.33, "learning_rate": 7.251999999999999e-06, "loss": 1.2662, "step": 1820 }, { "epoch": 0.33, "learning_rate": 7.292e-06, "loss": 1.2622, "step": 1830 }, { "epoch": 0.33, "learning_rate": 7.3319999999999994e-06, "loss": 1.2518, "step": 1840 }, { "epoch": 0.34, "learning_rate": 7.372e-06, "loss": 1.2648, "step": 1850 }, { "epoch": 0.34, "learning_rate": 7.4119999999999995e-06, "loss": 1.2582, "step": 1860 }, { "epoch": 0.34, "learning_rate": 7.452e-06, "loss": 1.2545, "step": 1870 }, { "epoch": 0.34, "learning_rate": 7.492e-06, "loss": 1.2372, "step": 1880 }, { "epoch": 0.34, "learning_rate": 7.532e-06, "loss": 1.2266, "step": 1890 }, { "epoch": 0.35, "learning_rate": 7.572e-06, "loss": 1.2509, "step": 1900 }, { "epoch": 0.35, "learning_rate": 7.612e-06, "loss": 1.2487, "step": 1910 }, { "epoch": 0.35, "learning_rate": 7.652e-06, "loss": 1.2968, "step": 1920 }, { "epoch": 0.35, "learning_rate": 7.692e-06, "loss": 1.2719, "step": 1930 }, { "epoch": 0.35, "learning_rate": 7.732e-06, "loss": 1.2537, "step": 1940 }, { "epoch": 0.35, "learning_rate": 7.772e-06, "loss": 1.2733, "step": 1950 }, { "epoch": 0.36, "learning_rate": 7.812e-06, "loss": 1.2663, "step": 1960 }, { "epoch": 0.36, "learning_rate": 7.852e-06, "loss": 1.2496, "step": 1970 }, { "epoch": 0.36, "learning_rate": 7.892e-06, "loss": 1.2542, "step": 1980 }, { "epoch": 0.36, "learning_rate": 7.932e-06, "loss": 1.2398, "step": 1990 }, { "epoch": 0.36, "learning_rate": 7.972e-06, "loss": 1.2564, "step": 2000 }, { "epoch": 0.36, "eval_wmt2019_zh-en_accuracy": 0.5861558940985061, "eval_wmt2019_zh-en_loss": 1.9638671875, "eval_wmt2019_zh-en_runtime": 76.6629, "eval_wmt2019_zh-en_samples_per_second": 13.044, "eval_wmt2019_zh-en_steps_per_second": 3.261, "step": 2000 }, { "epoch": 0.36, "eval_ted_trans_en-ja_accuracy": 0.5495979979766785, "eval_ted_trans_en-ja_loss": 2.001953125, "eval_ted_trans_en-ja_runtime": 66.9893, "eval_ted_trans_en-ja_samples_per_second": 11.957, "eval_ted_trans_en-ja_steps_per_second": 3.0, "step": 2000 }, { "epoch": 0.36, "eval_ted_trans_zh-ja_accuracy": 0.47411944869831546, "eval_ted_trans_zh-ja_loss": 2.7109375, "eval_ted_trans_zh-ja_runtime": 4.1683, "eval_ted_trans_zh-ja_samples_per_second": 10.076, "eval_ted_trans_zh-ja_steps_per_second": 2.639, "step": 2000 }, { "epoch": 0.36, "eval_sharegpt_accuracy": 0.6977260319800304, "eval_sharegpt_loss": 1.234375, "eval_sharegpt_runtime": 751.9973, "eval_sharegpt_samples_per_second": 4.452, "eval_sharegpt_steps_per_second": 1.113, "step": 2000 }, { "epoch": 0.36, "eval_dolly15k_accuracy": 0.5599267437239759, "eval_dolly15k_loss": 1.822265625, "eval_dolly15k_runtime": 63.1103, "eval_dolly15k_samples_per_second": 11.9, "eval_dolly15k_steps_per_second": 2.979, "step": 2000 }, { "epoch": 0.36, "eval_ikala_accuracy": 0.708770642571952, "eval_ikala_loss": 1.0927734375, "eval_ikala_runtime": 1329.0014, "eval_ikala_samples_per_second": 10.327, "eval_ikala_steps_per_second": 2.582, "step": 2000 }, { "epoch": 0.36, "eval_oasst_export_accuracy": 0.5890918246383349, "eval_oasst_export_loss": 2.033203125, "eval_oasst_export_runtime": 207.1646, "eval_oasst_export_samples_per_second": 10.132, "eval_oasst_export_steps_per_second": 2.534, "step": 2000 } ], "max_steps": 43960, "num_train_epochs": 8, "total_flos": 952516817190912.0, "trial_name": null, "trial_params": null }