redpajama-3b-chat / trainer_state.json
ray
[init] push weights
29f0794
raw
history blame
No virus
96.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8212414090449476,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.0000000000000004e-08,
"loss": 2.1648,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 7.500000000000001e-08,
"loss": 2.131,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.2500000000000002e-07,
"loss": 1.9325,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 1.7500000000000002e-07,
"loss": 1.8743,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 2.2500000000000002e-07,
"loss": 1.8232,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 2.75e-07,
"loss": 1.7315,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 3.25e-07,
"loss": 1.656,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 3.75e-07,
"loss": 1.6538,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 4.2500000000000006e-07,
"loss": 1.5483,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 4.7500000000000006e-07,
"loss": 1.5073,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 5.250000000000001e-07,
"loss": 1.501,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 5.750000000000001e-07,
"loss": 1.4804,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 6.25e-07,
"loss": 1.4357,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 6.750000000000001e-07,
"loss": 1.424,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 7.25e-07,
"loss": 1.4579,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 7.750000000000001e-07,
"loss": 1.4185,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 8.250000000000001e-07,
"loss": 1.4141,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 8.75e-07,
"loss": 1.4098,
"step": 180
},
{
"epoch": 0.03,
"learning_rate": 9.25e-07,
"loss": 1.4144,
"step": 190
},
{
"epoch": 0.03,
"learning_rate": 9.7e-07,
"loss": 1.3644,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 1.02e-06,
"loss": 1.3524,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 1.0700000000000001e-06,
"loss": 1.3403,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 1.12e-06,
"loss": 1.3355,
"step": 230
},
{
"epoch": 0.04,
"learning_rate": 1.1700000000000002e-06,
"loss": 1.3448,
"step": 240
},
{
"epoch": 0.04,
"learning_rate": 1.2200000000000002e-06,
"loss": 1.322,
"step": 250
},
{
"epoch": 0.04,
"learning_rate": 1.2700000000000001e-06,
"loss": 1.3186,
"step": 260
},
{
"epoch": 0.04,
"learning_rate": 1.32e-06,
"loss": 1.3038,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 1.3700000000000002e-06,
"loss": 1.2853,
"step": 280
},
{
"epoch": 0.05,
"learning_rate": 1.42e-06,
"loss": 1.2939,
"step": 290
},
{
"epoch": 0.05,
"learning_rate": 1.4700000000000001e-06,
"loss": 1.2918,
"step": 300
},
{
"epoch": 0.05,
"learning_rate": 1.52e-06,
"loss": 1.2976,
"step": 310
},
{
"epoch": 0.05,
"learning_rate": 1.5700000000000002e-06,
"loss": 1.3128,
"step": 320
},
{
"epoch": 0.05,
"learning_rate": 1.6200000000000002e-06,
"loss": 1.2433,
"step": 330
},
{
"epoch": 0.06,
"learning_rate": 1.6700000000000003e-06,
"loss": 1.2978,
"step": 340
},
{
"epoch": 0.06,
"learning_rate": 1.72e-06,
"loss": 1.2964,
"step": 350
},
{
"epoch": 0.06,
"learning_rate": 1.77e-06,
"loss": 1.2625,
"step": 360
},
{
"epoch": 0.06,
"learning_rate": 1.8200000000000002e-06,
"loss": 1.2837,
"step": 370
},
{
"epoch": 0.06,
"learning_rate": 1.87e-06,
"loss": 1.2995,
"step": 380
},
{
"epoch": 0.06,
"learning_rate": 1.9200000000000003e-06,
"loss": 1.2706,
"step": 390
},
{
"epoch": 0.07,
"learning_rate": 1.97e-06,
"loss": 1.2819,
"step": 400
},
{
"epoch": 0.07,
"learning_rate": 2.02e-06,
"loss": 1.2522,
"step": 410
},
{
"epoch": 0.07,
"learning_rate": 2.07e-06,
"loss": 1.2955,
"step": 420
},
{
"epoch": 0.07,
"learning_rate": 2.12e-06,
"loss": 1.2506,
"step": 430
},
{
"epoch": 0.07,
"learning_rate": 2.17e-06,
"loss": 1.249,
"step": 440
},
{
"epoch": 0.07,
"learning_rate": 2.2200000000000003e-06,
"loss": 1.2413,
"step": 450
},
{
"epoch": 0.08,
"learning_rate": 2.2700000000000003e-06,
"loss": 1.2463,
"step": 460
},
{
"epoch": 0.08,
"learning_rate": 2.3200000000000002e-06,
"loss": 1.288,
"step": 470
},
{
"epoch": 0.08,
"learning_rate": 2.37e-06,
"loss": 1.2531,
"step": 480
},
{
"epoch": 0.08,
"learning_rate": 2.42e-06,
"loss": 1.2314,
"step": 490
},
{
"epoch": 0.08,
"learning_rate": 2.47e-06,
"loss": 1.2369,
"step": 500
},
{
"epoch": 0.08,
"eval_multi_news_accuracy": 0.5592306537314586,
"eval_multi_news_loss": 1.919921875,
"eval_multi_news_runtime": 374.7444,
"eval_multi_news_samples_per_second": 15.002,
"eval_multi_news_steps_per_second": 1.876,
"step": 500
},
{
"epoch": 0.08,
"eval_samsum_accuracy": 0.630043040249728,
"eval_samsum_loss": 1.3271484375,
"eval_samsum_runtime": 37.5336,
"eval_samsum_samples_per_second": 21.794,
"eval_samsum_steps_per_second": 2.744,
"step": 500
},
{
"epoch": 0.08,
"eval_billsum_accuracy": 0.6415100921468554,
"eval_billsum_loss": 1.4970703125,
"eval_billsum_runtime": 204.4286,
"eval_billsum_samples_per_second": 15.991,
"eval_billsum_steps_per_second": 2.001,
"step": 500
},
{
"epoch": 0.08,
"eval_wmt2019_zh-en_accuracy": 0.5844479239374446,
"eval_wmt2019_zh-en_loss": 1.89453125,
"eval_wmt2019_zh-en_runtime": 43.2897,
"eval_wmt2019_zh-en_samples_per_second": 23.1,
"eval_wmt2019_zh-en_steps_per_second": 2.888,
"step": 500
},
{
"epoch": 0.08,
"eval_ted_trans_en-ja_accuracy": 0.5366497079329188,
"eval_ted_trans_en-ja_loss": 2.01953125,
"eval_ted_trans_en-ja_runtime": 36.4641,
"eval_ted_trans_en-ja_samples_per_second": 21.967,
"eval_ted_trans_en-ja_steps_per_second": 2.77,
"step": 500
},
{
"epoch": 0.08,
"eval_ted_trans_zh-ja_accuracy": 0.44175365344467643,
"eval_ted_trans_zh-ja_loss": 2.703125,
"eval_ted_trans_zh-ja_runtime": 2.2264,
"eval_ted_trans_zh-ja_samples_per_second": 18.864,
"eval_ted_trans_zh-ja_steps_per_second": 2.695,
"step": 500
},
{
"epoch": 0.08,
"eval_sharegpt_accuracy": 0.7056496488080175,
"eval_sharegpt_loss": 1.1474609375,
"eval_sharegpt_runtime": 735.7691,
"eval_sharegpt_samples_per_second": 4.55,
"eval_sharegpt_steps_per_second": 0.569,
"step": 500
},
{
"epoch": 0.08,
"eval_dolly15k_accuracy": 0.5961999725877193,
"eval_dolly15k_loss": 1.6650390625,
"eval_dolly15k_runtime": 33.9484,
"eval_dolly15k_samples_per_second": 22.122,
"eval_dolly15k_steps_per_second": 2.769,
"step": 500
},
{
"epoch": 0.08,
"eval_ikala_accuracy": 0.7306054447586751,
"eval_ikala_loss": 1.0380859375,
"eval_ikala_runtime": 887.5903,
"eval_ikala_samples_per_second": 16.005,
"eval_ikala_steps_per_second": 2.001,
"step": 500
},
{
"epoch": 0.08,
"eval_oasst_export_accuracy": 0.656822117898619,
"eval_oasst_export_loss": 1.60546875,
"eval_oasst_export_runtime": 134.1688,
"eval_oasst_export_samples_per_second": 15.644,
"eval_oasst_export_steps_per_second": 1.96,
"step": 500
},
{
"epoch": 0.08,
"eval_joke_accuracy": 0.48218347232752085,
"eval_joke_loss": 2.29296875,
"eval_joke_runtime": 3.5706,
"eval_joke_samples_per_second": 21.285,
"eval_joke_steps_per_second": 2.801,
"step": 500
},
{
"epoch": 0.08,
"eval_gsm8k_accuracy": 0.7402563310685608,
"eval_gsm8k_loss": 1.0068359375,
"eval_gsm8k_runtime": 56.8505,
"eval_gsm8k_samples_per_second": 23.201,
"eval_gsm8k_steps_per_second": 2.902,
"step": 500
},
{
"epoch": 0.08,
"eval_webgpt_accuracy": 0.4973525539337287,
"eval_webgpt_loss": 2.21484375,
"eval_webgpt_runtime": 155.091,
"eval_webgpt_samples_per_second": 22.974,
"eval_webgpt_steps_per_second": 2.876,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 2.52e-06,
"loss": 1.2409,
"step": 510
},
{
"epoch": 0.09,
"learning_rate": 2.5700000000000004e-06,
"loss": 1.2076,
"step": 520
},
{
"epoch": 0.09,
"learning_rate": 2.6200000000000003e-06,
"loss": 1.2425,
"step": 530
},
{
"epoch": 0.09,
"learning_rate": 2.6700000000000003e-06,
"loss": 1.267,
"step": 540
},
{
"epoch": 0.09,
"learning_rate": 2.7200000000000002e-06,
"loss": 1.238,
"step": 550
},
{
"epoch": 0.09,
"learning_rate": 2.7700000000000006e-06,
"loss": 1.2176,
"step": 560
},
{
"epoch": 0.09,
"learning_rate": 2.82e-06,
"loss": 1.2168,
"step": 570
},
{
"epoch": 0.1,
"learning_rate": 2.87e-06,
"loss": 1.2262,
"step": 580
},
{
"epoch": 0.1,
"learning_rate": 2.92e-06,
"loss": 1.2125,
"step": 590
},
{
"epoch": 0.1,
"learning_rate": 2.97e-06,
"loss": 1.2092,
"step": 600
},
{
"epoch": 0.1,
"learning_rate": 3.0200000000000003e-06,
"loss": 1.2521,
"step": 610
},
{
"epoch": 0.1,
"learning_rate": 3.0700000000000003e-06,
"loss": 1.2297,
"step": 620
},
{
"epoch": 0.1,
"learning_rate": 3.12e-06,
"loss": 1.2317,
"step": 630
},
{
"epoch": 0.11,
"learning_rate": 3.17e-06,
"loss": 1.2225,
"step": 640
},
{
"epoch": 0.11,
"learning_rate": 3.2200000000000005e-06,
"loss": 1.2227,
"step": 650
},
{
"epoch": 0.11,
"learning_rate": 3.2700000000000005e-06,
"loss": 1.2172,
"step": 660
},
{
"epoch": 0.11,
"learning_rate": 3.3200000000000004e-06,
"loss": 1.22,
"step": 670
},
{
"epoch": 0.11,
"learning_rate": 3.3700000000000003e-06,
"loss": 1.2164,
"step": 680
},
{
"epoch": 0.11,
"learning_rate": 3.4200000000000007e-06,
"loss": 1.2045,
"step": 690
},
{
"epoch": 0.11,
"learning_rate": 3.4700000000000002e-06,
"loss": 1.2334,
"step": 700
},
{
"epoch": 0.12,
"learning_rate": 3.52e-06,
"loss": 1.1979,
"step": 710
},
{
"epoch": 0.12,
"learning_rate": 3.57e-06,
"loss": 1.2066,
"step": 720
},
{
"epoch": 0.12,
"learning_rate": 3.62e-06,
"loss": 1.2153,
"step": 730
},
{
"epoch": 0.12,
"learning_rate": 3.6700000000000004e-06,
"loss": 1.2246,
"step": 740
},
{
"epoch": 0.12,
"learning_rate": 3.7200000000000004e-06,
"loss": 1.2027,
"step": 750
},
{
"epoch": 0.12,
"learning_rate": 3.7700000000000003e-06,
"loss": 1.233,
"step": 760
},
{
"epoch": 0.13,
"learning_rate": 3.820000000000001e-06,
"loss": 1.2156,
"step": 770
},
{
"epoch": 0.13,
"learning_rate": 3.87e-06,
"loss": 1.2067,
"step": 780
},
{
"epoch": 0.13,
"learning_rate": 3.920000000000001e-06,
"loss": 1.2077,
"step": 790
},
{
"epoch": 0.13,
"learning_rate": 3.97e-06,
"loss": 1.184,
"step": 800
},
{
"epoch": 0.13,
"learning_rate": 4.0200000000000005e-06,
"loss": 1.1747,
"step": 810
},
{
"epoch": 0.13,
"learning_rate": 4.07e-06,
"loss": 1.2055,
"step": 820
},
{
"epoch": 0.14,
"learning_rate": 4.12e-06,
"loss": 1.2137,
"step": 830
},
{
"epoch": 0.14,
"learning_rate": 4.17e-06,
"loss": 1.1934,
"step": 840
},
{
"epoch": 0.14,
"learning_rate": 4.22e-06,
"loss": 1.2154,
"step": 850
},
{
"epoch": 0.14,
"learning_rate": 4.270000000000001e-06,
"loss": 1.2216,
"step": 860
},
{
"epoch": 0.14,
"learning_rate": 4.32e-06,
"loss": 1.2002,
"step": 870
},
{
"epoch": 0.14,
"learning_rate": 4.3700000000000005e-06,
"loss": 1.1698,
"step": 880
},
{
"epoch": 0.15,
"learning_rate": 4.42e-06,
"loss": 1.2006,
"step": 890
},
{
"epoch": 0.15,
"learning_rate": 4.47e-06,
"loss": 1.1706,
"step": 900
},
{
"epoch": 0.15,
"learning_rate": 4.520000000000001e-06,
"loss": 1.1898,
"step": 910
},
{
"epoch": 0.15,
"learning_rate": 4.57e-06,
"loss": 1.1941,
"step": 920
},
{
"epoch": 0.15,
"learning_rate": 4.620000000000001e-06,
"loss": 1.1978,
"step": 930
},
{
"epoch": 0.15,
"learning_rate": 4.670000000000001e-06,
"loss": 1.1871,
"step": 940
},
{
"epoch": 0.16,
"learning_rate": 4.7200000000000005e-06,
"loss": 1.1673,
"step": 950
},
{
"epoch": 0.16,
"learning_rate": 4.77e-06,
"loss": 1.1938,
"step": 960
},
{
"epoch": 0.16,
"learning_rate": 4.8200000000000004e-06,
"loss": 1.1601,
"step": 970
},
{
"epoch": 0.16,
"learning_rate": 4.87e-06,
"loss": 1.1815,
"step": 980
},
{
"epoch": 0.16,
"learning_rate": 4.92e-06,
"loss": 1.1985,
"step": 990
},
{
"epoch": 0.16,
"learning_rate": 4.970000000000001e-06,
"loss": 1.1755,
"step": 1000
},
{
"epoch": 0.16,
"eval_multi_news_accuracy": 0.5616533126883595,
"eval_multi_news_loss": 1.9033203125,
"eval_multi_news_runtime": 374.666,
"eval_multi_news_samples_per_second": 15.005,
"eval_multi_news_steps_per_second": 1.876,
"step": 1000
},
{
"epoch": 0.16,
"eval_samsum_accuracy": 0.6358605685096722,
"eval_samsum_loss": 1.2763671875,
"eval_samsum_runtime": 36.4854,
"eval_samsum_samples_per_second": 22.42,
"eval_samsum_steps_per_second": 2.823,
"step": 1000
},
{
"epoch": 0.16,
"eval_billsum_accuracy": 0.645555269329641,
"eval_billsum_loss": 1.466796875,
"eval_billsum_runtime": 205.3486,
"eval_billsum_samples_per_second": 15.919,
"eval_billsum_steps_per_second": 1.992,
"step": 1000
},
{
"epoch": 0.16,
"eval_wmt2019_zh-en_accuracy": 0.5821662271706222,
"eval_wmt2019_zh-en_loss": 1.908203125,
"eval_wmt2019_zh-en_runtime": 42.6249,
"eval_wmt2019_zh-en_samples_per_second": 23.46,
"eval_wmt2019_zh-en_steps_per_second": 2.933,
"step": 1000
},
{
"epoch": 0.16,
"eval_ted_trans_en-ja_accuracy": 0.5513235961740165,
"eval_ted_trans_en-ja_loss": 1.9208984375,
"eval_ted_trans_en-ja_runtime": 35.6003,
"eval_ted_trans_en-ja_samples_per_second": 22.5,
"eval_ted_trans_en-ja_steps_per_second": 2.837,
"step": 1000
},
{
"epoch": 0.16,
"eval_ted_trans_zh-ja_accuracy": 0.4552332912988651,
"eval_ted_trans_zh-ja_loss": 2.595703125,
"eval_ted_trans_zh-ja_runtime": 2.6463,
"eval_ted_trans_zh-ja_samples_per_second": 15.871,
"eval_ted_trans_zh-ja_steps_per_second": 2.267,
"step": 1000
},
{
"epoch": 0.16,
"eval_sharegpt_accuracy": 0.7199542010473684,
"eval_sharegpt_loss": 1.0751953125,
"eval_sharegpt_runtime": 733.0519,
"eval_sharegpt_samples_per_second": 4.567,
"eval_sharegpt_steps_per_second": 0.572,
"step": 1000
},
{
"epoch": 0.16,
"eval_dolly15k_accuracy": 0.5963712993421053,
"eval_dolly15k_loss": 1.6484375,
"eval_dolly15k_runtime": 33.8269,
"eval_dolly15k_samples_per_second": 22.201,
"eval_dolly15k_steps_per_second": 2.779,
"step": 1000
},
{
"epoch": 0.16,
"eval_ikala_accuracy": 0.7374268761235112,
"eval_ikala_loss": 0.98876953125,
"eval_ikala_runtime": 886.0533,
"eval_ikala_samples_per_second": 16.033,
"eval_ikala_steps_per_second": 2.004,
"step": 1000
},
{
"epoch": 0.16,
"eval_oasst_export_accuracy": 0.6594323119298394,
"eval_oasst_export_loss": 1.580078125,
"eval_oasst_export_runtime": 134.3333,
"eval_oasst_export_samples_per_second": 15.625,
"eval_oasst_export_steps_per_second": 1.958,
"step": 1000
},
{
"epoch": 0.16,
"eval_joke_accuracy": 0.4916603487490523,
"eval_joke_loss": 2.20703125,
"eval_joke_runtime": 3.5959,
"eval_joke_samples_per_second": 21.135,
"eval_joke_steps_per_second": 2.781,
"step": 1000
},
{
"epoch": 0.16,
"eval_gsm8k_accuracy": 0.760284126003706,
"eval_gsm8k_loss": 0.89794921875,
"eval_gsm8k_runtime": 57.2198,
"eval_gsm8k_samples_per_second": 23.051,
"eval_gsm8k_steps_per_second": 2.884,
"step": 1000
},
{
"epoch": 0.16,
"eval_webgpt_accuracy": 0.4994055667344498,
"eval_webgpt_loss": 2.18359375,
"eval_webgpt_runtime": 155.137,
"eval_webgpt_samples_per_second": 22.967,
"eval_webgpt_steps_per_second": 2.875,
"step": 1000
},
{
"epoch": 0.17,
"learning_rate": 5.02e-06,
"loss": 1.1772,
"step": 1010
},
{
"epoch": 0.17,
"learning_rate": 5.070000000000001e-06,
"loss": 1.2069,
"step": 1020
},
{
"epoch": 0.17,
"learning_rate": 5.12e-06,
"loss": 1.1755,
"step": 1030
},
{
"epoch": 0.17,
"learning_rate": 5.1700000000000005e-06,
"loss": 1.1658,
"step": 1040
},
{
"epoch": 0.17,
"learning_rate": 5.220000000000001e-06,
"loss": 1.1896,
"step": 1050
},
{
"epoch": 0.17,
"learning_rate": 5.27e-06,
"loss": 1.1743,
"step": 1060
},
{
"epoch": 0.18,
"learning_rate": 5.320000000000001e-06,
"loss": 1.1444,
"step": 1070
},
{
"epoch": 0.18,
"learning_rate": 5.370000000000001e-06,
"loss": 1.1812,
"step": 1080
},
{
"epoch": 0.18,
"learning_rate": 5.420000000000001e-06,
"loss": 1.1549,
"step": 1090
},
{
"epoch": 0.18,
"learning_rate": 5.470000000000001e-06,
"loss": 1.1929,
"step": 1100
},
{
"epoch": 0.18,
"learning_rate": 5.5200000000000005e-06,
"loss": 1.1317,
"step": 1110
},
{
"epoch": 0.18,
"learning_rate": 5.570000000000001e-06,
"loss": 1.1531,
"step": 1120
},
{
"epoch": 0.19,
"learning_rate": 5.620000000000001e-06,
"loss": 1.1871,
"step": 1130
},
{
"epoch": 0.19,
"learning_rate": 5.67e-06,
"loss": 1.1507,
"step": 1140
},
{
"epoch": 0.19,
"learning_rate": 5.72e-06,
"loss": 1.1916,
"step": 1150
},
{
"epoch": 0.19,
"learning_rate": 5.77e-06,
"loss": 1.1532,
"step": 1160
},
{
"epoch": 0.19,
"learning_rate": 5.82e-06,
"loss": 1.1763,
"step": 1170
},
{
"epoch": 0.19,
"learning_rate": 5.8700000000000005e-06,
"loss": 1.1719,
"step": 1180
},
{
"epoch": 0.2,
"learning_rate": 5.92e-06,
"loss": 1.1784,
"step": 1190
},
{
"epoch": 0.2,
"learning_rate": 5.9700000000000004e-06,
"loss": 1.1597,
"step": 1200
},
{
"epoch": 0.2,
"learning_rate": 6.02e-06,
"loss": 1.1594,
"step": 1210
},
{
"epoch": 0.2,
"learning_rate": 6.07e-06,
"loss": 1.1769,
"step": 1220
},
{
"epoch": 0.2,
"learning_rate": 6.120000000000001e-06,
"loss": 1.1692,
"step": 1230
},
{
"epoch": 0.2,
"learning_rate": 6.17e-06,
"loss": 1.1327,
"step": 1240
},
{
"epoch": 0.21,
"learning_rate": 6.220000000000001e-06,
"loss": 1.1733,
"step": 1250
},
{
"epoch": 0.21,
"learning_rate": 6.27e-06,
"loss": 1.16,
"step": 1260
},
{
"epoch": 0.21,
"learning_rate": 6.3200000000000005e-06,
"loss": 1.1701,
"step": 1270
},
{
"epoch": 0.21,
"learning_rate": 6.370000000000001e-06,
"loss": 1.1649,
"step": 1280
},
{
"epoch": 0.21,
"learning_rate": 6.42e-06,
"loss": 1.1477,
"step": 1290
},
{
"epoch": 0.21,
"learning_rate": 6.470000000000001e-06,
"loss": 1.1498,
"step": 1300
},
{
"epoch": 0.22,
"learning_rate": 6.520000000000001e-06,
"loss": 1.1881,
"step": 1310
},
{
"epoch": 0.22,
"learning_rate": 6.570000000000001e-06,
"loss": 1.1414,
"step": 1320
},
{
"epoch": 0.22,
"learning_rate": 6.620000000000001e-06,
"loss": 1.1663,
"step": 1330
},
{
"epoch": 0.22,
"learning_rate": 6.6700000000000005e-06,
"loss": 1.1555,
"step": 1340
},
{
"epoch": 0.22,
"learning_rate": 6.720000000000001e-06,
"loss": 1.1652,
"step": 1350
},
{
"epoch": 0.22,
"learning_rate": 6.770000000000001e-06,
"loss": 1.1539,
"step": 1360
},
{
"epoch": 0.23,
"learning_rate": 6.820000000000001e-06,
"loss": 1.1633,
"step": 1370
},
{
"epoch": 0.23,
"learning_rate": 6.870000000000001e-06,
"loss": 1.1583,
"step": 1380
},
{
"epoch": 0.23,
"learning_rate": 6.92e-06,
"loss": 1.1404,
"step": 1390
},
{
"epoch": 0.23,
"learning_rate": 6.97e-06,
"loss": 1.1436,
"step": 1400
},
{
"epoch": 0.23,
"learning_rate": 7.0200000000000006e-06,
"loss": 1.1856,
"step": 1410
},
{
"epoch": 0.23,
"learning_rate": 7.07e-06,
"loss": 1.1587,
"step": 1420
},
{
"epoch": 0.23,
"learning_rate": 7.1200000000000004e-06,
"loss": 1.1296,
"step": 1430
},
{
"epoch": 0.24,
"learning_rate": 7.17e-06,
"loss": 1.1171,
"step": 1440
},
{
"epoch": 0.24,
"learning_rate": 7.22e-06,
"loss": 1.1459,
"step": 1450
},
{
"epoch": 0.24,
"learning_rate": 7.270000000000001e-06,
"loss": 1.1621,
"step": 1460
},
{
"epoch": 0.24,
"learning_rate": 7.32e-06,
"loss": 1.1345,
"step": 1470
},
{
"epoch": 0.24,
"learning_rate": 7.370000000000001e-06,
"loss": 1.1711,
"step": 1480
},
{
"epoch": 0.24,
"learning_rate": 7.420000000000001e-06,
"loss": 1.1852,
"step": 1490
},
{
"epoch": 0.25,
"learning_rate": 7.4700000000000005e-06,
"loss": 1.1361,
"step": 1500
},
{
"epoch": 0.25,
"eval_multi_news_accuracy": 0.5626650769023163,
"eval_multi_news_loss": 1.9013671875,
"eval_multi_news_runtime": 374.2125,
"eval_multi_news_samples_per_second": 15.024,
"eval_multi_news_steps_per_second": 1.879,
"step": 1500
},
{
"epoch": 0.25,
"eval_samsum_accuracy": 0.641110533036939,
"eval_samsum_loss": 1.267578125,
"eval_samsum_runtime": 37.1994,
"eval_samsum_samples_per_second": 21.99,
"eval_samsum_steps_per_second": 2.769,
"step": 1500
},
{
"epoch": 0.25,
"eval_billsum_accuracy": 0.648249370750216,
"eval_billsum_loss": 1.453125,
"eval_billsum_runtime": 204.445,
"eval_billsum_samples_per_second": 15.99,
"eval_billsum_steps_per_second": 2.001,
"step": 1500
},
{
"epoch": 0.25,
"eval_wmt2019_zh-en_accuracy": 0.5873898487705391,
"eval_wmt2019_zh-en_loss": 1.892578125,
"eval_wmt2019_zh-en_runtime": 43.8258,
"eval_wmt2019_zh-en_samples_per_second": 22.818,
"eval_wmt2019_zh-en_steps_per_second": 2.852,
"step": 1500
},
{
"epoch": 0.25,
"eval_ted_trans_en-ja_accuracy": 0.5575474107655961,
"eval_ted_trans_en-ja_loss": 1.8818359375,
"eval_ted_trans_en-ja_runtime": 35.7188,
"eval_ted_trans_en-ja_samples_per_second": 22.425,
"eval_ted_trans_en-ja_steps_per_second": 2.828,
"step": 1500
},
{
"epoch": 0.25,
"eval_ted_trans_zh-ja_accuracy": 0.45999153259949194,
"eval_ted_trans_zh-ja_loss": 2.556640625,
"eval_ted_trans_zh-ja_runtime": 2.58,
"eval_ted_trans_zh-ja_samples_per_second": 16.279,
"eval_ted_trans_zh-ja_steps_per_second": 2.326,
"step": 1500
},
{
"epoch": 0.25,
"eval_sharegpt_accuracy": 0.7297610954662402,
"eval_sharegpt_loss": 1.0302734375,
"eval_sharegpt_runtime": 732.545,
"eval_sharegpt_samples_per_second": 4.57,
"eval_sharegpt_steps_per_second": 0.572,
"step": 1500
},
{
"epoch": 0.25,
"eval_dolly15k_accuracy": 0.5962685032894737,
"eval_dolly15k_loss": 1.646484375,
"eval_dolly15k_runtime": 33.5813,
"eval_dolly15k_samples_per_second": 22.364,
"eval_dolly15k_steps_per_second": 2.799,
"step": 1500
},
{
"epoch": 0.25,
"eval_ikala_accuracy": 0.7406414384414164,
"eval_ikala_loss": 0.96875,
"eval_ikala_runtime": 885.454,
"eval_ikala_samples_per_second": 16.044,
"eval_ikala_steps_per_second": 2.006,
"step": 1500
},
{
"epoch": 0.25,
"eval_oasst_export_accuracy": 0.6599712470813749,
"eval_oasst_export_loss": 1.578125,
"eval_oasst_export_runtime": 133.2511,
"eval_oasst_export_samples_per_second": 15.752,
"eval_oasst_export_steps_per_second": 1.974,
"step": 1500
},
{
"epoch": 0.25,
"eval_joke_accuracy": 0.49838893100833964,
"eval_joke_loss": 2.1953125,
"eval_joke_runtime": 4.5928,
"eval_joke_samples_per_second": 16.548,
"eval_joke_steps_per_second": 2.177,
"step": 1500
},
{
"epoch": 0.25,
"eval_gsm8k_accuracy": 0.7668082149474984,
"eval_gsm8k_loss": 0.85791015625,
"eval_gsm8k_runtime": 57.7515,
"eval_gsm8k_samples_per_second": 22.839,
"eval_gsm8k_steps_per_second": 2.857,
"step": 1500
},
{
"epoch": 0.25,
"eval_webgpt_accuracy": 0.4995741373619939,
"eval_webgpt_loss": 2.181640625,
"eval_webgpt_runtime": 154.199,
"eval_webgpt_samples_per_second": 23.107,
"eval_webgpt_steps_per_second": 2.892,
"step": 1500
},
{
"epoch": 0.25,
"learning_rate": 7.520000000000001e-06,
"loss": 1.1574,
"step": 1510
},
{
"epoch": 0.25,
"learning_rate": 7.57e-06,
"loss": 1.1593,
"step": 1520
},
{
"epoch": 0.25,
"learning_rate": 7.620000000000001e-06,
"loss": 1.1255,
"step": 1530
},
{
"epoch": 0.25,
"learning_rate": 7.670000000000001e-06,
"loss": 1.1665,
"step": 1540
},
{
"epoch": 0.25,
"learning_rate": 7.72e-06,
"loss": 1.1459,
"step": 1550
},
{
"epoch": 0.26,
"learning_rate": 7.77e-06,
"loss": 1.1187,
"step": 1560
},
{
"epoch": 0.26,
"learning_rate": 7.820000000000001e-06,
"loss": 1.1469,
"step": 1570
},
{
"epoch": 0.26,
"learning_rate": 7.870000000000001e-06,
"loss": 1.1648,
"step": 1580
},
{
"epoch": 0.26,
"learning_rate": 7.92e-06,
"loss": 1.1314,
"step": 1590
},
{
"epoch": 0.26,
"learning_rate": 7.970000000000002e-06,
"loss": 1.1213,
"step": 1600
},
{
"epoch": 0.26,
"learning_rate": 8.020000000000001e-06,
"loss": 1.1424,
"step": 1610
},
{
"epoch": 0.27,
"learning_rate": 8.07e-06,
"loss": 1.1637,
"step": 1620
},
{
"epoch": 0.27,
"learning_rate": 8.120000000000002e-06,
"loss": 1.1403,
"step": 1630
},
{
"epoch": 0.27,
"learning_rate": 8.17e-06,
"loss": 1.1299,
"step": 1640
},
{
"epoch": 0.27,
"learning_rate": 8.220000000000001e-06,
"loss": 1.1361,
"step": 1650
},
{
"epoch": 0.27,
"learning_rate": 8.27e-06,
"loss": 1.1484,
"step": 1660
},
{
"epoch": 0.27,
"learning_rate": 8.32e-06,
"loss": 1.1292,
"step": 1670
},
{
"epoch": 0.28,
"learning_rate": 8.370000000000001e-06,
"loss": 1.1395,
"step": 1680
},
{
"epoch": 0.28,
"learning_rate": 8.42e-06,
"loss": 1.1299,
"step": 1690
},
{
"epoch": 0.28,
"learning_rate": 8.47e-06,
"loss": 1.145,
"step": 1700
},
{
"epoch": 0.28,
"learning_rate": 8.52e-06,
"loss": 1.1351,
"step": 1710
},
{
"epoch": 0.28,
"learning_rate": 8.570000000000001e-06,
"loss": 1.1579,
"step": 1720
},
{
"epoch": 0.28,
"learning_rate": 8.62e-06,
"loss": 1.1483,
"step": 1730
},
{
"epoch": 0.29,
"learning_rate": 8.67e-06,
"loss": 1.1278,
"step": 1740
},
{
"epoch": 0.29,
"learning_rate": 8.720000000000001e-06,
"loss": 1.1375,
"step": 1750
},
{
"epoch": 0.29,
"learning_rate": 8.77e-06,
"loss": 1.1526,
"step": 1760
},
{
"epoch": 0.29,
"learning_rate": 8.82e-06,
"loss": 1.1535,
"step": 1770
},
{
"epoch": 0.29,
"learning_rate": 8.870000000000001e-06,
"loss": 1.1377,
"step": 1780
},
{
"epoch": 0.29,
"learning_rate": 8.920000000000001e-06,
"loss": 1.1578,
"step": 1790
},
{
"epoch": 0.3,
"learning_rate": 8.97e-06,
"loss": 1.1598,
"step": 1800
},
{
"epoch": 0.3,
"learning_rate": 9.020000000000002e-06,
"loss": 1.1601,
"step": 1810
},
{
"epoch": 0.3,
"learning_rate": 9.070000000000001e-06,
"loss": 1.1292,
"step": 1820
},
{
"epoch": 0.3,
"learning_rate": 9.12e-06,
"loss": 1.111,
"step": 1830
},
{
"epoch": 0.3,
"learning_rate": 9.17e-06,
"loss": 1.12,
"step": 1840
},
{
"epoch": 0.3,
"learning_rate": 9.220000000000002e-06,
"loss": 1.1,
"step": 1850
},
{
"epoch": 0.31,
"learning_rate": 9.270000000000001e-06,
"loss": 1.099,
"step": 1860
},
{
"epoch": 0.31,
"learning_rate": 9.32e-06,
"loss": 1.1333,
"step": 1870
},
{
"epoch": 0.31,
"learning_rate": 9.370000000000002e-06,
"loss": 1.1386,
"step": 1880
},
{
"epoch": 0.31,
"learning_rate": 9.42e-06,
"loss": 1.1389,
"step": 1890
},
{
"epoch": 0.31,
"learning_rate": 9.47e-06,
"loss": 1.1294,
"step": 1900
},
{
"epoch": 0.31,
"learning_rate": 9.52e-06,
"loss": 1.1326,
"step": 1910
},
{
"epoch": 0.32,
"learning_rate": 9.57e-06,
"loss": 1.129,
"step": 1920
},
{
"epoch": 0.32,
"learning_rate": 9.620000000000001e-06,
"loss": 1.1224,
"step": 1930
},
{
"epoch": 0.32,
"learning_rate": 9.67e-06,
"loss": 1.1168,
"step": 1940
},
{
"epoch": 0.32,
"learning_rate": 9.72e-06,
"loss": 1.1223,
"step": 1950
},
{
"epoch": 0.32,
"learning_rate": 9.770000000000001e-06,
"loss": 1.1064,
"step": 1960
},
{
"epoch": 0.32,
"learning_rate": 9.820000000000001e-06,
"loss": 1.1303,
"step": 1970
},
{
"epoch": 0.33,
"learning_rate": 9.87e-06,
"loss": 1.1134,
"step": 1980
},
{
"epoch": 0.33,
"learning_rate": 9.920000000000002e-06,
"loss": 1.1396,
"step": 1990
},
{
"epoch": 0.33,
"learning_rate": 9.970000000000001e-06,
"loss": 1.1418,
"step": 2000
},
{
"epoch": 0.33,
"eval_multi_news_accuracy": 0.5614524803428215,
"eval_multi_news_loss": 1.9052734375,
"eval_multi_news_runtime": 373.3978,
"eval_multi_news_samples_per_second": 15.056,
"eval_multi_news_steps_per_second": 1.883,
"step": 2000
},
{
"epoch": 0.33,
"eval_samsum_accuracy": 0.6388875750839521,
"eval_samsum_loss": 1.265625,
"eval_samsum_runtime": 37.3723,
"eval_samsum_samples_per_second": 21.888,
"eval_samsum_steps_per_second": 2.756,
"step": 2000
},
{
"epoch": 0.33,
"eval_billsum_accuracy": 0.6493294263495999,
"eval_billsum_loss": 1.4462890625,
"eval_billsum_runtime": 203.77,
"eval_billsum_samples_per_second": 16.043,
"eval_billsum_steps_per_second": 2.007,
"step": 2000
},
{
"epoch": 0.33,
"eval_wmt2019_zh-en_accuracy": 0.5823181343543334,
"eval_wmt2019_zh-en_loss": 1.9228515625,
"eval_wmt2019_zh-en_runtime": 43.5037,
"eval_wmt2019_zh-en_samples_per_second": 22.987,
"eval_wmt2019_zh-en_steps_per_second": 2.873,
"step": 2000
},
{
"epoch": 0.33,
"eval_ted_trans_en-ja_accuracy": 0.5623202978930665,
"eval_ted_trans_en-ja_loss": 1.869140625,
"eval_ted_trans_en-ja_runtime": 35.4889,
"eval_ted_trans_en-ja_samples_per_second": 22.57,
"eval_ted_trans_en-ja_steps_per_second": 2.846,
"step": 2000
},
{
"epoch": 0.33,
"eval_ted_trans_zh-ja_accuracy": 0.46688327918020495,
"eval_ted_trans_zh-ja_loss": 2.46875,
"eval_ted_trans_zh-ja_runtime": 2.6642,
"eval_ted_trans_zh-ja_samples_per_second": 15.765,
"eval_ted_trans_zh-ja_steps_per_second": 2.252,
"step": 2000
},
{
"epoch": 0.33,
"eval_sharegpt_accuracy": 0.7361997474453208,
"eval_sharegpt_loss": 1.001953125,
"eval_sharegpt_runtime": 732.4255,
"eval_sharegpt_samples_per_second": 4.571,
"eval_sharegpt_steps_per_second": 0.572,
"step": 2000
},
{
"epoch": 0.33,
"eval_dolly15k_accuracy": 0.5939898574561403,
"eval_dolly15k_loss": 1.65625,
"eval_dolly15k_runtime": 33.8567,
"eval_dolly15k_samples_per_second": 22.182,
"eval_dolly15k_steps_per_second": 2.776,
"step": 2000
},
{
"epoch": 0.33,
"eval_ikala_accuracy": 0.7422763555784087,
"eval_ikala_loss": 0.9580078125,
"eval_ikala_runtime": 885.2845,
"eval_ikala_samples_per_second": 16.047,
"eval_ikala_steps_per_second": 2.006,
"step": 2000
},
{
"epoch": 0.33,
"eval_oasst_export_accuracy": 0.6593580262738169,
"eval_oasst_export_loss": 1.578125,
"eval_oasst_export_runtime": 132.7253,
"eval_oasst_export_samples_per_second": 15.815,
"eval_oasst_export_steps_per_second": 1.982,
"step": 2000
},
{
"epoch": 0.33,
"eval_joke_accuracy": 0.49895754359363154,
"eval_joke_loss": 2.171875,
"eval_joke_runtime": 4.5049,
"eval_joke_samples_per_second": 16.871,
"eval_joke_steps_per_second": 2.22,
"step": 2000
},
{
"epoch": 0.33,
"eval_gsm8k_accuracy": 0.775555898702903,
"eval_gsm8k_loss": 0.8232421875,
"eval_gsm8k_runtime": 56.3886,
"eval_gsm8k_samples_per_second": 23.391,
"eval_gsm8k_steps_per_second": 2.926,
"step": 2000
},
{
"epoch": 0.33,
"eval_webgpt_accuracy": 0.4990524556304364,
"eval_webgpt_loss": 2.185546875,
"eval_webgpt_runtime": 154.0524,
"eval_webgpt_samples_per_second": 23.128,
"eval_webgpt_steps_per_second": 2.895,
"step": 2000
},
{
"epoch": 0.33,
"learning_rate": 9.99914354230901e-06,
"loss": 1.116,
"step": 2010
},
{
"epoch": 0.33,
"learning_rate": 9.997002398081536e-06,
"loss": 1.1243,
"step": 2020
},
{
"epoch": 0.33,
"learning_rate": 9.99486125385406e-06,
"loss": 1.1183,
"step": 2030
},
{
"epoch": 0.34,
"learning_rate": 9.992720109626585e-06,
"loss": 1.1324,
"step": 2040
},
{
"epoch": 0.34,
"learning_rate": 9.99057896539911e-06,
"loss": 1.1033,
"step": 2050
},
{
"epoch": 0.34,
"learning_rate": 9.988437821171634e-06,
"loss": 1.0962,
"step": 2060
},
{
"epoch": 0.34,
"learning_rate": 9.98629667694416e-06,
"loss": 1.1253,
"step": 2070
},
{
"epoch": 0.34,
"learning_rate": 9.984155532716685e-06,
"loss": 1.1522,
"step": 2080
},
{
"epoch": 0.34,
"learning_rate": 9.98201438848921e-06,
"loss": 1.142,
"step": 2090
},
{
"epoch": 0.34,
"learning_rate": 9.979873244261734e-06,
"loss": 1.1289,
"step": 2100
},
{
"epoch": 0.35,
"learning_rate": 9.97773210003426e-06,
"loss": 1.1367,
"step": 2110
},
{
"epoch": 0.35,
"learning_rate": 9.975590955806785e-06,
"loss": 1.1303,
"step": 2120
},
{
"epoch": 0.35,
"learning_rate": 9.973449811579308e-06,
"loss": 1.1041,
"step": 2130
},
{
"epoch": 0.35,
"learning_rate": 9.971308667351834e-06,
"loss": 1.1325,
"step": 2140
},
{
"epoch": 0.35,
"learning_rate": 9.969167523124359e-06,
"loss": 1.1371,
"step": 2150
},
{
"epoch": 0.35,
"learning_rate": 9.967026378896883e-06,
"loss": 1.112,
"step": 2160
},
{
"epoch": 0.36,
"learning_rate": 9.964885234669408e-06,
"loss": 1.1172,
"step": 2170
},
{
"epoch": 0.36,
"learning_rate": 9.962744090441932e-06,
"loss": 1.0959,
"step": 2180
},
{
"epoch": 0.36,
"learning_rate": 9.960602946214459e-06,
"loss": 1.1322,
"step": 2190
},
{
"epoch": 0.36,
"learning_rate": 9.958461801986983e-06,
"loss": 1.1098,
"step": 2200
},
{
"epoch": 0.36,
"learning_rate": 9.956320657759508e-06,
"loss": 1.1185,
"step": 2210
},
{
"epoch": 0.36,
"learning_rate": 9.954179513532032e-06,
"loss": 1.1027,
"step": 2220
},
{
"epoch": 0.37,
"learning_rate": 9.952038369304557e-06,
"loss": 1.1217,
"step": 2230
},
{
"epoch": 0.37,
"learning_rate": 9.949897225077082e-06,
"loss": 1.115,
"step": 2240
},
{
"epoch": 0.37,
"learning_rate": 9.947756080849606e-06,
"loss": 1.1197,
"step": 2250
},
{
"epoch": 0.37,
"learning_rate": 9.945614936622131e-06,
"loss": 1.0926,
"step": 2260
},
{
"epoch": 0.37,
"learning_rate": 9.943473792394657e-06,
"loss": 1.1085,
"step": 2270
},
{
"epoch": 0.37,
"learning_rate": 9.94133264816718e-06,
"loss": 1.139,
"step": 2280
},
{
"epoch": 0.38,
"learning_rate": 9.939191503939706e-06,
"loss": 1.1131,
"step": 2290
},
{
"epoch": 0.38,
"learning_rate": 9.937050359712231e-06,
"loss": 1.1281,
"step": 2300
},
{
"epoch": 0.38,
"learning_rate": 9.934909215484757e-06,
"loss": 1.0962,
"step": 2310
},
{
"epoch": 0.38,
"learning_rate": 9.93276807125728e-06,
"loss": 1.107,
"step": 2320
},
{
"epoch": 0.38,
"learning_rate": 9.930626927029806e-06,
"loss": 1.1082,
"step": 2330
},
{
"epoch": 0.38,
"learning_rate": 9.928485782802331e-06,
"loss": 1.1323,
"step": 2340
},
{
"epoch": 0.39,
"learning_rate": 9.926344638574855e-06,
"loss": 1.0984,
"step": 2350
},
{
"epoch": 0.39,
"learning_rate": 9.92420349434738e-06,
"loss": 1.118,
"step": 2360
},
{
"epoch": 0.39,
"learning_rate": 9.922062350119905e-06,
"loss": 1.1003,
"step": 2370
},
{
"epoch": 0.39,
"learning_rate": 9.919921205892429e-06,
"loss": 1.115,
"step": 2380
},
{
"epoch": 0.39,
"learning_rate": 9.917780061664954e-06,
"loss": 1.0974,
"step": 2390
},
{
"epoch": 0.39,
"learning_rate": 9.915638917437478e-06,
"loss": 1.107,
"step": 2400
},
{
"epoch": 0.4,
"learning_rate": 9.913497773210005e-06,
"loss": 1.1101,
"step": 2410
},
{
"epoch": 0.4,
"learning_rate": 9.911356628982529e-06,
"loss": 1.1115,
"step": 2420
},
{
"epoch": 0.4,
"learning_rate": 9.909215484755054e-06,
"loss": 1.0951,
"step": 2430
},
{
"epoch": 0.4,
"learning_rate": 9.907074340527578e-06,
"loss": 1.0938,
"step": 2440
},
{
"epoch": 0.4,
"learning_rate": 9.904933196300103e-06,
"loss": 1.0772,
"step": 2450
},
{
"epoch": 0.4,
"learning_rate": 9.902792052072629e-06,
"loss": 1.1028,
"step": 2460
},
{
"epoch": 0.41,
"learning_rate": 9.900650907845152e-06,
"loss": 1.0923,
"step": 2470
},
{
"epoch": 0.41,
"learning_rate": 9.898509763617678e-06,
"loss": 1.1238,
"step": 2480
},
{
"epoch": 0.41,
"learning_rate": 9.896368619390203e-06,
"loss": 1.1401,
"step": 2490
},
{
"epoch": 0.41,
"learning_rate": 9.894227475162728e-06,
"loss": 1.1142,
"step": 2500
},
{
"epoch": 0.41,
"eval_multi_news_accuracy": 0.5619751512736382,
"eval_multi_news_loss": 1.904296875,
"eval_multi_news_runtime": 373.6389,
"eval_multi_news_samples_per_second": 15.047,
"eval_multi_news_steps_per_second": 1.881,
"step": 2500
},
{
"epoch": 0.41,
"eval_samsum_accuracy": 0.6433334909899258,
"eval_samsum_loss": 1.2607421875,
"eval_samsum_runtime": 37.306,
"eval_samsum_samples_per_second": 21.927,
"eval_samsum_steps_per_second": 2.761,
"step": 2500
},
{
"epoch": 0.41,
"eval_billsum_accuracy": 0.650991772793869,
"eval_billsum_loss": 1.439453125,
"eval_billsum_runtime": 203.4152,
"eval_billsum_samples_per_second": 16.071,
"eval_billsum_steps_per_second": 2.011,
"step": 2500
},
{
"epoch": 0.41,
"eval_wmt2019_zh-en_accuracy": 0.5893870117057808,
"eval_wmt2019_zh-en_loss": 1.904296875,
"eval_wmt2019_zh-en_runtime": 43.0674,
"eval_wmt2019_zh-en_samples_per_second": 23.219,
"eval_wmt2019_zh-en_steps_per_second": 2.902,
"step": 2500
},
{
"epoch": 0.41,
"eval_ted_trans_en-ja_accuracy": 0.563225558860213,
"eval_ted_trans_en-ja_loss": 1.8388671875,
"eval_ted_trans_en-ja_runtime": 35.7494,
"eval_ted_trans_en-ja_samples_per_second": 22.406,
"eval_ted_trans_en-ja_steps_per_second": 2.825,
"step": 2500
},
{
"epoch": 0.41,
"eval_ted_trans_zh-ja_accuracy": 0.4661596958174905,
"eval_ted_trans_zh-ja_loss": 2.453125,
"eval_ted_trans_zh-ja_runtime": 2.525,
"eval_ted_trans_zh-ja_samples_per_second": 16.634,
"eval_ted_trans_zh-ja_steps_per_second": 2.376,
"step": 2500
},
{
"epoch": 0.41,
"eval_sharegpt_accuracy": 0.7423548548826656,
"eval_sharegpt_loss": 0.97265625,
"eval_sharegpt_runtime": 732.2894,
"eval_sharegpt_samples_per_second": 4.572,
"eval_sharegpt_steps_per_second": 0.572,
"step": 2500
},
{
"epoch": 0.41,
"eval_dolly15k_accuracy": 0.5930646929824561,
"eval_dolly15k_loss": 1.6513671875,
"eval_dolly15k_runtime": 33.3723,
"eval_dolly15k_samples_per_second": 22.504,
"eval_dolly15k_steps_per_second": 2.817,
"step": 2500
},
{
"epoch": 0.41,
"eval_ikala_accuracy": 0.7440914978067725,
"eval_ikala_loss": 0.9453125,
"eval_ikala_runtime": 884.831,
"eval_ikala_samples_per_second": 16.055,
"eval_ikala_steps_per_second": 2.007,
"step": 2500
},
{
"epoch": 0.41,
"eval_oasst_export_accuracy": 0.6600834038561538,
"eval_oasst_export_loss": 1.5791015625,
"eval_oasst_export_runtime": 133.5652,
"eval_oasst_export_samples_per_second": 15.715,
"eval_oasst_export_steps_per_second": 1.969,
"step": 2500
},
{
"epoch": 0.41,
"eval_joke_accuracy": 0.5195223654283548,
"eval_joke_loss": 2.078125,
"eval_joke_runtime": 4.5929,
"eval_joke_samples_per_second": 16.547,
"eval_joke_steps_per_second": 2.177,
"step": 2500
},
{
"epoch": 0.41,
"eval_gsm8k_accuracy": 0.782682211241507,
"eval_gsm8k_loss": 0.796875,
"eval_gsm8k_runtime": 56.5404,
"eval_gsm8k_samples_per_second": 23.328,
"eval_gsm8k_steps_per_second": 2.918,
"step": 2500
},
{
"epoch": 0.41,
"eval_webgpt_accuracy": 0.49846334564786127,
"eval_webgpt_loss": 2.189453125,
"eval_webgpt_runtime": 154.9389,
"eval_webgpt_samples_per_second": 22.996,
"eval_webgpt_steps_per_second": 2.879,
"step": 2500
},
{
"epoch": 0.41,
"learning_rate": 9.892086330935252e-06,
"loss": 1.1335,
"step": 2510
},
{
"epoch": 0.41,
"learning_rate": 9.889945186707778e-06,
"loss": 1.0999,
"step": 2520
},
{
"epoch": 0.42,
"learning_rate": 9.887804042480303e-06,
"loss": 1.1324,
"step": 2530
},
{
"epoch": 0.42,
"learning_rate": 9.885662898252827e-06,
"loss": 1.0832,
"step": 2540
},
{
"epoch": 0.42,
"learning_rate": 9.883521754025352e-06,
"loss": 1.1,
"step": 2550
},
{
"epoch": 0.42,
"learning_rate": 9.881380609797877e-06,
"loss": 1.1153,
"step": 2560
},
{
"epoch": 0.42,
"learning_rate": 9.879239465570401e-06,
"loss": 1.0958,
"step": 2570
},
{
"epoch": 0.42,
"learning_rate": 9.877098321342926e-06,
"loss": 1.1154,
"step": 2580
},
{
"epoch": 0.43,
"learning_rate": 9.874957177115452e-06,
"loss": 1.1139,
"step": 2590
},
{
"epoch": 0.43,
"learning_rate": 9.872816032887977e-06,
"loss": 1.0739,
"step": 2600
},
{
"epoch": 0.43,
"learning_rate": 9.8706748886605e-06,
"loss": 1.1105,
"step": 2610
},
{
"epoch": 0.43,
"learning_rate": 9.868533744433026e-06,
"loss": 1.0969,
"step": 2620
},
{
"epoch": 0.43,
"learning_rate": 9.866392600205552e-06,
"loss": 1.1207,
"step": 2630
},
{
"epoch": 0.43,
"learning_rate": 9.864251455978075e-06,
"loss": 1.1392,
"step": 2640
},
{
"epoch": 0.44,
"learning_rate": 9.8621103117506e-06,
"loss": 1.1161,
"step": 2650
},
{
"epoch": 0.44,
"learning_rate": 9.859969167523126e-06,
"loss": 1.0767,
"step": 2660
},
{
"epoch": 0.44,
"learning_rate": 9.85782802329565e-06,
"loss": 1.1113,
"step": 2670
},
{
"epoch": 0.44,
"learning_rate": 9.855686879068175e-06,
"loss": 1.0801,
"step": 2680
},
{
"epoch": 0.44,
"learning_rate": 9.853545734840699e-06,
"loss": 1.0835,
"step": 2690
},
{
"epoch": 0.44,
"learning_rate": 9.851404590613226e-06,
"loss": 1.0635,
"step": 2700
},
{
"epoch": 0.45,
"learning_rate": 9.84926344638575e-06,
"loss": 1.095,
"step": 2710
},
{
"epoch": 0.45,
"learning_rate": 9.847122302158275e-06,
"loss": 1.0822,
"step": 2720
},
{
"epoch": 0.45,
"learning_rate": 9.844981157930798e-06,
"loss": 1.0983,
"step": 2730
},
{
"epoch": 0.45,
"learning_rate": 9.842840013703324e-06,
"loss": 1.1245,
"step": 2740
},
{
"epoch": 0.45,
"learning_rate": 9.84069886947585e-06,
"loss": 1.0768,
"step": 2750
},
{
"epoch": 0.45,
"learning_rate": 9.838557725248373e-06,
"loss": 1.0958,
"step": 2760
},
{
"epoch": 0.45,
"learning_rate": 9.836416581020898e-06,
"loss": 1.0869,
"step": 2770
},
{
"epoch": 0.46,
"learning_rate": 9.834275436793424e-06,
"loss": 1.126,
"step": 2780
},
{
"epoch": 0.46,
"learning_rate": 9.832134292565947e-06,
"loss": 1.0823,
"step": 2790
},
{
"epoch": 0.46,
"learning_rate": 9.829993148338473e-06,
"loss": 1.1057,
"step": 2800
},
{
"epoch": 0.46,
"learning_rate": 9.827852004110998e-06,
"loss": 1.0717,
"step": 2810
},
{
"epoch": 0.46,
"learning_rate": 9.825710859883523e-06,
"loss": 1.0835,
"step": 2820
},
{
"epoch": 0.46,
"learning_rate": 9.823569715656047e-06,
"loss": 1.1291,
"step": 2830
},
{
"epoch": 0.47,
"learning_rate": 9.821428571428573e-06,
"loss": 1.0856,
"step": 2840
},
{
"epoch": 0.47,
"learning_rate": 9.819287427201098e-06,
"loss": 1.0972,
"step": 2850
},
{
"epoch": 0.47,
"learning_rate": 9.817146282973622e-06,
"loss": 1.0833,
"step": 2860
},
{
"epoch": 0.47,
"learning_rate": 9.815005138746147e-06,
"loss": 1.1124,
"step": 2870
},
{
"epoch": 0.47,
"learning_rate": 9.812863994518672e-06,
"loss": 1.0905,
"step": 2880
},
{
"epoch": 0.47,
"learning_rate": 9.810722850291196e-06,
"loss": 1.0891,
"step": 2890
},
{
"epoch": 0.48,
"learning_rate": 9.808581706063721e-06,
"loss": 1.0931,
"step": 2900
},
{
"epoch": 0.48,
"learning_rate": 9.806440561836245e-06,
"loss": 1.1066,
"step": 2910
},
{
"epoch": 0.48,
"learning_rate": 9.804299417608772e-06,
"loss": 1.0759,
"step": 2920
},
{
"epoch": 0.48,
"learning_rate": 9.802158273381296e-06,
"loss": 1.0996,
"step": 2930
},
{
"epoch": 0.48,
"learning_rate": 9.800017129153821e-06,
"loss": 1.0868,
"step": 2940
},
{
"epoch": 0.48,
"learning_rate": 9.797875984926345e-06,
"loss": 1.0799,
"step": 2950
},
{
"epoch": 0.49,
"learning_rate": 9.79573484069887e-06,
"loss": 1.0989,
"step": 2960
},
{
"epoch": 0.49,
"learning_rate": 9.793593696471396e-06,
"loss": 1.0841,
"step": 2970
},
{
"epoch": 0.49,
"learning_rate": 9.79145255224392e-06,
"loss": 1.0745,
"step": 2980
},
{
"epoch": 0.49,
"learning_rate": 9.789311408016445e-06,
"loss": 1.0742,
"step": 2990
},
{
"epoch": 0.49,
"learning_rate": 9.78717026378897e-06,
"loss": 1.0745,
"step": 3000
},
{
"epoch": 0.49,
"eval_multi_news_accuracy": 0.5619935239487821,
"eval_multi_news_loss": 1.9033203125,
"eval_multi_news_runtime": 373.8934,
"eval_multi_news_samples_per_second": 15.036,
"eval_multi_news_steps_per_second": 1.88,
"step": 3000
},
{
"epoch": 0.49,
"eval_samsum_accuracy": 0.6469753582746063,
"eval_samsum_loss": 1.24609375,
"eval_samsum_runtime": 37.2777,
"eval_samsum_samples_per_second": 21.943,
"eval_samsum_steps_per_second": 2.763,
"step": 3000
},
{
"epoch": 0.49,
"eval_billsum_accuracy": 0.6516022390022165,
"eval_billsum_loss": 1.431640625,
"eval_billsum_runtime": 204.7394,
"eval_billsum_samples_per_second": 15.967,
"eval_billsum_steps_per_second": 1.998,
"step": 3000
},
{
"epoch": 0.49,
"eval_wmt2019_zh-en_accuracy": 0.5844544095665172,
"eval_wmt2019_zh-en_loss": 1.8984375,
"eval_wmt2019_zh-en_runtime": 42.4024,
"eval_wmt2019_zh-en_samples_per_second": 23.584,
"eval_wmt2019_zh-en_steps_per_second": 2.948,
"step": 3000
},
{
"epoch": 0.49,
"eval_ted_trans_en-ja_accuracy": 0.577170182658057,
"eval_ted_trans_en-ja_loss": 1.7958984375,
"eval_ted_trans_en-ja_runtime": 35.5789,
"eval_ted_trans_en-ja_samples_per_second": 22.513,
"eval_ted_trans_en-ja_steps_per_second": 2.839,
"step": 3000
},
{
"epoch": 0.49,
"eval_ted_trans_zh-ja_accuracy": 0.46690610569522834,
"eval_ted_trans_zh-ja_loss": 2.515625,
"eval_ted_trans_zh-ja_runtime": 2.5484,
"eval_ted_trans_zh-ja_samples_per_second": 16.481,
"eval_ted_trans_zh-ja_steps_per_second": 2.354,
"step": 3000
},
{
"epoch": 0.49,
"eval_sharegpt_accuracy": 0.7475412115956699,
"eval_sharegpt_loss": 0.9453125,
"eval_sharegpt_runtime": 731.2857,
"eval_sharegpt_samples_per_second": 4.578,
"eval_sharegpt_steps_per_second": 0.573,
"step": 3000
},
{
"epoch": 0.49,
"eval_dolly15k_accuracy": 0.5928933662280702,
"eval_dolly15k_loss": 1.65625,
"eval_dolly15k_runtime": 34.5989,
"eval_dolly15k_samples_per_second": 21.706,
"eval_dolly15k_steps_per_second": 2.717,
"step": 3000
},
{
"epoch": 0.49,
"eval_ikala_accuracy": 0.7469942144047141,
"eval_ikala_loss": 0.9296875,
"eval_ikala_runtime": 884.7774,
"eval_ikala_samples_per_second": 16.056,
"eval_ikala_steps_per_second": 2.007,
"step": 3000
},
{
"epoch": 0.49,
"eval_oasst_export_accuracy": 0.6607141036415994,
"eval_oasst_export_loss": 1.5732421875,
"eval_oasst_export_runtime": 132.9167,
"eval_oasst_export_samples_per_second": 15.792,
"eval_oasst_export_steps_per_second": 1.979,
"step": 3000
},
{
"epoch": 0.49,
"eval_joke_accuracy": 0.5242608036391205,
"eval_joke_loss": 2.025390625,
"eval_joke_runtime": 4.5573,
"eval_joke_samples_per_second": 16.677,
"eval_joke_steps_per_second": 2.194,
"step": 3000
},
{
"epoch": 0.49,
"eval_gsm8k_accuracy": 0.7849444101297097,
"eval_gsm8k_loss": 0.783203125,
"eval_gsm8k_runtime": 56.5634,
"eval_gsm8k_samples_per_second": 23.319,
"eval_gsm8k_steps_per_second": 2.917,
"step": 3000
},
{
"epoch": 0.49,
"eval_webgpt_accuracy": 0.498837749883775,
"eval_webgpt_loss": 2.19140625,
"eval_webgpt_runtime": 153.8546,
"eval_webgpt_samples_per_second": 23.158,
"eval_webgpt_steps_per_second": 2.899,
"step": 3000
},
{
"epoch": 0.49,
"learning_rate": 9.785029119561494e-06,
"loss": 1.0876,
"step": 3010
},
{
"epoch": 0.5,
"learning_rate": 9.782887975334019e-06,
"loss": 1.0761,
"step": 3020
},
{
"epoch": 0.5,
"learning_rate": 9.780746831106544e-06,
"loss": 1.103,
"step": 3030
},
{
"epoch": 0.5,
"learning_rate": 9.77860568687907e-06,
"loss": 1.0891,
"step": 3040
},
{
"epoch": 0.5,
"learning_rate": 9.776464542651593e-06,
"loss": 1.0852,
"step": 3050
},
{
"epoch": 0.5,
"learning_rate": 9.774323398424119e-06,
"loss": 1.1041,
"step": 3060
},
{
"epoch": 0.5,
"learning_rate": 9.772182254196644e-06,
"loss": 1.0801,
"step": 3070
},
{
"epoch": 0.51,
"learning_rate": 9.770041109969168e-06,
"loss": 1.0851,
"step": 3080
},
{
"epoch": 0.51,
"learning_rate": 9.767899965741693e-06,
"loss": 1.0839,
"step": 3090
},
{
"epoch": 0.51,
"learning_rate": 9.765758821514219e-06,
"loss": 1.0756,
"step": 3100
},
{
"epoch": 0.51,
"learning_rate": 9.763617677286742e-06,
"loss": 1.0604,
"step": 3110
},
{
"epoch": 0.51,
"learning_rate": 9.761476533059268e-06,
"loss": 1.0613,
"step": 3120
},
{
"epoch": 0.51,
"learning_rate": 9.759335388831791e-06,
"loss": 1.0839,
"step": 3130
},
{
"epoch": 0.52,
"learning_rate": 9.757194244604318e-06,
"loss": 1.0873,
"step": 3140
},
{
"epoch": 0.52,
"learning_rate": 9.755053100376842e-06,
"loss": 1.0935,
"step": 3150
},
{
"epoch": 0.52,
"learning_rate": 9.752911956149367e-06,
"loss": 1.0821,
"step": 3160
},
{
"epoch": 0.52,
"learning_rate": 9.750770811921891e-06,
"loss": 1.0679,
"step": 3170
},
{
"epoch": 0.52,
"learning_rate": 9.748629667694417e-06,
"loss": 1.0939,
"step": 3180
},
{
"epoch": 0.52,
"learning_rate": 9.746488523466942e-06,
"loss": 1.0764,
"step": 3190
},
{
"epoch": 0.53,
"learning_rate": 9.744347379239466e-06,
"loss": 1.0772,
"step": 3200
},
{
"epoch": 0.53,
"learning_rate": 9.742206235011991e-06,
"loss": 1.0983,
"step": 3210
},
{
"epoch": 0.53,
"learning_rate": 9.740065090784516e-06,
"loss": 1.0649,
"step": 3220
},
{
"epoch": 0.53,
"learning_rate": 9.73792394655704e-06,
"loss": 1.0829,
"step": 3230
},
{
"epoch": 0.53,
"learning_rate": 9.735782802329565e-06,
"loss": 1.0914,
"step": 3240
},
{
"epoch": 0.53,
"learning_rate": 9.73364165810209e-06,
"loss": 1.0776,
"step": 3250
},
{
"epoch": 0.54,
"learning_rate": 9.731500513874616e-06,
"loss": 1.0698,
"step": 3260
},
{
"epoch": 0.54,
"learning_rate": 9.72935936964714e-06,
"loss": 1.074,
"step": 3270
},
{
"epoch": 0.54,
"learning_rate": 9.727218225419665e-06,
"loss": 1.0951,
"step": 3280
},
{
"epoch": 0.54,
"learning_rate": 9.72507708119219e-06,
"loss": 1.0586,
"step": 3290
},
{
"epoch": 0.54,
"learning_rate": 9.722935936964714e-06,
"loss": 1.066,
"step": 3300
},
{
"epoch": 0.54,
"learning_rate": 9.72079479273724e-06,
"loss": 1.0897,
"step": 3310
},
{
"epoch": 0.55,
"learning_rate": 9.718653648509765e-06,
"loss": 1.079,
"step": 3320
},
{
"epoch": 0.55,
"learning_rate": 9.716512504282289e-06,
"loss": 1.063,
"step": 3330
},
{
"epoch": 0.55,
"learning_rate": 9.714371360054814e-06,
"loss": 1.0688,
"step": 3340
},
{
"epoch": 0.55,
"learning_rate": 9.712230215827338e-06,
"loss": 1.0845,
"step": 3350
},
{
"epoch": 0.55,
"learning_rate": 9.710089071599865e-06,
"loss": 1.0421,
"step": 3360
},
{
"epoch": 0.55,
"learning_rate": 9.707947927372388e-06,
"loss": 1.0735,
"step": 3370
},
{
"epoch": 0.56,
"learning_rate": 9.705806783144914e-06,
"loss": 1.0848,
"step": 3380
},
{
"epoch": 0.56,
"learning_rate": 9.703665638917438e-06,
"loss": 1.0863,
"step": 3390
},
{
"epoch": 0.56,
"learning_rate": 9.701524494689963e-06,
"loss": 1.0372,
"step": 3400
},
{
"epoch": 0.56,
"learning_rate": 9.699383350462488e-06,
"loss": 1.0741,
"step": 3410
},
{
"epoch": 0.56,
"learning_rate": 9.697242206235012e-06,
"loss": 1.0988,
"step": 3420
},
{
"epoch": 0.56,
"learning_rate": 9.695101062007537e-06,
"loss": 1.0808,
"step": 3430
},
{
"epoch": 0.57,
"learning_rate": 9.692959917780063e-06,
"loss": 1.0717,
"step": 3440
},
{
"epoch": 0.57,
"learning_rate": 9.690818773552586e-06,
"loss": 1.0632,
"step": 3450
},
{
"epoch": 0.57,
"learning_rate": 9.688677629325112e-06,
"loss": 1.0539,
"step": 3460
},
{
"epoch": 0.57,
"learning_rate": 9.686536485097637e-06,
"loss": 1.0944,
"step": 3470
},
{
"epoch": 0.57,
"learning_rate": 9.684395340870162e-06,
"loss": 1.0682,
"step": 3480
},
{
"epoch": 0.57,
"learning_rate": 9.682254196642686e-06,
"loss": 1.066,
"step": 3490
},
{
"epoch": 0.57,
"learning_rate": 9.680113052415212e-06,
"loss": 1.0649,
"step": 3500
},
{
"epoch": 0.57,
"eval_multi_news_accuracy": 0.5626207290657621,
"eval_multi_news_loss": 1.90234375,
"eval_multi_news_runtime": 374.9582,
"eval_multi_news_samples_per_second": 14.994,
"eval_multi_news_steps_per_second": 1.875,
"step": 3500
},
{
"epoch": 0.57,
"eval_samsum_accuracy": 0.6482050796954074,
"eval_samsum_loss": 1.244140625,
"eval_samsum_runtime": 36.4554,
"eval_samsum_samples_per_second": 22.438,
"eval_samsum_steps_per_second": 2.825,
"step": 3500
},
{
"epoch": 0.57,
"eval_billsum_accuracy": 0.6542856069509964,
"eval_billsum_loss": 1.427734375,
"eval_billsum_runtime": 204.9118,
"eval_billsum_samples_per_second": 15.953,
"eval_billsum_steps_per_second": 1.996,
"step": 3500
},
{
"epoch": 0.57,
"eval_wmt2019_zh-en_accuracy": 0.5960585499733171,
"eval_wmt2019_zh-en_loss": 1.8671875,
"eval_wmt2019_zh-en_runtime": 42.5542,
"eval_wmt2019_zh-en_samples_per_second": 23.499,
"eval_wmt2019_zh-en_steps_per_second": 2.937,
"step": 3500
},
{
"epoch": 0.57,
"eval_ted_trans_en-ja_accuracy": 0.5799230113905279,
"eval_ted_trans_en-ja_loss": 1.7705078125,
"eval_ted_trans_en-ja_runtime": 35.599,
"eval_ted_trans_en-ja_samples_per_second": 22.501,
"eval_ted_trans_en-ja_steps_per_second": 2.837,
"step": 3500
},
{
"epoch": 0.57,
"eval_ted_trans_zh-ja_accuracy": 0.48124428179322964,
"eval_ted_trans_zh-ja_loss": 2.44140625,
"eval_ted_trans_zh-ja_runtime": 2.5311,
"eval_ted_trans_zh-ja_samples_per_second": 16.594,
"eval_ted_trans_zh-ja_steps_per_second": 2.371,
"step": 3500
},
{
"epoch": 0.57,
"eval_sharegpt_accuracy": 0.7523055854464493,
"eval_sharegpt_loss": 0.92236328125,
"eval_sharegpt_runtime": 732.6588,
"eval_sharegpt_samples_per_second": 4.57,
"eval_sharegpt_steps_per_second": 0.572,
"step": 3500
},
{
"epoch": 0.57,
"eval_dolly15k_accuracy": 0.5933216831140351,
"eval_dolly15k_loss": 1.65625,
"eval_dolly15k_runtime": 33.8299,
"eval_dolly15k_samples_per_second": 22.199,
"eval_dolly15k_steps_per_second": 2.779,
"step": 3500
},
{
"epoch": 0.57,
"eval_ikala_accuracy": 0.7489160065784373,
"eval_ikala_loss": 0.9208984375,
"eval_ikala_runtime": 886.4258,
"eval_ikala_samples_per_second": 16.026,
"eval_ikala_steps_per_second": 2.004,
"step": 3500
},
{
"epoch": 0.57,
"eval_oasst_export_accuracy": 0.6617497330814418,
"eval_oasst_export_loss": 1.572265625,
"eval_oasst_export_runtime": 135.1792,
"eval_oasst_export_samples_per_second": 15.528,
"eval_oasst_export_steps_per_second": 1.946,
"step": 3500
},
{
"epoch": 0.57,
"eval_joke_accuracy": 0.5256823351023503,
"eval_joke_loss": 2.013671875,
"eval_joke_runtime": 3.6235,
"eval_joke_samples_per_second": 20.974,
"eval_joke_steps_per_second": 2.76,
"step": 3500
},
{
"epoch": 0.57,
"eval_gsm8k_accuracy": 0.7892603458925262,
"eval_gsm8k_loss": 0.77001953125,
"eval_gsm8k_runtime": 56.8179,
"eval_gsm8k_samples_per_second": 23.215,
"eval_gsm8k_steps_per_second": 2.904,
"step": 3500
},
{
"epoch": 0.57,
"eval_webgpt_accuracy": 0.49841011281811054,
"eval_webgpt_loss": 2.19140625,
"eval_webgpt_runtime": 157.0655,
"eval_webgpt_samples_per_second": 22.685,
"eval_webgpt_steps_per_second": 2.84,
"step": 3500
},
{
"epoch": 0.58,
"learning_rate": 9.677971908187737e-06,
"loss": 1.0692,
"step": 3510
},
{
"epoch": 0.58,
"learning_rate": 9.67583076396026e-06,
"loss": 1.1059,
"step": 3520
},
{
"epoch": 0.58,
"learning_rate": 9.673689619732786e-06,
"loss": 1.0758,
"step": 3530
},
{
"epoch": 0.58,
"learning_rate": 9.671548475505311e-06,
"loss": 1.0386,
"step": 3540
},
{
"epoch": 0.58,
"learning_rate": 9.669407331277835e-06,
"loss": 1.0865,
"step": 3550
},
{
"epoch": 0.58,
"learning_rate": 9.66726618705036e-06,
"loss": 1.0537,
"step": 3560
},
{
"epoch": 0.59,
"learning_rate": 9.665125042822884e-06,
"loss": 1.0481,
"step": 3570
},
{
"epoch": 0.59,
"learning_rate": 9.662983898595411e-06,
"loss": 1.0811,
"step": 3580
},
{
"epoch": 0.59,
"learning_rate": 9.660842754367935e-06,
"loss": 1.0518,
"step": 3590
},
{
"epoch": 0.59,
"learning_rate": 9.65870161014046e-06,
"loss": 1.0756,
"step": 3600
},
{
"epoch": 0.59,
"learning_rate": 9.656560465912986e-06,
"loss": 1.0594,
"step": 3610
},
{
"epoch": 0.59,
"learning_rate": 9.65441932168551e-06,
"loss": 1.0842,
"step": 3620
},
{
"epoch": 0.6,
"learning_rate": 9.652278177458035e-06,
"loss": 1.0703,
"step": 3630
},
{
"epoch": 0.6,
"learning_rate": 9.650137033230558e-06,
"loss": 1.0649,
"step": 3640
},
{
"epoch": 0.6,
"learning_rate": 9.647995889003084e-06,
"loss": 1.0869,
"step": 3650
},
{
"epoch": 0.6,
"learning_rate": 9.645854744775609e-06,
"loss": 1.0494,
"step": 3660
},
{
"epoch": 0.6,
"learning_rate": 9.643713600548134e-06,
"loss": 1.0575,
"step": 3670
},
{
"epoch": 0.6,
"learning_rate": 9.641572456320658e-06,
"loss": 1.0846,
"step": 3680
},
{
"epoch": 0.61,
"learning_rate": 9.639431312093183e-06,
"loss": 1.0815,
"step": 3690
},
{
"epoch": 0.61,
"learning_rate": 9.637290167865709e-06,
"loss": 1.0593,
"step": 3700
},
{
"epoch": 0.61,
"learning_rate": 9.635149023638232e-06,
"loss": 1.0936,
"step": 3710
},
{
"epoch": 0.61,
"learning_rate": 9.633007879410758e-06,
"loss": 1.0249,
"step": 3720
},
{
"epoch": 0.61,
"learning_rate": 9.630866735183283e-06,
"loss": 1.0382,
"step": 3730
},
{
"epoch": 0.61,
"learning_rate": 9.628725590955807e-06,
"loss": 1.0528,
"step": 3740
},
{
"epoch": 0.62,
"learning_rate": 9.626584446728332e-06,
"loss": 1.0469,
"step": 3750
},
{
"epoch": 0.62,
"learning_rate": 9.624443302500858e-06,
"loss": 1.053,
"step": 3760
},
{
"epoch": 0.62,
"learning_rate": 9.622302158273383e-06,
"loss": 1.0301,
"step": 3770
},
{
"epoch": 0.62,
"learning_rate": 9.620161014045907e-06,
"loss": 1.0913,
"step": 3780
},
{
"epoch": 0.62,
"learning_rate": 9.618019869818432e-06,
"loss": 1.0633,
"step": 3790
},
{
"epoch": 0.62,
"learning_rate": 9.615878725590957e-06,
"loss": 1.0743,
"step": 3800
},
{
"epoch": 0.63,
"learning_rate": 9.613737581363481e-06,
"loss": 1.0486,
"step": 3810
},
{
"epoch": 0.63,
"learning_rate": 9.611596437136006e-06,
"loss": 1.0491,
"step": 3820
},
{
"epoch": 0.63,
"learning_rate": 9.609455292908532e-06,
"loss": 1.0736,
"step": 3830
},
{
"epoch": 0.63,
"learning_rate": 9.607314148681056e-06,
"loss": 1.0729,
"step": 3840
},
{
"epoch": 0.63,
"learning_rate": 9.605173004453581e-06,
"loss": 1.0625,
"step": 3850
},
{
"epoch": 0.63,
"learning_rate": 9.603031860226105e-06,
"loss": 1.0726,
"step": 3860
},
{
"epoch": 0.64,
"learning_rate": 9.600890715998632e-06,
"loss": 1.0666,
"step": 3870
},
{
"epoch": 0.64,
"learning_rate": 9.598749571771155e-06,
"loss": 1.0773,
"step": 3880
},
{
"epoch": 0.64,
"learning_rate": 9.59660842754368e-06,
"loss": 1.065,
"step": 3890
},
{
"epoch": 0.64,
"learning_rate": 9.594467283316204e-06,
"loss": 1.0404,
"step": 3900
},
{
"epoch": 0.64,
"learning_rate": 9.59232613908873e-06,
"loss": 1.0717,
"step": 3910
},
{
"epoch": 0.64,
"learning_rate": 9.590184994861255e-06,
"loss": 1.0667,
"step": 3920
},
{
"epoch": 0.65,
"learning_rate": 9.588043850633779e-06,
"loss": 1.0603,
"step": 3930
},
{
"epoch": 0.65,
"learning_rate": 9.585902706406304e-06,
"loss": 1.0452,
"step": 3940
},
{
"epoch": 0.65,
"learning_rate": 9.58376156217883e-06,
"loss": 1.0681,
"step": 3950
},
{
"epoch": 0.65,
"learning_rate": 9.581620417951353e-06,
"loss": 1.075,
"step": 3960
},
{
"epoch": 0.65,
"learning_rate": 9.579479273723879e-06,
"loss": 1.0735,
"step": 3970
},
{
"epoch": 0.65,
"learning_rate": 9.577338129496404e-06,
"loss": 1.0859,
"step": 3980
},
{
"epoch": 0.66,
"learning_rate": 9.57519698526893e-06,
"loss": 1.0498,
"step": 3990
},
{
"epoch": 0.66,
"learning_rate": 9.573055841041453e-06,
"loss": 1.0353,
"step": 4000
},
{
"epoch": 0.66,
"eval_multi_news_accuracy": 0.5627512384133357,
"eval_multi_news_loss": 1.9013671875,
"eval_multi_news_runtime": 374.2642,
"eval_multi_news_samples_per_second": 15.021,
"eval_multi_news_steps_per_second": 1.878,
"step": 4000
},
{
"epoch": 0.66,
"eval_samsum_accuracy": 0.6499550678711631,
"eval_samsum_loss": 1.228515625,
"eval_samsum_runtime": 37.472,
"eval_samsum_samples_per_second": 21.83,
"eval_samsum_steps_per_second": 2.749,
"step": 4000
},
{
"epoch": 0.66,
"eval_billsum_accuracy": 0.6559680786548813,
"eval_billsum_loss": 1.4189453125,
"eval_billsum_runtime": 204.7196,
"eval_billsum_samples_per_second": 15.968,
"eval_billsum_steps_per_second": 1.998,
"step": 4000
},
{
"epoch": 0.66,
"eval_wmt2019_zh-en_accuracy": 0.605999539382773,
"eval_wmt2019_zh-en_loss": 1.8330078125,
"eval_wmt2019_zh-en_runtime": 43.2969,
"eval_wmt2019_zh-en_samples_per_second": 23.096,
"eval_wmt2019_zh-en_steps_per_second": 2.887,
"step": 4000
},
{
"epoch": 0.66,
"eval_ted_trans_en-ja_accuracy": 0.591362074351765,
"eval_ted_trans_en-ja_loss": 1.7236328125,
"eval_ted_trans_en-ja_runtime": 36.1634,
"eval_ted_trans_en-ja_samples_per_second": 22.149,
"eval_ted_trans_en-ja_steps_per_second": 2.793,
"step": 4000
},
{
"epoch": 0.66,
"eval_ted_trans_zh-ja_accuracy": 0.4934623430962343,
"eval_ted_trans_zh-ja_loss": 2.33984375,
"eval_ted_trans_zh-ja_runtime": 2.8371,
"eval_ted_trans_zh-ja_samples_per_second": 14.804,
"eval_ted_trans_zh-ja_steps_per_second": 2.115,
"step": 4000
},
{
"epoch": 0.66,
"eval_sharegpt_accuracy": 0.7566927466258041,
"eval_sharegpt_loss": 0.90234375,
"eval_sharegpt_runtime": 732.9729,
"eval_sharegpt_samples_per_second": 4.568,
"eval_sharegpt_steps_per_second": 0.572,
"step": 4000
},
{
"epoch": 0.66,
"eval_dolly15k_accuracy": 0.5929618969298246,
"eval_dolly15k_loss": 1.65625,
"eval_dolly15k_runtime": 33.6288,
"eval_dolly15k_samples_per_second": 22.332,
"eval_dolly15k_steps_per_second": 2.795,
"step": 4000
},
{
"epoch": 0.66,
"eval_ikala_accuracy": 0.7508814779446873,
"eval_ikala_loss": 0.91015625,
"eval_ikala_runtime": 887.5958,
"eval_ikala_samples_per_second": 16.005,
"eval_ikala_steps_per_second": 2.001,
"step": 4000
},
{
"epoch": 0.66,
"eval_oasst_export_accuracy": 0.6615836827915093,
"eval_oasst_export_loss": 1.57421875,
"eval_oasst_export_runtime": 134.7449,
"eval_oasst_export_samples_per_second": 15.578,
"eval_oasst_export_steps_per_second": 1.952,
"step": 4000
},
{
"epoch": 0.66,
"eval_joke_accuracy": 0.535538286580743,
"eval_joke_loss": 1.9736328125,
"eval_joke_runtime": 3.6334,
"eval_joke_samples_per_second": 20.917,
"eval_joke_steps_per_second": 2.752,
"step": 4000
},
{
"epoch": 0.66,
"eval_gsm8k_accuracy": 0.7943020382952439,
"eval_gsm8k_loss": 0.74560546875,
"eval_gsm8k_runtime": 57.4917,
"eval_gsm8k_samples_per_second": 22.942,
"eval_gsm8k_steps_per_second": 2.87,
"step": 4000
},
{
"epoch": 0.66,
"eval_webgpt_accuracy": 0.49873483307959016,
"eval_webgpt_loss": 2.19140625,
"eval_webgpt_runtime": 155.9655,
"eval_webgpt_samples_per_second": 22.845,
"eval_webgpt_steps_per_second": 2.86,
"step": 4000
},
{
"epoch": 0.66,
"learning_rate": 9.570914696813978e-06,
"loss": 1.0657,
"step": 4010
},
{
"epoch": 0.66,
"learning_rate": 9.568773552586504e-06,
"loss": 1.0743,
"step": 4020
},
{
"epoch": 0.66,
"learning_rate": 9.566632408359027e-06,
"loss": 1.0543,
"step": 4030
},
{
"epoch": 0.66,
"learning_rate": 9.564491264131553e-06,
"loss": 1.0457,
"step": 4040
},
{
"epoch": 0.67,
"learning_rate": 9.562350119904078e-06,
"loss": 1.0546,
"step": 4050
},
{
"epoch": 0.67,
"learning_rate": 9.560208975676602e-06,
"loss": 1.0485,
"step": 4060
},
{
"epoch": 0.67,
"learning_rate": 9.558067831449127e-06,
"loss": 1.0535,
"step": 4070
},
{
"epoch": 0.67,
"learning_rate": 9.555926687221651e-06,
"loss": 1.0603,
"step": 4080
},
{
"epoch": 0.67,
"learning_rate": 9.553785542994178e-06,
"loss": 1.0444,
"step": 4090
},
{
"epoch": 0.67,
"learning_rate": 9.551644398766702e-06,
"loss": 1.0482,
"step": 4100
},
{
"epoch": 0.68,
"learning_rate": 9.549503254539227e-06,
"loss": 1.0509,
"step": 4110
},
{
"epoch": 0.68,
"learning_rate": 9.54736211031175e-06,
"loss": 1.036,
"step": 4120
},
{
"epoch": 0.68,
"learning_rate": 9.545220966084276e-06,
"loss": 1.0457,
"step": 4130
},
{
"epoch": 0.68,
"learning_rate": 9.543079821856801e-06,
"loss": 1.065,
"step": 4140
},
{
"epoch": 0.68,
"learning_rate": 9.540938677629325e-06,
"loss": 1.0441,
"step": 4150
},
{
"epoch": 0.68,
"learning_rate": 9.53879753340185e-06,
"loss": 1.047,
"step": 4160
},
{
"epoch": 0.68,
"learning_rate": 9.536656389174376e-06,
"loss": 1.05,
"step": 4170
},
{
"epoch": 0.69,
"learning_rate": 9.5345152449469e-06,
"loss": 1.0615,
"step": 4180
},
{
"epoch": 0.69,
"learning_rate": 9.532374100719425e-06,
"loss": 1.0575,
"step": 4190
},
{
"epoch": 0.69,
"learning_rate": 9.53023295649195e-06,
"loss": 1.0614,
"step": 4200
},
{
"epoch": 0.69,
"learning_rate": 9.528091812264476e-06,
"loss": 1.0504,
"step": 4210
},
{
"epoch": 0.69,
"learning_rate": 9.525950668037e-06,
"loss": 1.0401,
"step": 4220
},
{
"epoch": 0.69,
"learning_rate": 9.524023638232272e-06,
"loss": 1.0376,
"step": 4230
},
{
"epoch": 0.7,
"learning_rate": 9.521882494004797e-06,
"loss": 1.0265,
"step": 4240
},
{
"epoch": 0.7,
"learning_rate": 9.519741349777321e-06,
"loss": 1.0636,
"step": 4250
},
{
"epoch": 0.7,
"learning_rate": 9.517600205549846e-06,
"loss": 1.059,
"step": 4260
},
{
"epoch": 0.7,
"learning_rate": 9.515459061322372e-06,
"loss": 1.0552,
"step": 4270
},
{
"epoch": 0.7,
"learning_rate": 9.513317917094897e-06,
"loss": 1.0577,
"step": 4280
},
{
"epoch": 0.7,
"learning_rate": 9.51117677286742e-06,
"loss": 1.034,
"step": 4290
},
{
"epoch": 0.71,
"learning_rate": 9.509035628639946e-06,
"loss": 1.0697,
"step": 4300
},
{
"epoch": 0.71,
"learning_rate": 9.506894484412471e-06,
"loss": 1.0392,
"step": 4310
},
{
"epoch": 0.71,
"learning_rate": 9.504753340184995e-06,
"loss": 1.0069,
"step": 4320
},
{
"epoch": 0.71,
"learning_rate": 9.50261219595752e-06,
"loss": 1.0583,
"step": 4330
},
{
"epoch": 0.71,
"learning_rate": 9.500471051730046e-06,
"loss": 1.0522,
"step": 4340
},
{
"epoch": 0.71,
"learning_rate": 9.49832990750257e-06,
"loss": 1.0315,
"step": 4350
},
{
"epoch": 0.72,
"learning_rate": 9.496188763275095e-06,
"loss": 1.057,
"step": 4360
},
{
"epoch": 0.72,
"learning_rate": 9.494047619047619e-06,
"loss": 1.0513,
"step": 4370
},
{
"epoch": 0.72,
"learning_rate": 9.491906474820146e-06,
"loss": 1.0342,
"step": 4380
},
{
"epoch": 0.72,
"learning_rate": 9.48976533059267e-06,
"loss": 1.0559,
"step": 4390
},
{
"epoch": 0.72,
"learning_rate": 9.487624186365195e-06,
"loss": 1.0377,
"step": 4400
},
{
"epoch": 0.72,
"learning_rate": 9.485483042137718e-06,
"loss": 1.0512,
"step": 4410
},
{
"epoch": 0.73,
"learning_rate": 9.483341897910244e-06,
"loss": 1.0439,
"step": 4420
},
{
"epoch": 0.73,
"learning_rate": 9.48120075368277e-06,
"loss": 1.0344,
"step": 4430
},
{
"epoch": 0.73,
"learning_rate": 9.479059609455293e-06,
"loss": 1.0343,
"step": 4440
},
{
"epoch": 0.73,
"learning_rate": 9.477132579650567e-06,
"loss": 1.0463,
"step": 4450
},
{
"epoch": 0.73,
"learning_rate": 9.47499143542309e-06,
"loss": 1.0443,
"step": 4460
},
{
"epoch": 0.73,
"learning_rate": 9.472850291195616e-06,
"loss": 1.0559,
"step": 4470
},
{
"epoch": 0.74,
"learning_rate": 9.47070914696814e-06,
"loss": 1.0555,
"step": 4480
},
{
"epoch": 0.74,
"learning_rate": 9.468568002740665e-06,
"loss": 1.0267,
"step": 4490
},
{
"epoch": 0.74,
"learning_rate": 9.46642685851319e-06,
"loss": 1.042,
"step": 4500
},
{
"epoch": 0.74,
"eval_multi_news_accuracy": 0.5627252632519255,
"eval_multi_news_loss": 1.9013671875,
"eval_multi_news_runtime": 374.4153,
"eval_multi_news_samples_per_second": 15.015,
"eval_multi_news_steps_per_second": 1.878,
"step": 4500
},
{
"epoch": 0.74,
"eval_samsum_accuracy": 0.6482050796954074,
"eval_samsum_loss": 1.2255859375,
"eval_samsum_runtime": 38.0285,
"eval_samsum_samples_per_second": 21.51,
"eval_samsum_steps_per_second": 2.708,
"step": 4500
},
{
"epoch": 0.74,
"eval_billsum_accuracy": 0.6552127105772998,
"eval_billsum_loss": 1.4169921875,
"eval_billsum_runtime": 204.5119,
"eval_billsum_samples_per_second": 15.984,
"eval_billsum_steps_per_second": 2.0,
"step": 4500
},
{
"epoch": 0.74,
"eval_wmt2019_zh-en_accuracy": 0.5987996483045988,
"eval_wmt2019_zh-en_loss": 1.8505859375,
"eval_wmt2019_zh-en_runtime": 43.4755,
"eval_wmt2019_zh-en_samples_per_second": 23.001,
"eval_wmt2019_zh-en_steps_per_second": 2.875,
"step": 4500
},
{
"epoch": 0.74,
"eval_ted_trans_en-ja_accuracy": 0.5948422811429342,
"eval_ted_trans_en-ja_loss": 1.69140625,
"eval_ted_trans_en-ja_runtime": 36.0142,
"eval_ted_trans_en-ja_samples_per_second": 22.241,
"eval_ted_trans_en-ja_steps_per_second": 2.804,
"step": 4500
},
{
"epoch": 0.74,
"eval_ted_trans_zh-ja_accuracy": 0.521213679609154,
"eval_ted_trans_zh-ja_loss": 2.265625,
"eval_ted_trans_zh-ja_runtime": 2.289,
"eval_ted_trans_zh-ja_samples_per_second": 18.349,
"eval_ted_trans_zh-ja_steps_per_second": 2.621,
"step": 4500
},
{
"epoch": 0.74,
"eval_sharegpt_accuracy": 0.7602375296761273,
"eval_sharegpt_loss": 0.884765625,
"eval_sharegpt_runtime": 733.0075,
"eval_sharegpt_samples_per_second": 4.567,
"eval_sharegpt_steps_per_second": 0.572,
"step": 4500
},
{
"epoch": 0.74,
"eval_dolly15k_accuracy": 0.5921566611842105,
"eval_dolly15k_loss": 1.65234375,
"eval_dolly15k_runtime": 33.7747,
"eval_dolly15k_samples_per_second": 22.236,
"eval_dolly15k_steps_per_second": 2.783,
"step": 4500
},
{
"epoch": 0.74,
"eval_ikala_accuracy": 0.7515879865982705,
"eval_ikala_loss": 0.90576171875,
"eval_ikala_runtime": 884.7883,
"eval_ikala_samples_per_second": 16.056,
"eval_ikala_steps_per_second": 2.007,
"step": 4500
},
{
"epoch": 0.74,
"eval_oasst_export_accuracy": 0.6615137668799588,
"eval_oasst_export_loss": 1.5693359375,
"eval_oasst_export_runtime": 134.5394,
"eval_oasst_export_samples_per_second": 15.601,
"eval_oasst_export_steps_per_second": 1.955,
"step": 4500
},
{
"epoch": 0.74,
"eval_joke_accuracy": 0.5379075056861259,
"eval_joke_loss": 1.966796875,
"eval_joke_runtime": 4.5957,
"eval_joke_samples_per_second": 16.537,
"eval_joke_steps_per_second": 2.176,
"step": 4500
},
{
"epoch": 0.74,
"eval_gsm8k_accuracy": 0.7959928968499074,
"eval_gsm8k_loss": 0.74072265625,
"eval_gsm8k_runtime": 57.0884,
"eval_gsm8k_samples_per_second": 23.105,
"eval_gsm8k_steps_per_second": 2.89,
"step": 4500
},
{
"epoch": 0.74,
"eval_webgpt_accuracy": 0.4989797040964437,
"eval_webgpt_loss": 2.19140625,
"eval_webgpt_runtime": 157.3673,
"eval_webgpt_samples_per_second": 22.641,
"eval_webgpt_steps_per_second": 2.834,
"step": 4500
},
{
"epoch": 0.74,
"learning_rate": 9.464285714285714e-06,
"loss": 1.0577,
"step": 4510
},
{
"epoch": 0.74,
"learning_rate": 9.46214457005824e-06,
"loss": 1.0465,
"step": 4520
},
{
"epoch": 0.74,
"learning_rate": 9.460003425830765e-06,
"loss": 1.0635,
"step": 4530
},
{
"epoch": 0.75,
"learning_rate": 9.457862281603289e-06,
"loss": 1.035,
"step": 4540
},
{
"epoch": 0.75,
"learning_rate": 9.455721137375814e-06,
"loss": 1.0623,
"step": 4550
},
{
"epoch": 0.75,
"learning_rate": 9.45357999314834e-06,
"loss": 1.0279,
"step": 4560
},
{
"epoch": 0.75,
"learning_rate": 9.451438848920865e-06,
"loss": 1.0287,
"step": 4570
},
{
"epoch": 0.75,
"learning_rate": 9.449297704693388e-06,
"loss": 1.0567,
"step": 4580
},
{
"epoch": 0.75,
"learning_rate": 9.447156560465914e-06,
"loss": 1.0246,
"step": 4590
},
{
"epoch": 0.76,
"learning_rate": 9.445015416238439e-06,
"loss": 1.0352,
"step": 4600
},
{
"epoch": 0.76,
"learning_rate": 9.442874272010963e-06,
"loss": 1.0493,
"step": 4610
},
{
"epoch": 0.76,
"learning_rate": 9.440733127783488e-06,
"loss": 1.0435,
"step": 4620
},
{
"epoch": 0.76,
"learning_rate": 9.438591983556014e-06,
"loss": 1.0418,
"step": 4630
},
{
"epoch": 0.76,
"learning_rate": 9.436450839328539e-06,
"loss": 1.0586,
"step": 4640
},
{
"epoch": 0.76,
"learning_rate": 9.434309695101063e-06,
"loss": 1.0041,
"step": 4650
},
{
"epoch": 0.77,
"learning_rate": 9.432168550873588e-06,
"loss": 1.0236,
"step": 4660
},
{
"epoch": 0.77,
"learning_rate": 9.430027406646113e-06,
"loss": 1.0377,
"step": 4670
},
{
"epoch": 0.77,
"learning_rate": 9.427886262418637e-06,
"loss": 1.0385,
"step": 4680
},
{
"epoch": 0.77,
"learning_rate": 9.425745118191162e-06,
"loss": 1.0418,
"step": 4690
},
{
"epoch": 0.77,
"learning_rate": 9.423603973963686e-06,
"loss": 1.0304,
"step": 4700
},
{
"epoch": 0.77,
"learning_rate": 9.421462829736211e-06,
"loss": 1.0376,
"step": 4710
},
{
"epoch": 0.78,
"learning_rate": 9.419321685508737e-06,
"loss": 1.0377,
"step": 4720
},
{
"epoch": 0.78,
"learning_rate": 9.41718054128126e-06,
"loss": 1.0451,
"step": 4730
},
{
"epoch": 0.78,
"learning_rate": 9.415039397053788e-06,
"loss": 1.0359,
"step": 4740
},
{
"epoch": 0.78,
"learning_rate": 9.412898252826311e-06,
"loss": 1.0375,
"step": 4750
},
{
"epoch": 0.78,
"learning_rate": 9.410757108598837e-06,
"loss": 1.0575,
"step": 4760
},
{
"epoch": 0.78,
"learning_rate": 9.40861596437136e-06,
"loss": 1.042,
"step": 4770
},
{
"epoch": 0.79,
"learning_rate": 9.406474820143886e-06,
"loss": 1.0405,
"step": 4780
},
{
"epoch": 0.79,
"learning_rate": 9.404333675916411e-06,
"loss": 1.0538,
"step": 4790
},
{
"epoch": 0.79,
"learning_rate": 9.402192531688935e-06,
"loss": 1.0168,
"step": 4800
},
{
"epoch": 0.79,
"learning_rate": 9.40005138746146e-06,
"loss": 1.0406,
"step": 4810
},
{
"epoch": 0.79,
"learning_rate": 9.397910243233985e-06,
"loss": 1.0419,
"step": 4820
},
{
"epoch": 0.79,
"learning_rate": 9.39576909900651e-06,
"loss": 1.0249,
"step": 4830
},
{
"epoch": 0.79,
"learning_rate": 9.393627954779035e-06,
"loss": 1.0455,
"step": 4840
},
{
"epoch": 0.8,
"learning_rate": 9.39148681055156e-06,
"loss": 1.0314,
"step": 4850
},
{
"epoch": 0.8,
"learning_rate": 9.389345666324085e-06,
"loss": 1.0365,
"step": 4860
},
{
"epoch": 0.8,
"learning_rate": 9.387204522096609e-06,
"loss": 1.0503,
"step": 4870
},
{
"epoch": 0.8,
"learning_rate": 9.385063377869134e-06,
"loss": 1.0134,
"step": 4880
},
{
"epoch": 0.8,
"learning_rate": 9.38292223364166e-06,
"loss": 1.0655,
"step": 4890
},
{
"epoch": 0.8,
"learning_rate": 9.380781089414183e-06,
"loss": 1.0403,
"step": 4900
},
{
"epoch": 0.81,
"learning_rate": 9.378639945186709e-06,
"loss": 1.042,
"step": 4910
},
{
"epoch": 0.81,
"learning_rate": 9.376498800959234e-06,
"loss": 1.0564,
"step": 4920
},
{
"epoch": 0.81,
"learning_rate": 9.374357656731758e-06,
"loss": 1.0469,
"step": 4930
},
{
"epoch": 0.81,
"learning_rate": 9.372216512504283e-06,
"loss": 1.0323,
"step": 4940
},
{
"epoch": 0.81,
"learning_rate": 9.370075368276807e-06,
"loss": 1.0434,
"step": 4950
},
{
"epoch": 0.81,
"learning_rate": 9.367934224049334e-06,
"loss": 1.0474,
"step": 4960
},
{
"epoch": 0.82,
"learning_rate": 9.365793079821858e-06,
"loss": 1.0576,
"step": 4970
},
{
"epoch": 0.82,
"learning_rate": 9.363651935594383e-06,
"loss": 1.0588,
"step": 4980
},
{
"epoch": 0.82,
"learning_rate": 9.361510791366907e-06,
"loss": 1.0369,
"step": 4990
},
{
"epoch": 0.82,
"learning_rate": 9.359369647139432e-06,
"loss": 1.0159,
"step": 5000
},
{
"epoch": 0.82,
"eval_multi_news_accuracy": 0.5636508659548628,
"eval_multi_news_loss": 1.9013671875,
"eval_multi_news_runtime": 374.2666,
"eval_multi_news_samples_per_second": 15.021,
"eval_multi_news_steps_per_second": 1.878,
"step": 5000
},
{
"epoch": 0.82,
"eval_samsum_accuracy": 0.6542117958662441,
"eval_samsum_loss": 1.22265625,
"eval_samsum_runtime": 37.6015,
"eval_samsum_samples_per_second": 21.754,
"eval_samsum_steps_per_second": 2.739,
"step": 5000
},
{
"epoch": 0.82,
"eval_billsum_accuracy": 0.6560928552644996,
"eval_billsum_loss": 1.412109375,
"eval_billsum_runtime": 204.697,
"eval_billsum_samples_per_second": 15.97,
"eval_billsum_steps_per_second": 1.998,
"step": 5000
},
{
"epoch": 0.82,
"eval_wmt2019_zh-en_accuracy": 0.6045209655463313,
"eval_wmt2019_zh-en_loss": 1.826171875,
"eval_wmt2019_zh-en_runtime": 43.7238,
"eval_wmt2019_zh-en_samples_per_second": 22.871,
"eval_wmt2019_zh-en_steps_per_second": 2.859,
"step": 5000
},
{
"epoch": 0.82,
"eval_ted_trans_en-ja_accuracy": 0.5986221480612599,
"eval_ted_trans_en-ja_loss": 1.6884765625,
"eval_ted_trans_en-ja_runtime": 35.5277,
"eval_ted_trans_en-ja_samples_per_second": 22.546,
"eval_ted_trans_en-ja_steps_per_second": 2.843,
"step": 5000
},
{
"epoch": 0.82,
"eval_ted_trans_zh-ja_accuracy": 0.49819293855991104,
"eval_ted_trans_zh-ja_loss": 2.29296875,
"eval_ted_trans_zh-ja_runtime": 2.6448,
"eval_ted_trans_zh-ja_samples_per_second": 15.88,
"eval_ted_trans_zh-ja_steps_per_second": 2.269,
"step": 5000
},
{
"epoch": 0.82,
"eval_sharegpt_accuracy": 0.7644941905120852,
"eval_sharegpt_loss": 0.86474609375,
"eval_sharegpt_runtime": 732.8989,
"eval_sharegpt_samples_per_second": 4.568,
"eval_sharegpt_steps_per_second": 0.572,
"step": 5000
},
{
"epoch": 0.82,
"eval_dolly15k_accuracy": 0.5935786732456141,
"eval_dolly15k_loss": 1.6474609375,
"eval_dolly15k_runtime": 33.5372,
"eval_dolly15k_samples_per_second": 22.393,
"eval_dolly15k_steps_per_second": 2.803,
"step": 5000
},
{
"epoch": 0.82,
"eval_ikala_accuracy": 0.7535167297168253,
"eval_ikala_loss": 0.89990234375,
"eval_ikala_runtime": 887.8229,
"eval_ikala_samples_per_second": 16.001,
"eval_ikala_steps_per_second": 2.0,
"step": 5000
},
{
"epoch": 0.82,
"eval_oasst_export_accuracy": 0.6621677719692545,
"eval_oasst_export_loss": 1.5673828125,
"eval_oasst_export_runtime": 134.0182,
"eval_oasst_export_samples_per_second": 15.662,
"eval_oasst_export_steps_per_second": 1.962,
"step": 5000
},
{
"epoch": 0.82,
"eval_joke_accuracy": 0.5397081122062168,
"eval_joke_loss": 1.966796875,
"eval_joke_runtime": 4.7628,
"eval_joke_samples_per_second": 15.957,
"eval_joke_steps_per_second": 2.1,
"step": 5000
},
{
"epoch": 0.82,
"eval_gsm8k_accuracy": 0.7978304508956146,
"eval_gsm8k_loss": 0.73388671875,
"eval_gsm8k_runtime": 58.2683,
"eval_gsm8k_samples_per_second": 22.637,
"eval_gsm8k_steps_per_second": 2.832,
"step": 5000
},
{
"epoch": 0.82,
"eval_webgpt_accuracy": 0.4992032819813969,
"eval_webgpt_loss": 2.189453125,
"eval_webgpt_runtime": 158.2652,
"eval_webgpt_samples_per_second": 22.513,
"eval_webgpt_steps_per_second": 2.818,
"step": 5000
}
],
"max_steps": 48704,
"num_train_epochs": 8,
"total_flos": 3017973738504192.0,
"trial_name": null,
"trial_params": null
}