llama2-japanesewiki-chat / trainer_state.json
{
"best_metric": 41.125,
"best_model_checkpoint": "result/Chat-Llama-2-13b-chat-hf-mezo-ft-20000-16-1e-3-1e-1-0/checkpoint-4000",
"epoch": 63.492063492063494,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 0.001,
"loss": 31.3156,
"step": 10
},
{
"epoch": 0.32,
"learning_rate": 0.001,
"loss": 31.675,
"step": 20
},
{
"epoch": 0.48,
"learning_rate": 0.001,
"loss": 31.5031,
"step": 30
},
{
"epoch": 0.63,
"learning_rate": 0.001,
"loss": 31.4563,
"step": 40
},
{
"epoch": 0.79,
"learning_rate": 0.001,
"loss": 31.4125,
"step": 50
},
{
"epoch": 0.95,
"learning_rate": 0.001,
"loss": 31.7234,
"step": 60
},
{
"epoch": 1.11,
"learning_rate": 0.001,
"loss": 31.6281,
"step": 70
},
{
"epoch": 1.27,
"learning_rate": 0.001,
"loss": 31.6859,
"step": 80
},
{
"epoch": 1.43,
"learning_rate": 0.001,
"loss": 31.6109,
"step": 90
},
{
"epoch": 1.59,
"learning_rate": 0.001,
"loss": 31.7641,
"step": 100
},
{
"epoch": 1.75,
"learning_rate": 0.001,
"loss": 31.7063,
"step": 110
},
{
"epoch": 1.9,
"learning_rate": 0.001,
"loss": 31.85,
"step": 120
},
{
"epoch": 2.06,
"learning_rate": 0.001,
"loss": 31.975,
"step": 130
},
{
"epoch": 2.22,
"learning_rate": 0.001,
"loss": 31.9344,
"step": 140
},
{
"epoch": 2.38,
"learning_rate": 0.001,
"loss": 32.0484,
"step": 150
},
{
"epoch": 2.54,
"learning_rate": 0.001,
"loss": 32.0344,
"step": 160
},
{
"epoch": 2.7,
"learning_rate": 0.001,
"loss": 32.0703,
"step": 170
},
{
"epoch": 2.86,
"learning_rate": 0.001,
"loss": 32.0656,
"step": 180
},
{
"epoch": 3.02,
"learning_rate": 0.001,
"loss": 32.0734,
"step": 190
},
{
"epoch": 3.17,
"learning_rate": 0.001,
"loss": 32.2547,
"step": 200
},
{
"epoch": 3.33,
"learning_rate": 0.001,
"loss": 32.1437,
"step": 210
},
{
"epoch": 3.49,
"learning_rate": 0.001,
"loss": 32.3672,
"step": 220
},
{
"epoch": 3.65,
"learning_rate": 0.001,
"loss": 32.2766,
"step": 230
},
{
"epoch": 3.81,
"learning_rate": 0.001,
"loss": 32.3906,
"step": 240
},
{
"epoch": 3.97,
"learning_rate": 0.001,
"loss": 32.6156,
"step": 250
},
{
"epoch": 4.13,
"learning_rate": 0.001,
"loss": 32.5547,
"step": 260
},
{
"epoch": 4.29,
"learning_rate": 0.001,
"loss": 32.8094,
"step": 270
},
{
"epoch": 4.44,
"learning_rate": 0.001,
"loss": 32.7406,
"step": 280
},
{
"epoch": 4.6,
"learning_rate": 0.001,
"loss": 32.9031,
"step": 290
},
{
"epoch": 4.76,
"learning_rate": 0.001,
"loss": 32.8188,
"step": 300
},
{
"epoch": 4.92,
"learning_rate": 0.001,
"loss": 32.9562,
"step": 310
},
{
"epoch": 5.08,
"learning_rate": 0.001,
"loss": 32.9469,
"step": 320
},
{
"epoch": 5.24,
"learning_rate": 0.001,
"loss": 33.0625,
"step": 330
},
{
"epoch": 5.4,
"learning_rate": 0.001,
"loss": 33.0594,
"step": 340
},
{
"epoch": 5.56,
"learning_rate": 0.001,
"loss": 33.1031,
"step": 350
},
{
"epoch": 5.71,
"learning_rate": 0.001,
"loss": 33.2094,
"step": 360
},
{
"epoch": 5.87,
"learning_rate": 0.001,
"loss": 33.2906,
"step": 370
},
{
"epoch": 6.03,
"learning_rate": 0.001,
"loss": 33.3406,
"step": 380
},
{
"epoch": 6.19,
"learning_rate": 0.001,
"loss": 33.2656,
"step": 390
},
{
"epoch": 6.35,
"learning_rate": 0.001,
"loss": 33.3719,
"step": 400
},
{
"epoch": 6.51,
"learning_rate": 0.001,
"loss": 33.4844,
"step": 410
},
{
"epoch": 6.67,
"learning_rate": 0.001,
"loss": 33.4625,
"step": 420
},
{
"epoch": 6.83,
"learning_rate": 0.001,
"loss": 33.4438,
"step": 430
},
{
"epoch": 6.98,
"learning_rate": 0.001,
"loss": 33.5844,
"step": 440
},
{
"epoch": 7.14,
"learning_rate": 0.001,
"loss": 33.6281,
"step": 450
},
{
"epoch": 7.3,
"learning_rate": 0.001,
"loss": 33.7062,
"step": 460
},
{
"epoch": 7.46,
"learning_rate": 0.001,
"loss": 33.6719,
"step": 470
},
{
"epoch": 7.62,
"learning_rate": 0.001,
"loss": 33.7156,
"step": 480
},
{
"epoch": 7.78,
"learning_rate": 0.001,
"loss": 33.75,
"step": 490
},
{
"epoch": 7.94,
"learning_rate": 0.001,
"loss": 33.8344,
"step": 500
},
{
"epoch": 8.1,
"learning_rate": 0.001,
"loss": 33.725,
"step": 510
},
{
"epoch": 8.25,
"learning_rate": 0.001,
"loss": 33.9688,
"step": 520
},
{
"epoch": 8.41,
"learning_rate": 0.001,
"loss": 33.9344,
"step": 530
},
{
"epoch": 8.57,
"learning_rate": 0.001,
"loss": 34.1,
"step": 540
},
{
"epoch": 8.73,
"learning_rate": 0.001,
"loss": 34.0719,
"step": 550
},
{
"epoch": 8.89,
"learning_rate": 0.001,
"loss": 34.2563,
"step": 560
},
{
"epoch": 9.05,
"learning_rate": 0.001,
"loss": 34.0438,
"step": 570
},
{
"epoch": 9.21,
"learning_rate": 0.001,
"loss": 34.3094,
"step": 580
},
{
"epoch": 9.37,
"learning_rate": 0.001,
"loss": 34.3687,
"step": 590
},
{
"epoch": 9.52,
"learning_rate": 0.001,
"loss": 34.4406,
"step": 600
},
{
"epoch": 9.68,
"learning_rate": 0.001,
"loss": 34.3531,
"step": 610
},
{
"epoch": 9.84,
"learning_rate": 0.001,
"loss": 34.3344,
"step": 620
},
{
"epoch": 10.0,
"learning_rate": 0.001,
"loss": 34.4125,
"step": 630
},
{
"epoch": 10.16,
"learning_rate": 0.001,
"loss": 34.4,
"step": 640
},
{
"epoch": 10.32,
"learning_rate": 0.001,
"loss": 34.4906,
"step": 650
},
{
"epoch": 10.48,
"learning_rate": 0.001,
"loss": 34.5125,
"step": 660
},
{
"epoch": 10.63,
"learning_rate": 0.001,
"loss": 34.5875,
"step": 670
},
{
"epoch": 10.79,
"learning_rate": 0.001,
"loss": 34.7281,
"step": 680
},
{
"epoch": 10.95,
"learning_rate": 0.001,
"loss": 34.825,
"step": 690
},
{
"epoch": 11.11,
"learning_rate": 0.001,
"loss": 34.7437,
"step": 700
},
{
"epoch": 11.27,
"learning_rate": 0.001,
"loss": 34.6531,
"step": 710
},
{
"epoch": 11.43,
"learning_rate": 0.001,
"loss": 34.8531,
"step": 720
},
{
"epoch": 11.59,
"learning_rate": 0.001,
"loss": 34.8438,
"step": 730
},
{
"epoch": 11.75,
"learning_rate": 0.001,
"loss": 35.0,
"step": 740
},
{
"epoch": 11.9,
"learning_rate": 0.001,
"loss": 34.9562,
"step": 750
},
{
"epoch": 12.06,
"learning_rate": 0.001,
"loss": 34.9781,
"step": 760
},
{
"epoch": 12.22,
"learning_rate": 0.001,
"loss": 35.0625,
"step": 770
},
{
"epoch": 12.38,
"learning_rate": 0.001,
"loss": 35.1063,
"step": 780
},
{
"epoch": 12.54,
"learning_rate": 0.001,
"loss": 35.1594,
"step": 790
},
{
"epoch": 12.7,
"learning_rate": 0.001,
"loss": 35.2,
"step": 800
},
{
"epoch": 12.86,
"learning_rate": 0.001,
"loss": 35.1281,
"step": 810
},
{
"epoch": 13.02,
"learning_rate": 0.001,
"loss": 35.3563,
"step": 820
},
{
"epoch": 13.17,
"learning_rate": 0.001,
"loss": 35.1688,
"step": 830
},
{
"epoch": 13.33,
"learning_rate": 0.001,
"loss": 35.3625,
"step": 840
},
{
"epoch": 13.49,
"learning_rate": 0.001,
"loss": 35.5438,
"step": 850
},
{
"epoch": 13.65,
"learning_rate": 0.001,
"loss": 35.3937,
"step": 860
},
{
"epoch": 13.81,
"learning_rate": 0.001,
"loss": 35.3937,
"step": 870
},
{
"epoch": 13.97,
"learning_rate": 0.001,
"loss": 35.5938,
"step": 880
},
{
"epoch": 14.13,
"learning_rate": 0.001,
"loss": 35.3781,
"step": 890
},
{
"epoch": 14.29,
"learning_rate": 0.001,
"loss": 35.775,
"step": 900
},
{
"epoch": 14.44,
"learning_rate": 0.001,
"loss": 35.7594,
"step": 910
},
{
"epoch": 14.6,
"learning_rate": 0.001,
"loss": 35.5438,
"step": 920
},
{
"epoch": 14.76,
"learning_rate": 0.001,
"loss": 35.8406,
"step": 930
},
{
"epoch": 14.92,
"learning_rate": 0.001,
"loss": 35.6,
"step": 940
},
{
"epoch": 15.08,
"learning_rate": 0.001,
"loss": 35.8844,
"step": 950
},
{
"epoch": 15.24,
"learning_rate": 0.001,
"loss": 35.9594,
"step": 960
},
{
"epoch": 15.4,
"learning_rate": 0.001,
"loss": 35.8281,
"step": 970
},
{
"epoch": 15.56,
"learning_rate": 0.001,
"loss": 36.0125,
"step": 980
},
{
"epoch": 15.71,
"learning_rate": 0.001,
"loss": 36.1313,
"step": 990
},
{
"epoch": 15.87,
"learning_rate": 0.001,
"loss": 36.0656,
"step": 1000
},
{
"epoch": 16.03,
"learning_rate": 0.001,
"loss": 36.1938,
"step": 1010
},
{
"epoch": 16.19,
"learning_rate": 0.001,
"loss": 36.2031,
"step": 1020
},
{
"epoch": 16.35,
"learning_rate": 0.001,
"loss": 36.1688,
"step": 1030
},
{
"epoch": 16.51,
"learning_rate": 0.001,
"loss": 36.3156,
"step": 1040
},
{
"epoch": 16.67,
"learning_rate": 0.001,
"loss": 36.2719,
"step": 1050
},
{
"epoch": 16.83,
"learning_rate": 0.001,
"loss": 36.4656,
"step": 1060
},
{
"epoch": 16.98,
"learning_rate": 0.001,
"loss": 36.3687,
"step": 1070
},
{
"epoch": 17.14,
"learning_rate": 0.001,
"loss": 36.4031,
"step": 1080
},
{
"epoch": 17.3,
"learning_rate": 0.001,
"loss": 36.5469,
"step": 1090
},
{
"epoch": 17.46,
"learning_rate": 0.001,
"loss": 36.5094,
"step": 1100
},
{
"epoch": 17.62,
"learning_rate": 0.001,
"loss": 36.6156,
"step": 1110
},
{
"epoch": 17.78,
"learning_rate": 0.001,
"loss": 36.5531,
"step": 1120
},
{
"epoch": 17.94,
"learning_rate": 0.001,
"loss": 36.525,
"step": 1130
},
{
"epoch": 18.1,
"learning_rate": 0.001,
"loss": 36.6562,
"step": 1140
},
{
"epoch": 18.25,
"learning_rate": 0.001,
"loss": 36.5812,
"step": 1150
},
{
"epoch": 18.41,
"learning_rate": 0.001,
"loss": 36.6625,
"step": 1160
},
{
"epoch": 18.57,
"learning_rate": 0.001,
"loss": 36.625,
"step": 1170
},
{
"epoch": 18.73,
"learning_rate": 0.001,
"loss": 36.8719,
"step": 1180
},
{
"epoch": 18.89,
"learning_rate": 0.001,
"loss": 36.9219,
"step": 1190
},
{
"epoch": 19.05,
"learning_rate": 0.001,
"loss": 36.8875,
"step": 1200
},
{
"epoch": 19.21,
"learning_rate": 0.001,
"loss": 36.7344,
"step": 1210
},
{
"epoch": 19.37,
"learning_rate": 0.001,
"loss": 36.9562,
"step": 1220
},
{
"epoch": 19.52,
"learning_rate": 0.001,
"loss": 37.1688,
"step": 1230
},
{
"epoch": 19.68,
"learning_rate": 0.001,
"loss": 37.2156,
"step": 1240
},
{
"epoch": 19.84,
"learning_rate": 0.001,
"loss": 37.1719,
"step": 1250
},
{
"epoch": 20.0,
"learning_rate": 0.001,
"loss": 37.2594,
"step": 1260
},
{
"epoch": 20.16,
"learning_rate": 0.001,
"loss": 37.4062,
"step": 1270
},
{
"epoch": 20.32,
"learning_rate": 0.001,
"loss": 37.4937,
"step": 1280
},
{
"epoch": 20.48,
"learning_rate": 0.001,
"loss": 37.5625,
"step": 1290
},
{
"epoch": 20.63,
"learning_rate": 0.001,
"loss": 37.6,
"step": 1300
},
{
"epoch": 20.79,
"learning_rate": 0.001,
"loss": 37.6156,
"step": 1310
},
{
"epoch": 20.95,
"learning_rate": 0.001,
"loss": 37.625,
"step": 1320
},
{
"epoch": 21.11,
"learning_rate": 0.001,
"loss": 37.5906,
"step": 1330
},
{
"epoch": 21.27,
"learning_rate": 0.001,
"loss": 37.8344,
"step": 1340
},
{
"epoch": 21.43,
"learning_rate": 0.001,
"loss": 37.7656,
"step": 1350
},
{
"epoch": 21.59,
"learning_rate": 0.001,
"loss": 37.8875,
"step": 1360
},
{
"epoch": 21.75,
"learning_rate": 0.001,
"loss": 37.6437,
"step": 1370
},
{
"epoch": 21.9,
"learning_rate": 0.001,
"loss": 38.0063,
"step": 1380
},
{
"epoch": 22.06,
"learning_rate": 0.001,
"loss": 38.0594,
"step": 1390
},
{
"epoch": 22.22,
"learning_rate": 0.001,
"loss": 37.9656,
"step": 1400
},
{
"epoch": 22.38,
"learning_rate": 0.001,
"loss": 38.125,
"step": 1410
},
{
"epoch": 22.54,
"learning_rate": 0.001,
"loss": 38.1469,
"step": 1420
},
{
"epoch": 22.7,
"learning_rate": 0.001,
"loss": 38.2188,
"step": 1430
},
{
"epoch": 22.86,
"learning_rate": 0.001,
"loss": 38.0969,
"step": 1440
},
{
"epoch": 23.02,
"learning_rate": 0.001,
"loss": 38.3469,
"step": 1450
},
{
"epoch": 23.17,
"learning_rate": 0.001,
"loss": 38.3563,
"step": 1460
},
{
"epoch": 23.33,
"learning_rate": 0.001,
"loss": 38.3719,
"step": 1470
},
{
"epoch": 23.49,
"learning_rate": 0.001,
"loss": 38.3375,
"step": 1480
},
{
"epoch": 23.65,
"learning_rate": 0.001,
"loss": 38.5375,
"step": 1490
},
{
"epoch": 23.81,
"learning_rate": 0.001,
"loss": 38.4531,
"step": 1500
},
{
"epoch": 23.97,
"learning_rate": 0.001,
"loss": 38.4469,
"step": 1510
},
{
"epoch": 24.13,
"learning_rate": 0.001,
"loss": 38.4375,
"step": 1520
},
{
"epoch": 24.29,
"learning_rate": 0.001,
"loss": 38.6406,
"step": 1530
},
{
"epoch": 24.44,
"learning_rate": 0.001,
"loss": 38.7156,
"step": 1540
},
{
"epoch": 24.6,
"learning_rate": 0.001,
"loss": 38.7719,
"step": 1550
},
{
"epoch": 24.76,
"learning_rate": 0.001,
"loss": 38.7313,
"step": 1560
},
{
"epoch": 24.92,
"learning_rate": 0.001,
"loss": 38.9125,
"step": 1570
},
{
"epoch": 25.08,
"learning_rate": 0.001,
"loss": 38.8281,
"step": 1580
},
{
"epoch": 25.24,
"learning_rate": 0.001,
"loss": 38.9062,
"step": 1590
},
{
"epoch": 25.4,
"learning_rate": 0.001,
"loss": 39.025,
"step": 1600
},
{
"epoch": 25.56,
"learning_rate": 0.001,
"loss": 38.9125,
"step": 1610
},
{
"epoch": 25.71,
"learning_rate": 0.001,
"loss": 39.0594,
"step": 1620
},
{
"epoch": 25.87,
"learning_rate": 0.001,
"loss": 38.9625,
"step": 1630
},
{
"epoch": 26.03,
"learning_rate": 0.001,
"loss": 39.1187,
"step": 1640
},
{
"epoch": 26.19,
"learning_rate": 0.001,
"loss": 39.1,
"step": 1650
},
{
"epoch": 26.35,
"learning_rate": 0.001,
"loss": 39.1313,
"step": 1660
},
{
"epoch": 26.51,
"learning_rate": 0.001,
"loss": 39.2906,
"step": 1670
},
{
"epoch": 26.67,
"learning_rate": 0.001,
"loss": 39.1719,
"step": 1680
},
{
"epoch": 26.83,
"learning_rate": 0.001,
"loss": 39.2687,
"step": 1690
},
{
"epoch": 26.98,
"learning_rate": 0.001,
"loss": 39.4312,
"step": 1700
},
{
"epoch": 27.14,
"learning_rate": 0.001,
"loss": 39.2563,
"step": 1710
},
{
"epoch": 27.3,
"learning_rate": 0.001,
"loss": 39.3344,
"step": 1720
},
{
"epoch": 27.46,
"learning_rate": 0.001,
"loss": 39.375,
"step": 1730
},
{
"epoch": 27.62,
"learning_rate": 0.001,
"loss": 39.4031,
"step": 1740
},
{
"epoch": 27.78,
"learning_rate": 0.001,
"loss": 39.4969,
"step": 1750
},
{
"epoch": 27.94,
"learning_rate": 0.001,
"loss": 39.5219,
"step": 1760
},
{
"epoch": 28.1,
"learning_rate": 0.001,
"loss": 39.5719,
"step": 1770
},
{
"epoch": 28.25,
"learning_rate": 0.001,
"loss": 39.6719,
"step": 1780
},
{
"epoch": 28.41,
"learning_rate": 0.001,
"loss": 39.7156,
"step": 1790
},
{
"epoch": 28.57,
"learning_rate": 0.001,
"loss": 39.8312,
"step": 1800
},
{
"epoch": 28.73,
"learning_rate": 0.001,
"loss": 39.8687,
"step": 1810
},
{
"epoch": 28.89,
"learning_rate": 0.001,
"loss": 39.9906,
"step": 1820
},
{
"epoch": 29.05,
"learning_rate": 0.001,
"loss": 40.0156,
"step": 1830
},
{
"epoch": 29.21,
"learning_rate": 0.001,
"loss": 40.1469,
"step": 1840
},
{
"epoch": 29.37,
"learning_rate": 0.001,
"loss": 40.1938,
"step": 1850
},
{
"epoch": 29.52,
"learning_rate": 0.001,
"loss": 40.1594,
"step": 1860
},
{
"epoch": 29.68,
"learning_rate": 0.001,
"loss": 39.9094,
"step": 1870
},
{
"epoch": 29.84,
"learning_rate": 0.001,
"loss": 40.1625,
"step": 1880
},
{
"epoch": 30.0,
"learning_rate": 0.001,
"loss": 40.1906,
"step": 1890
},
{
"epoch": 30.16,
"learning_rate": 0.001,
"loss": 40.3406,
"step": 1900
},
{
"epoch": 30.32,
"learning_rate": 0.001,
"loss": 40.2812,
"step": 1910
},
{
"epoch": 30.48,
"learning_rate": 0.001,
"loss": 40.2969,
"step": 1920
},
{
"epoch": 30.63,
"learning_rate": 0.001,
"loss": 40.2469,
"step": 1930
},
{
"epoch": 30.79,
"learning_rate": 0.001,
"loss": 40.3656,
"step": 1940
},
{
"epoch": 30.95,
"learning_rate": 0.001,
"loss": 40.4156,
"step": 1950
},
{
"epoch": 31.11,
"learning_rate": 0.001,
"loss": 40.4844,
"step": 1960
},
{
"epoch": 31.27,
"learning_rate": 0.001,
"loss": 40.4531,
"step": 1970
},
{
"epoch": 31.43,
"learning_rate": 0.001,
"loss": 40.6187,
"step": 1980
},
{
"epoch": 31.59,
"learning_rate": 0.001,
"loss": 40.8844,
"step": 1990
},
{
"epoch": 31.75,
"learning_rate": 0.001,
"loss": 40.8125,
"step": 2000
},
{
"epoch": 31.9,
"learning_rate": 0.001,
"loss": 40.9562,
"step": 2010
},
{
"epoch": 32.06,
"learning_rate": 0.001,
"loss": 40.8312,
"step": 2020
},
{
"epoch": 32.22,
"learning_rate": 0.001,
"loss": 40.825,
"step": 2030
},
{
"epoch": 32.38,
"learning_rate": 0.001,
"loss": 41.0594,
"step": 2040
},
{
"epoch": 32.54,
"learning_rate": 0.001,
"loss": 40.8875,
"step": 2050
},
{
"epoch": 32.7,
"learning_rate": 0.001,
"loss": 40.8062,
"step": 2060
},
{
"epoch": 32.86,
"learning_rate": 0.001,
"loss": 41.0844,
"step": 2070
},
{
"epoch": 33.02,
"learning_rate": 0.001,
"loss": 41.1187,
"step": 2080
},
{
"epoch": 33.17,
"learning_rate": 0.001,
"loss": 41.1031,
"step": 2090
},
{
"epoch": 33.33,
"learning_rate": 0.001,
"loss": 41.2344,
"step": 2100
},
{
"epoch": 33.49,
"learning_rate": 0.001,
"loss": 41.175,
"step": 2110
},
{
"epoch": 33.65,
"learning_rate": 0.001,
"loss": 41.4813,
"step": 2120
},
{
"epoch": 33.81,
"learning_rate": 0.001,
"loss": 41.4031,
"step": 2130
},
{
"epoch": 33.97,
"learning_rate": 0.001,
"loss": 41.2875,
"step": 2140
},
{
"epoch": 34.13,
"learning_rate": 0.001,
"loss": 41.6031,
"step": 2150
},
{
"epoch": 34.29,
"learning_rate": 0.001,
"loss": 41.3781,
"step": 2160
},
{
"epoch": 34.44,
"learning_rate": 0.001,
"loss": 41.7094,
"step": 2170
},
{
"epoch": 34.6,
"learning_rate": 0.001,
"loss": 41.6,
"step": 2180
},
{
"epoch": 34.76,
"learning_rate": 0.001,
"loss": 41.6313,
"step": 2190
},
{
"epoch": 34.92,
"learning_rate": 0.001,
"loss": 41.7125,
"step": 2200
},
{
"epoch": 35.08,
"learning_rate": 0.001,
"loss": 41.7625,
"step": 2210
},
{
"epoch": 35.24,
"learning_rate": 0.001,
"loss": 41.8656,
"step": 2220
},
{
"epoch": 35.4,
"learning_rate": 0.001,
"loss": 41.8531,
"step": 2230
},
{
"epoch": 35.56,
"learning_rate": 0.001,
"loss": 41.9312,
"step": 2240
},
{
"epoch": 35.71,
"learning_rate": 0.001,
"loss": 41.9406,
"step": 2250
},
{
"epoch": 35.87,
"learning_rate": 0.001,
"loss": 42.0187,
"step": 2260
},
{
"epoch": 36.03,
"learning_rate": 0.001,
"loss": 42.2437,
"step": 2270
},
{
"epoch": 36.19,
"learning_rate": 0.001,
"loss": 42.15,
"step": 2280
},
{
"epoch": 36.35,
"learning_rate": 0.001,
"loss": 42.2094,
"step": 2290
},
{
"epoch": 36.51,
"learning_rate": 0.001,
"loss": 42.2344,
"step": 2300
},
{
"epoch": 36.67,
"learning_rate": 0.001,
"loss": 42.4469,
"step": 2310
},
{
"epoch": 36.83,
"learning_rate": 0.001,
"loss": 42.3031,
"step": 2320
},
{
"epoch": 36.98,
"learning_rate": 0.001,
"loss": 42.3625,
"step": 2330
},
{
"epoch": 37.14,
"learning_rate": 0.001,
"loss": 42.7094,
"step": 2340
},
{
"epoch": 37.3,
"learning_rate": 0.001,
"loss": 42.6344,
"step": 2350
},
{
"epoch": 37.46,
"learning_rate": 0.001,
"loss": 42.6531,
"step": 2360
},
{
"epoch": 37.62,
"learning_rate": 0.001,
"loss": 42.6063,
"step": 2370
},
{
"epoch": 37.78,
"learning_rate": 0.001,
"loss": 42.6313,
"step": 2380
},
{
"epoch": 37.94,
"learning_rate": 0.001,
"loss": 42.7375,
"step": 2390
},
{
"epoch": 38.1,
"learning_rate": 0.001,
"loss": 42.7563,
"step": 2400
},
{
"epoch": 38.25,
"learning_rate": 0.001,
"loss": 42.7812,
"step": 2410
},
{
"epoch": 38.41,
"learning_rate": 0.001,
"loss": 42.7969,
"step": 2420
},
{
"epoch": 38.57,
"learning_rate": 0.001,
"loss": 42.8375,
"step": 2430
},
{
"epoch": 38.73,
"learning_rate": 0.001,
"loss": 43.0281,
"step": 2440
},
{
"epoch": 38.89,
"learning_rate": 0.001,
"loss": 42.9031,
"step": 2450
},
{
"epoch": 39.05,
"learning_rate": 0.001,
"loss": 42.9969,
"step": 2460
},
{
"epoch": 39.21,
"learning_rate": 0.001,
"loss": 43.075,
"step": 2470
},
{
"epoch": 39.37,
"learning_rate": 0.001,
"loss": 43.075,
"step": 2480
},
{
"epoch": 39.52,
"learning_rate": 0.001,
"loss": 43.0812,
"step": 2490
},
{
"epoch": 39.68,
"learning_rate": 0.001,
"loss": 43.2406,
"step": 2500
},
{
"epoch": 39.84,
"learning_rate": 0.001,
"loss": 43.1375,
"step": 2510
},
{
"epoch": 40.0,
"learning_rate": 0.001,
"loss": 43.1625,
"step": 2520
},
{
"epoch": 40.16,
"learning_rate": 0.001,
"loss": 43.2313,
"step": 2530
},
{
"epoch": 40.32,
"learning_rate": 0.001,
"loss": 43.4375,
"step": 2540
},
{
"epoch": 40.48,
"learning_rate": 0.001,
"loss": 43.4719,
"step": 2550
},
{
"epoch": 40.63,
"learning_rate": 0.001,
"loss": 43.3969,
"step": 2560
},
{
"epoch": 40.79,
"learning_rate": 0.001,
"loss": 43.675,
"step": 2570
},
{
"epoch": 40.95,
"learning_rate": 0.001,
"loss": 43.5562,
"step": 2580
},
{
"epoch": 41.11,
"learning_rate": 0.001,
"loss": 43.6844,
"step": 2590
},
{
"epoch": 41.27,
"learning_rate": 0.001,
"loss": 43.8094,
"step": 2600
},
{
"epoch": 41.43,
"learning_rate": 0.001,
"loss": 43.8656,
"step": 2610
},
{
"epoch": 41.59,
"learning_rate": 0.001,
"loss": 43.9969,
"step": 2620
},
{
"epoch": 41.75,
"learning_rate": 0.001,
"loss": 43.8469,
"step": 2630
},
{
"epoch": 41.9,
"learning_rate": 0.001,
"loss": 44.0688,
"step": 2640
},
{
"epoch": 42.06,
"learning_rate": 0.001,
"loss": 43.9531,
"step": 2650
},
{
"epoch": 42.22,
"learning_rate": 0.001,
"loss": 43.9937,
"step": 2660
},
{
"epoch": 42.38,
"learning_rate": 0.001,
"loss": 44.1125,
"step": 2670
},
{
"epoch": 42.54,
"learning_rate": 0.001,
"loss": 44.1969,
"step": 2680
},
{
"epoch": 42.7,
"learning_rate": 0.001,
"loss": 44.1781,
"step": 2690
},
{
"epoch": 42.86,
"learning_rate": 0.001,
"loss": 44.3781,
"step": 2700
},
{
"epoch": 43.02,
"learning_rate": 0.001,
"loss": 44.3125,
"step": 2710
},
{
"epoch": 43.17,
"learning_rate": 0.001,
"loss": 44.2875,
"step": 2720
},
{
"epoch": 43.33,
"learning_rate": 0.001,
"loss": 44.3344,
"step": 2730
},
{
"epoch": 43.49,
"learning_rate": 0.001,
"loss": 44.3188,
"step": 2740
},
{
"epoch": 43.65,
"learning_rate": 0.001,
"loss": 44.3281,
"step": 2750
},
{
"epoch": 43.81,
"learning_rate": 0.001,
"loss": 44.5063,
"step": 2760
},
{
"epoch": 43.97,
"learning_rate": 0.001,
"loss": 44.5625,
"step": 2770
},
{
"epoch": 44.13,
"learning_rate": 0.001,
"loss": 44.2313,
"step": 2780
},
{
"epoch": 44.29,
"learning_rate": 0.001,
"loss": 44.7313,
"step": 2790
},
{
"epoch": 44.44,
"learning_rate": 0.001,
"loss": 44.6125,
"step": 2800
},
{
"epoch": 44.6,
"learning_rate": 0.001,
"loss": 44.7125,
"step": 2810
},
{
"epoch": 44.76,
"learning_rate": 0.001,
"loss": 44.9594,
"step": 2820
},
{
"epoch": 44.92,
"learning_rate": 0.001,
"loss": 44.975,
"step": 2830
},
{
"epoch": 45.08,
"learning_rate": 0.001,
"loss": 44.9531,
"step": 2840
},
{
"epoch": 45.24,
"learning_rate": 0.001,
"loss": 44.9406,
"step": 2850
},
{
"epoch": 45.4,
"learning_rate": 0.001,
"loss": 45.0812,
"step": 2860
},
{
"epoch": 45.56,
"learning_rate": 0.001,
"loss": 45.1969,
"step": 2870
},
{
"epoch": 45.71,
"learning_rate": 0.001,
"loss": 44.9125,
"step": 2880
},
{
"epoch": 45.87,
"learning_rate": 0.001,
"loss": 45.0312,
"step": 2890
},
{
"epoch": 46.03,
"learning_rate": 0.001,
"loss": 45.1406,
"step": 2900
},
{
"epoch": 46.19,
"learning_rate": 0.001,
"loss": 45.3125,
"step": 2910
},
{
"epoch": 46.35,
"learning_rate": 0.001,
"loss": 45.4719,
"step": 2920
},
{
"epoch": 46.51,
"learning_rate": 0.001,
"loss": 45.3312,
"step": 2930
},
{
"epoch": 46.67,
"learning_rate": 0.001,
"loss": 45.5156,
"step": 2940
},
{
"epoch": 46.83,
"learning_rate": 0.001,
"loss": 45.5406,
"step": 2950
},
{
"epoch": 46.98,
"learning_rate": 0.001,
"loss": 45.6063,
"step": 2960
},
{
"epoch": 47.14,
"learning_rate": 0.001,
"loss": 45.5781,
"step": 2970
},
{
"epoch": 47.3,
"learning_rate": 0.001,
"loss": 45.8438,
"step": 2980
},
{
"epoch": 47.46,
"learning_rate": 0.001,
"loss": 45.8813,
"step": 2990
},
{
"epoch": 47.62,
"learning_rate": 0.001,
"loss": 46.0375,
"step": 3000
},
{
"epoch": 47.78,
"learning_rate": 0.001,
"loss": 45.7313,
"step": 3010
},
{
"epoch": 47.94,
"learning_rate": 0.001,
"loss": 45.7844,
"step": 3020
},
{
"epoch": 48.1,
"learning_rate": 0.001,
"loss": 46.0875,
"step": 3030
},
{
"epoch": 48.25,
"learning_rate": 0.001,
"loss": 45.8438,
"step": 3040
},
{
"epoch": 48.41,
"learning_rate": 0.001,
"loss": 45.9375,
"step": 3050
},
{
"epoch": 48.57,
"learning_rate": 0.001,
"loss": 46.0656,
"step": 3060
},
{
"epoch": 48.73,
"learning_rate": 0.001,
"loss": 46.0469,
"step": 3070
},
{
"epoch": 48.89,
"learning_rate": 0.001,
"loss": 46.325,
"step": 3080
},
{
"epoch": 49.05,
"learning_rate": 0.001,
"loss": 46.2125,
"step": 3090
},
{
"epoch": 49.21,
"learning_rate": 0.001,
"loss": 46.3438,
"step": 3100
},
{
"epoch": 49.37,
"learning_rate": 0.001,
"loss": 46.4562,
"step": 3110
},
{
"epoch": 49.52,
"learning_rate": 0.001,
"loss": 46.3875,
"step": 3120
},
{
"epoch": 49.68,
"learning_rate": 0.001,
"loss": 46.1625,
"step": 3130
},
{
"epoch": 49.84,
"learning_rate": 0.001,
"loss": 46.7,
"step": 3140
},
{
"epoch": 50.0,
"learning_rate": 0.001,
"loss": 46.4406,
"step": 3150
},
{
"epoch": 50.16,
"learning_rate": 0.001,
"loss": 46.5906,
"step": 3160
},
{
"epoch": 50.32,
"learning_rate": 0.001,
"loss": 46.6156,
"step": 3170
},
{
"epoch": 50.48,
"learning_rate": 0.001,
"loss": 46.6187,
"step": 3180
},
{
"epoch": 50.63,
"learning_rate": 0.001,
"loss": 46.8219,
"step": 3190
},
{
"epoch": 50.79,
"learning_rate": 0.001,
"loss": 46.7281,
"step": 3200
},
{
"epoch": 50.95,
"learning_rate": 0.001,
"loss": 46.6469,
"step": 3210
},
{
"epoch": 51.11,
"learning_rate": 0.001,
"loss": 46.7031,
"step": 3220
},
{
"epoch": 51.27,
"learning_rate": 0.001,
"loss": 46.8312,
"step": 3230
},
{
"epoch": 51.43,
"learning_rate": 0.001,
"loss": 47.0844,
"step": 3240
},
{
"epoch": 51.59,
"learning_rate": 0.001,
"loss": 46.9156,
"step": 3250
},
{
"epoch": 51.75,
"learning_rate": 0.001,
"loss": 46.9813,
"step": 3260
},
{
"epoch": 51.9,
"learning_rate": 0.001,
"loss": 47.3062,
"step": 3270
},
{
"epoch": 52.06,
"learning_rate": 0.001,
"loss": 47.2313,
"step": 3280
},
{
"epoch": 52.22,
"learning_rate": 0.001,
"loss": 47.2656,
"step": 3290
},
{
"epoch": 52.38,
"learning_rate": 0.001,
"loss": 47.2031,
"step": 3300
},
{
"epoch": 52.54,
"learning_rate": 0.001,
"loss": 47.2812,
"step": 3310
},
{
"epoch": 52.7,
"learning_rate": 0.001,
"loss": 47.2281,
"step": 3320
},
{
"epoch": 52.86,
"learning_rate": 0.001,
"loss": 47.2906,
"step": 3330
},
{
"epoch": 53.02,
"learning_rate": 0.001,
"loss": 47.4469,
"step": 3340
},
{
"epoch": 53.17,
"learning_rate": 0.001,
"loss": 47.3719,
"step": 3350
},
{
"epoch": 53.33,
"learning_rate": 0.001,
"loss": 47.4813,
"step": 3360
},
{
"epoch": 53.49,
"learning_rate": 0.001,
"loss": 47.5906,
"step": 3370
},
{
"epoch": 53.65,
"learning_rate": 0.001,
"loss": 47.5719,
"step": 3380
},
{
"epoch": 53.81,
"learning_rate": 0.001,
"loss": 47.5938,
"step": 3390
},
{
"epoch": 53.97,
"learning_rate": 0.001,
"loss": 47.5906,
"step": 3400
},
{
"epoch": 54.13,
"learning_rate": 0.001,
"loss": 47.8594,
"step": 3410
},
{
"epoch": 54.29,
"learning_rate": 0.001,
"loss": 47.7969,
"step": 3420
},
{
"epoch": 54.44,
"learning_rate": 0.001,
"loss": 47.7844,
"step": 3430
},
{
"epoch": 54.6,
"learning_rate": 0.001,
"loss": 47.7656,
"step": 3440
},
{
"epoch": 54.76,
"learning_rate": 0.001,
"loss": 48.0594,
"step": 3450
},
{
"epoch": 54.92,
"learning_rate": 0.001,
"loss": 47.8937,
"step": 3460
},
{
"epoch": 55.08,
"learning_rate": 0.001,
"loss": 48.0063,
"step": 3470
},
{
"epoch": 55.24,
"learning_rate": 0.001,
"loss": 47.8875,
"step": 3480
},
{
"epoch": 55.4,
"learning_rate": 0.001,
"loss": 47.9594,
"step": 3490
},
{
"epoch": 55.56,
"learning_rate": 0.001,
"loss": 48.0375,
"step": 3500
},
{
"epoch": 55.71,
"learning_rate": 0.001,
"loss": 47.9625,
"step": 3510
},
{
"epoch": 55.87,
"learning_rate": 0.001,
"loss": 48.2219,
"step": 3520
},
{
"epoch": 56.03,
"learning_rate": 0.001,
"loss": 48.2938,
"step": 3530
},
{
"epoch": 56.19,
"learning_rate": 0.001,
"loss": 48.3875,
"step": 3540
},
{
"epoch": 56.35,
"learning_rate": 0.001,
"loss": 48.3156,
"step": 3550
},
{
"epoch": 56.51,
"learning_rate": 0.001,
"loss": 48.4281,
"step": 3560
},
{
"epoch": 56.67,
"learning_rate": 0.001,
"loss": 48.3813,
"step": 3570
},
{
"epoch": 56.83,
"learning_rate": 0.001,
"loss": 48.6594,
"step": 3580
},
{
"epoch": 56.98,
"learning_rate": 0.001,
"loss": 48.5344,
"step": 3590
},
{
"epoch": 57.14,
"learning_rate": 0.001,
"loss": 48.4781,
"step": 3600
},
{
"epoch": 57.3,
"learning_rate": 0.001,
"loss": 48.6469,
"step": 3610
},
{
"epoch": 57.46,
"learning_rate": 0.001,
"loss": 48.7406,
"step": 3620
},
{
"epoch": 57.62,
"learning_rate": 0.001,
"loss": 48.8312,
"step": 3630
},
{
"epoch": 57.78,
"learning_rate": 0.001,
"loss": 48.7844,
"step": 3640
},
{
"epoch": 57.94,
"learning_rate": 0.001,
"loss": 48.9906,
"step": 3650
},
{
"epoch": 58.1,
"learning_rate": 0.001,
"loss": 48.7906,
"step": 3660
},
{
"epoch": 58.25,
"learning_rate": 0.001,
"loss": 49.2281,
"step": 3670
},
{
"epoch": 58.41,
"learning_rate": 0.001,
"loss": 49.2406,
"step": 3680
},
{
"epoch": 58.57,
"learning_rate": 0.001,
"loss": 48.9844,
"step": 3690
},
{
"epoch": 58.73,
"learning_rate": 0.001,
"loss": 49.0469,
"step": 3700
},
{
"epoch": 58.89,
"learning_rate": 0.001,
"loss": 49.3438,
"step": 3710
},
{
"epoch": 59.05,
"learning_rate": 0.001,
"loss": 49.2219,
"step": 3720
},
{
"epoch": 59.21,
"learning_rate": 0.001,
"loss": 49.3219,
"step": 3730
},
{
"epoch": 59.37,
"learning_rate": 0.001,
"loss": 49.4906,
"step": 3740
},
{
"epoch": 59.52,
"learning_rate": 0.001,
"loss": 49.2281,
"step": 3750
},
{
"epoch": 59.68,
"learning_rate": 0.001,
"loss": 49.4375,
"step": 3760
},
{
"epoch": 59.84,
"learning_rate": 0.001,
"loss": 49.4844,
"step": 3770
},
{
"epoch": 60.0,
"learning_rate": 0.001,
"loss": 49.4969,
"step": 3780
},
{
"epoch": 60.16,
"learning_rate": 0.001,
"loss": 49.475,
"step": 3790
},
{
"epoch": 60.32,
"learning_rate": 0.001,
"loss": 49.7406,
"step": 3800
},
{
"epoch": 60.48,
"learning_rate": 0.001,
"loss": 49.7406,
"step": 3810
},
{
"epoch": 60.63,
"learning_rate": 0.001,
"loss": 49.7687,
"step": 3820
},
{
"epoch": 60.79,
"learning_rate": 0.001,
"loss": 49.6281,
"step": 3830
},
{
"epoch": 60.95,
"learning_rate": 0.001,
"loss": 49.8312,
"step": 3840
},
{
"epoch": 61.11,
"learning_rate": 0.001,
"loss": 50.1094,
"step": 3850
},
{
"epoch": 61.27,
"learning_rate": 0.001,
"loss": 49.9156,
"step": 3860
},
{
"epoch": 61.43,
"learning_rate": 0.001,
"loss": 49.7563,
"step": 3870
},
{
"epoch": 61.59,
"learning_rate": 0.001,
"loss": 49.8719,
"step": 3880
},
{
"epoch": 61.75,
"learning_rate": 0.001,
"loss": 50.1219,
"step": 3890
},
{
"epoch": 61.9,
"learning_rate": 0.001,
"loss": 50.2469,
"step": 3900
},
{
"epoch": 62.06,
"learning_rate": 0.001,
"loss": 50.0844,
"step": 3910
},
{
"epoch": 62.22,
"learning_rate": 0.001,
"loss": 50.2719,
"step": 3920
},
{
"epoch": 62.38,
"learning_rate": 0.001,
"loss": 50.0219,
"step": 3930
},
{
"epoch": 62.54,
"learning_rate": 0.001,
"loss": 50.3875,
"step": 3940
},
{
"epoch": 62.7,
"learning_rate": 0.001,
"loss": 50.3594,
"step": 3950
},
{
"epoch": 62.86,
"learning_rate": 0.001,
"loss": 50.4531,
"step": 3960
},
{
"epoch": 63.02,
"learning_rate": 0.001,
"loss": 50.6,
"step": 3970
},
{
"epoch": 63.17,
"learning_rate": 0.001,
"loss": 50.5063,
"step": 3980
},
{
"epoch": 63.33,
"learning_rate": 0.001,
"loss": 50.6125,
"step": 3990
},
{
"epoch": 63.49,
"learning_rate": 0.001,
"loss": 50.6313,
"step": 4000
},
{
"epoch": 63.49,
"eval_loss": 41.125,
"eval_runtime": 97.8561,
"eval_samples_per_second": 5.11,
"eval_steps_per_second": 0.644,
"step": 4000
}
],
"max_steps": 20000,
"num_train_epochs": 318,
"total_flos": 4.897803240892662e+18,
"trial_name": null,
"trial_params": null
}
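
The JSON above is the standard state file that the Hugging Face Trainer writes next to each checkpoint: "log_history" holds one entry per logging step (training entries carry "loss", the final evaluation entry carries "eval_loss"), while "best_metric" and "best_model_checkpoint" record the best evaluation result so far. A minimal sketch of how one might load this file and summarise the logged loss curve follows; the local file path is an assumption, and the plotting step is optional and only runs if matplotlib is installed.

import json

# Load the trainer state saved alongside checkpoint-4000.
# The path is an assumption; point it at wherever this trainer_state.json lives locally.
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Training-loss entries carry a "loss" key; evaluation entries carry "eval_loss".
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

steps = [e["step"] for e in train_log]
losses = [e["loss"] for e in train_log]

print(f"best_metric (eval_loss): {state['best_metric']}")
print(f"best checkpoint: {state['best_model_checkpoint']}")
print(f"logged train steps: {steps[0]}..{steps[-1]} ({len(steps)} points)")
if eval_log:
    last_eval = eval_log[-1]
    print(f"eval_loss at step {last_eval['step']}: {last_eval['eval_loss']}")

# Optional: plot the training-loss curve (requires matplotlib).
try:
    import matplotlib.pyplot as plt

    plt.plot(steps, losses)
    plt.xlabel("global step")
    plt.ylabel("training loss")
    plt.title("training loss, checkpoint-4000")
    plt.savefig("loss_curve.png")
except ImportError:
    pass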