gpt2-large-lora-sft / trainer_state.json
Mikivis's picture
commit from Mikivis
1c0c5a6
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.5,
"eval_steps": 500,
"global_step": 22355,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00012989532543055242,
"loss": 2.4901,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 0.0001297790203533885,
"loss": 2.4166,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00012966271527622454,
"loss": 2.3887,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 0.00012954641019906061,
"loss": 2.2707,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 0.00012943592037575487,
"loss": 2.4148,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 0.0001293196152985909,
"loss": 2.2246,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 0.00012920331022142696,
"loss": 2.2086,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 0.000129087005144263,
"loss": 2.2448,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 0.00012897070006709908,
"loss": 2.1513,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 0.00012885439498993512,
"loss": 2.1979,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 0.00012873808991277117,
"loss": 2.1967,
"step": 220
},
{
"epoch": 0.03,
"learning_rate": 0.00012862760008946542,
"loss": 2.2009,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 0.0001285112950123015,
"loss": 2.2464,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 0.00012839498993513754,
"loss": 2.1378,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 0.0001282786848579736,
"loss": 2.1907,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 0.00012816237978080966,
"loss": 2.1482,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 0.0001280460747036457,
"loss": 2.1959,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 0.00012792976962648175,
"loss": 2.1316,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 0.00012781346454931783,
"loss": 2.1494,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 0.00012769715947215387,
"loss": 2.2006,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 0.00012758085439498992,
"loss": 2.1951,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 0.00012746454931782597,
"loss": 2.16,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 0.00012734824424066204,
"loss": 2.185,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 0.00012723193916349808,
"loss": 2.1613,
"step": 480
},
{
"epoch": 0.06,
"learning_rate": 0.00012711563408633413,
"loss": 2.1638,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 0.0001269993290091702,
"loss": 2.1574,
"step": 520
},
{
"epoch": 0.06,
"learning_rate": 0.00012688302393200625,
"loss": 2.1476,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 0.0001267667188548423,
"loss": 2.1674,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 0.00012665041377767837,
"loss": 2.1332,
"step": 580
},
{
"epoch": 0.07,
"learning_rate": 0.00012653410870051442,
"loss": 2.1414,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 0.00012641780362335046,
"loss": 2.1813,
"step": 620
},
{
"epoch": 0.07,
"learning_rate": 0.00012630149854618653,
"loss": 2.1329,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 0.00012618519346902258,
"loss": 2.2409,
"step": 660
},
{
"epoch": 0.08,
"learning_rate": 0.00012606888839185863,
"loss": 2.1425,
"step": 680
},
{
"epoch": 0.08,
"learning_rate": 0.0001259525833146947,
"loss": 2.1504,
"step": 700
},
{
"epoch": 0.08,
"learning_rate": 0.00012583627823753075,
"loss": 2.1707,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 0.0001257199731603668,
"loss": 2.1545,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 0.00012560366808320286,
"loss": 2.158,
"step": 760
},
{
"epoch": 0.09,
"learning_rate": 0.0001254873630060389,
"loss": 2.1406,
"step": 780
},
{
"epoch": 0.09,
"learning_rate": 0.00012537105792887496,
"loss": 2.1347,
"step": 800
},
{
"epoch": 0.09,
"learning_rate": 0.000125254752851711,
"loss": 2.1311,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 0.00012513844777454708,
"loss": 2.1721,
"step": 840
},
{
"epoch": 0.1,
"learning_rate": 0.00012502214269738312,
"loss": 2.133,
"step": 860
},
{
"epoch": 0.1,
"learning_rate": 0.00012490583762021917,
"loss": 2.1699,
"step": 880
},
{
"epoch": 0.1,
"learning_rate": 0.00012478953254305521,
"loss": 2.1301,
"step": 900
},
{
"epoch": 0.1,
"learning_rate": 0.0001246732274658913,
"loss": 2.1117,
"step": 920
},
{
"epoch": 0.11,
"learning_rate": 0.00012455692238872733,
"loss": 2.1259,
"step": 940
},
{
"epoch": 0.11,
"learning_rate": 0.0001244406173115634,
"loss": 2.1486,
"step": 960
},
{
"epoch": 0.11,
"learning_rate": 0.00012432431223439945,
"loss": 2.1521,
"step": 980
},
{
"epoch": 0.11,
"learning_rate": 0.0001242080071572355,
"loss": 2.1392,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 0.00012409170208007157,
"loss": 2.1488,
"step": 1020
},
{
"epoch": 0.12,
"learning_rate": 0.00012397539700290762,
"loss": 2.1047,
"step": 1040
},
{
"epoch": 0.12,
"learning_rate": 0.00012385909192574366,
"loss": 2.1408,
"step": 1060
},
{
"epoch": 0.12,
"learning_rate": 0.00012374278684857974,
"loss": 2.1569,
"step": 1080
},
{
"epoch": 0.12,
"learning_rate": 0.00012362648177141578,
"loss": 2.1465,
"step": 1100
},
{
"epoch": 0.13,
"learning_rate": 0.00012351017669425183,
"loss": 2.1701,
"step": 1120
},
{
"epoch": 0.13,
"learning_rate": 0.0001233938716170879,
"loss": 2.1293,
"step": 1140
},
{
"epoch": 0.13,
"learning_rate": 0.00012327756653992395,
"loss": 2.1176,
"step": 1160
},
{
"epoch": 0.13,
"learning_rate": 0.00012316126146276,
"loss": 2.1423,
"step": 1180
},
{
"epoch": 0.13,
"learning_rate": 0.00012304495638559607,
"loss": 2.0949,
"step": 1200
},
{
"epoch": 0.14,
"learning_rate": 0.0001229286513084321,
"loss": 2.1161,
"step": 1220
},
{
"epoch": 0.14,
"learning_rate": 0.00012281234623126816,
"loss": 2.1423,
"step": 1240
},
{
"epoch": 0.14,
"learning_rate": 0.0001226960411541042,
"loss": 2.1358,
"step": 1260
},
{
"epoch": 0.14,
"learning_rate": 0.00012257973607694028,
"loss": 2.1357,
"step": 1280
},
{
"epoch": 0.15,
"learning_rate": 0.00012246343099977632,
"loss": 2.078,
"step": 1300
},
{
"epoch": 0.15,
"learning_rate": 0.00012234712592261237,
"loss": 2.1491,
"step": 1320
},
{
"epoch": 0.15,
"learning_rate": 0.00012223082084544842,
"loss": 2.145,
"step": 1340
},
{
"epoch": 0.15,
"learning_rate": 0.0001221145157682845,
"loss": 2.1503,
"step": 1360
},
{
"epoch": 0.15,
"learning_rate": 0.00012199821069112054,
"loss": 2.1336,
"step": 1380
},
{
"epoch": 0.16,
"learning_rate": 0.00012188190561395661,
"loss": 2.1555,
"step": 1400
},
{
"epoch": 0.16,
"learning_rate": 0.00012176560053679265,
"loss": 2.1199,
"step": 1420
},
{
"epoch": 0.16,
"learning_rate": 0.0001216492954596287,
"loss": 2.1455,
"step": 1440
},
{
"epoch": 0.16,
"learning_rate": 0.00012153299038246477,
"loss": 2.1473,
"step": 1460
},
{
"epoch": 0.17,
"learning_rate": 0.00012141668530530082,
"loss": 2.0865,
"step": 1480
},
{
"epoch": 0.17,
"learning_rate": 0.00012130038022813687,
"loss": 2.1347,
"step": 1500
},
{
"epoch": 0.17,
"learning_rate": 0.00012118407515097293,
"loss": 2.1074,
"step": 1520
},
{
"epoch": 0.17,
"learning_rate": 0.00012106777007380899,
"loss": 2.1506,
"step": 1540
},
{
"epoch": 0.17,
"learning_rate": 0.00012095146499664503,
"loss": 2.1278,
"step": 1560
},
{
"epoch": 0.18,
"learning_rate": 0.00012083515991948109,
"loss": 2.1479,
"step": 1580
},
{
"epoch": 0.18,
"learning_rate": 0.00012071885484231714,
"loss": 2.0859,
"step": 1600
},
{
"epoch": 0.18,
"learning_rate": 0.00012060254976515321,
"loss": 2.1138,
"step": 1620
},
{
"epoch": 0.18,
"learning_rate": 0.00012048624468798926,
"loss": 2.1397,
"step": 1640
},
{
"epoch": 0.19,
"learning_rate": 0.0001203699396108253,
"loss": 2.0949,
"step": 1660
},
{
"epoch": 0.19,
"learning_rate": 0.00012025363453366136,
"loss": 2.1187,
"step": 1680
},
{
"epoch": 0.19,
"learning_rate": 0.00012013732945649742,
"loss": 2.1278,
"step": 1700
},
{
"epoch": 0.19,
"learning_rate": 0.00012002102437933347,
"loss": 2.1332,
"step": 1720
},
{
"epoch": 0.19,
"learning_rate": 0.00011990471930216953,
"loss": 2.1388,
"step": 1740
},
{
"epoch": 0.2,
"learning_rate": 0.00011978841422500559,
"loss": 2.0935,
"step": 1760
},
{
"epoch": 0.2,
"learning_rate": 0.00011967210914784163,
"loss": 2.1461,
"step": 1780
},
{
"epoch": 0.2,
"learning_rate": 0.00011955580407067769,
"loss": 2.1138,
"step": 1800
},
{
"epoch": 0.2,
"learning_rate": 0.00011943949899351374,
"loss": 2.0624,
"step": 1820
},
{
"epoch": 0.21,
"learning_rate": 0.00011932319391634981,
"loss": 2.0992,
"step": 1840
},
{
"epoch": 0.21,
"learning_rate": 0.00011920688883918586,
"loss": 2.1387,
"step": 1860
},
{
"epoch": 0.21,
"learning_rate": 0.0001190905837620219,
"loss": 2.1209,
"step": 1880
},
{
"epoch": 0.21,
"learning_rate": 0.00011897427868485796,
"loss": 2.1302,
"step": 1900
},
{
"epoch": 0.21,
"learning_rate": 0.00011885797360769402,
"loss": 2.1154,
"step": 1920
},
{
"epoch": 0.22,
"learning_rate": 0.00011874166853053007,
"loss": 2.1462,
"step": 1940
},
{
"epoch": 0.22,
"learning_rate": 0.00011862536345336613,
"loss": 2.1146,
"step": 1960
},
{
"epoch": 0.22,
"learning_rate": 0.00011850905837620217,
"loss": 2.1403,
"step": 1980
},
{
"epoch": 0.22,
"learning_rate": 0.00011839275329903823,
"loss": 2.0799,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 0.00011827644822187429,
"loss": 2.0788,
"step": 2020
},
{
"epoch": 0.23,
"learning_rate": 0.00011816014314471034,
"loss": 2.132,
"step": 2040
},
{
"epoch": 0.23,
"learning_rate": 0.00011804383806754641,
"loss": 2.1361,
"step": 2060
},
{
"epoch": 0.23,
"learning_rate": 0.00011792753299038246,
"loss": 2.1065,
"step": 2080
},
{
"epoch": 0.23,
"learning_rate": 0.0001178112279132185,
"loss": 2.1411,
"step": 2100
},
{
"epoch": 0.24,
"learning_rate": 0.00011769492283605456,
"loss": 2.0615,
"step": 2120
},
{
"epoch": 0.24,
"learning_rate": 0.00011757861775889062,
"loss": 2.163,
"step": 2140
},
{
"epoch": 0.24,
"learning_rate": 0.00011746231268172667,
"loss": 2.1092,
"step": 2160
},
{
"epoch": 0.24,
"learning_rate": 0.00011734600760456273,
"loss": 2.1337,
"step": 2180
},
{
"epoch": 0.25,
"learning_rate": 0.00011722970252739878,
"loss": 2.1282,
"step": 2200
},
{
"epoch": 0.25,
"learning_rate": 0.00011711339745023483,
"loss": 2.0993,
"step": 2220
},
{
"epoch": 0.25,
"learning_rate": 0.0001169970923730709,
"loss": 2.1268,
"step": 2240
},
{
"epoch": 0.25,
"learning_rate": 0.00011688078729590694,
"loss": 2.094,
"step": 2260
},
{
"epoch": 0.25,
"learning_rate": 0.00011676448221874301,
"loss": 2.1094,
"step": 2280
},
{
"epoch": 0.26,
"learning_rate": 0.00011664817714157906,
"loss": 2.0969,
"step": 2300
},
{
"epoch": 0.26,
"learning_rate": 0.0001165318720644151,
"loss": 2.1216,
"step": 2320
},
{
"epoch": 0.26,
"learning_rate": 0.00011641556698725117,
"loss": 2.1355,
"step": 2340
},
{
"epoch": 0.26,
"learning_rate": 0.00011629926191008722,
"loss": 2.117,
"step": 2360
},
{
"epoch": 0.27,
"learning_rate": 0.00011618295683292327,
"loss": 2.1401,
"step": 2380
},
{
"epoch": 0.27,
"learning_rate": 0.00011606665175575933,
"loss": 2.1409,
"step": 2400
},
{
"epoch": 0.27,
"learning_rate": 0.00011595034667859538,
"loss": 2.1422,
"step": 2420
},
{
"epoch": 0.27,
"learning_rate": 0.00011583404160143144,
"loss": 2.1285,
"step": 2440
},
{
"epoch": 0.28,
"learning_rate": 0.0001157177365242675,
"loss": 2.0899,
"step": 2460
},
{
"epoch": 0.28,
"learning_rate": 0.00011560143144710354,
"loss": 2.0972,
"step": 2480
},
{
"epoch": 0.28,
"learning_rate": 0.00011548512636993961,
"loss": 2.0817,
"step": 2500
},
{
"epoch": 0.28,
"learning_rate": 0.00011536882129277566,
"loss": 2.1663,
"step": 2520
},
{
"epoch": 0.28,
"learning_rate": 0.0001152525162156117,
"loss": 2.1295,
"step": 2540
},
{
"epoch": 0.29,
"learning_rate": 0.00011513621113844777,
"loss": 2.1089,
"step": 2560
},
{
"epoch": 0.29,
"learning_rate": 0.00011501990606128383,
"loss": 2.1504,
"step": 2580
},
{
"epoch": 0.29,
"learning_rate": 0.00011490360098411987,
"loss": 2.1026,
"step": 2600
},
{
"epoch": 0.29,
"learning_rate": 0.00011479311116081413,
"loss": 2.1709,
"step": 2620
},
{
"epoch": 0.3,
"learning_rate": 0.00011467680608365018,
"loss": 2.1135,
"step": 2640
},
{
"epoch": 0.3,
"learning_rate": 0.00011456050100648623,
"loss": 2.1048,
"step": 2660
},
{
"epoch": 0.3,
"learning_rate": 0.00011444419592932229,
"loss": 2.0994,
"step": 2680
},
{
"epoch": 0.3,
"learning_rate": 0.00011432789085215834,
"loss": 2.1139,
"step": 2700
},
{
"epoch": 0.3,
"learning_rate": 0.00011421158577499441,
"loss": 2.0648,
"step": 2720
},
{
"epoch": 0.31,
"learning_rate": 0.00011409528069783046,
"loss": 2.1184,
"step": 2740
},
{
"epoch": 0.31,
"learning_rate": 0.0001139789756206665,
"loss": 2.0812,
"step": 2760
},
{
"epoch": 0.31,
"learning_rate": 0.00011386848579736077,
"loss": 2.168,
"step": 2780
},
{
"epoch": 0.31,
"learning_rate": 0.00011375218072019681,
"loss": 2.0971,
"step": 2800
},
{
"epoch": 0.32,
"learning_rate": 0.00011363587564303286,
"loss": 2.0863,
"step": 2820
},
{
"epoch": 0.32,
"learning_rate": 0.00011351957056586893,
"loss": 2.1173,
"step": 2840
},
{
"epoch": 0.32,
"learning_rate": 0.00011340326548870498,
"loss": 2.0671,
"step": 2860
},
{
"epoch": 0.32,
"learning_rate": 0.00011328696041154103,
"loss": 2.1363,
"step": 2880
},
{
"epoch": 0.32,
"learning_rate": 0.00011317065533437709,
"loss": 2.1232,
"step": 2900
},
{
"epoch": 0.33,
"learning_rate": 0.00011305435025721314,
"loss": 2.108,
"step": 2920
},
{
"epoch": 0.33,
"learning_rate": 0.0001129380451800492,
"loss": 2.1008,
"step": 2940
},
{
"epoch": 0.33,
"learning_rate": 0.00011282174010288525,
"loss": 2.0993,
"step": 2960
},
{
"epoch": 0.33,
"learning_rate": 0.0001127054350257213,
"loss": 2.1125,
"step": 2980
},
{
"epoch": 0.34,
"learning_rate": 0.00011258912994855737,
"loss": 2.122,
"step": 3000
},
{
"epoch": 0.34,
"learning_rate": 0.00011247282487139342,
"loss": 2.1063,
"step": 3020
},
{
"epoch": 0.34,
"learning_rate": 0.00011235651979422946,
"loss": 2.0962,
"step": 3040
},
{
"epoch": 0.34,
"learning_rate": 0.00011224021471706552,
"loss": 2.1298,
"step": 3060
},
{
"epoch": 0.34,
"learning_rate": 0.00011212390963990158,
"loss": 2.1131,
"step": 3080
},
{
"epoch": 0.35,
"learning_rate": 0.00011200760456273763,
"loss": 2.1532,
"step": 3100
},
{
"epoch": 0.35,
"learning_rate": 0.00011189129948557369,
"loss": 2.0538,
"step": 3120
},
{
"epoch": 0.35,
"learning_rate": 0.00011177499440840975,
"loss": 2.1184,
"step": 3140
},
{
"epoch": 0.35,
"learning_rate": 0.0001116586893312458,
"loss": 2.0519,
"step": 3160
},
{
"epoch": 0.36,
"learning_rate": 0.00011154238425408185,
"loss": 2.1242,
"step": 3180
},
{
"epoch": 0.36,
"learning_rate": 0.0001114260791769179,
"loss": 2.1331,
"step": 3200
},
{
"epoch": 0.36,
"learning_rate": 0.00011130977409975397,
"loss": 2.108,
"step": 3220
},
{
"epoch": 0.36,
"learning_rate": 0.00011119346902259002,
"loss": 2.1189,
"step": 3240
},
{
"epoch": 0.36,
"learning_rate": 0.00011107716394542606,
"loss": 2.0963,
"step": 3260
},
{
"epoch": 0.37,
"learning_rate": 0.00011096667412212033,
"loss": 2.1936,
"step": 3280
},
{
"epoch": 0.37,
"learning_rate": 0.00011085036904495638,
"loss": 2.1254,
"step": 3300
},
{
"epoch": 0.37,
"learning_rate": 0.00011073406396779242,
"loss": 2.1475,
"step": 3320
},
{
"epoch": 0.37,
"learning_rate": 0.00011061775889062848,
"loss": 2.1016,
"step": 3340
},
{
"epoch": 0.38,
"learning_rate": 0.00011050145381346454,
"loss": 2.131,
"step": 3360
},
{
"epoch": 0.38,
"learning_rate": 0.0001103851487363006,
"loss": 2.1093,
"step": 3380
},
{
"epoch": 0.38,
"learning_rate": 0.00011026884365913665,
"loss": 2.13,
"step": 3400
},
{
"epoch": 0.38,
"learning_rate": 0.00011015253858197269,
"loss": 2.1331,
"step": 3420
},
{
"epoch": 0.38,
"learning_rate": 0.00011003623350480877,
"loss": 2.1238,
"step": 3440
},
{
"epoch": 0.39,
"learning_rate": 0.00010991992842764481,
"loss": 2.1285,
"step": 3460
},
{
"epoch": 0.39,
"learning_rate": 0.00010980362335048086,
"loss": 2.0729,
"step": 3480
},
{
"epoch": 0.39,
"learning_rate": 0.00010968731827331693,
"loss": 2.1105,
"step": 3500
},
{
"epoch": 0.39,
"learning_rate": 0.00010957101319615298,
"loss": 2.0969,
"step": 3520
},
{
"epoch": 0.4,
"learning_rate": 0.00010945470811898902,
"loss": 2.1348,
"step": 3540
},
{
"epoch": 0.4,
"learning_rate": 0.00010933840304182508,
"loss": 2.0645,
"step": 3560
},
{
"epoch": 0.4,
"learning_rate": 0.00010922209796466114,
"loss": 2.0753,
"step": 3580
},
{
"epoch": 0.4,
"learning_rate": 0.0001091057928874972,
"loss": 2.1041,
"step": 3600
},
{
"epoch": 0.4,
"learning_rate": 0.00010898948781033325,
"loss": 2.0569,
"step": 3620
},
{
"epoch": 0.41,
"learning_rate": 0.0001088731827331693,
"loss": 2.0984,
"step": 3640
},
{
"epoch": 0.41,
"learning_rate": 0.00010875687765600537,
"loss": 2.1037,
"step": 3660
},
{
"epoch": 0.41,
"learning_rate": 0.00010864057257884141,
"loss": 2.0907,
"step": 3680
},
{
"epoch": 0.41,
"learning_rate": 0.00010852426750167746,
"loss": 2.1189,
"step": 3700
},
{
"epoch": 0.42,
"learning_rate": 0.00010840796242451353,
"loss": 2.0922,
"step": 3720
},
{
"epoch": 0.42,
"learning_rate": 0.00010829165734734958,
"loss": 2.0986,
"step": 3740
},
{
"epoch": 0.42,
"learning_rate": 0.00010817535227018562,
"loss": 2.1069,
"step": 3760
},
{
"epoch": 0.42,
"learning_rate": 0.00010805904719302168,
"loss": 2.1013,
"step": 3780
},
{
"epoch": 0.42,
"learning_rate": 0.00010794274211585774,
"loss": 2.1111,
"step": 3800
},
{
"epoch": 0.43,
"learning_rate": 0.0001078264370386938,
"loss": 2.1105,
"step": 3820
},
{
"epoch": 0.43,
"learning_rate": 0.00010771013196152985,
"loss": 2.0881,
"step": 3840
},
{
"epoch": 0.43,
"learning_rate": 0.0001075938268843659,
"loss": 2.102,
"step": 3860
},
{
"epoch": 0.43,
"learning_rate": 0.00010747752180720197,
"loss": 2.1201,
"step": 3880
},
{
"epoch": 0.44,
"learning_rate": 0.00010736121673003801,
"loss": 2.128,
"step": 3900
},
{
"epoch": 0.44,
"learning_rate": 0.00010724491165287406,
"loss": 2.0483,
"step": 3920
},
{
"epoch": 0.44,
"learning_rate": 0.00010712860657571013,
"loss": 2.1274,
"step": 3940
},
{
"epoch": 0.44,
"learning_rate": 0.00010701230149854618,
"loss": 2.1372,
"step": 3960
},
{
"epoch": 0.45,
"learning_rate": 0.00010689599642138223,
"loss": 2.0902,
"step": 3980
},
{
"epoch": 0.45,
"learning_rate": 0.00010677969134421828,
"loss": 2.1103,
"step": 4000
},
{
"epoch": 0.45,
"learning_rate": 0.00010666338626705434,
"loss": 2.102,
"step": 4020
},
{
"epoch": 0.45,
"learning_rate": 0.0001065470811898904,
"loss": 2.118,
"step": 4040
},
{
"epoch": 0.45,
"learning_rate": 0.00010643077611272645,
"loss": 2.0548,
"step": 4060
},
{
"epoch": 0.46,
"learning_rate": 0.0001063144710355625,
"loss": 2.0805,
"step": 4080
},
{
"epoch": 0.46,
"learning_rate": 0.00010619816595839857,
"loss": 2.073,
"step": 4100
},
{
"epoch": 0.46,
"learning_rate": 0.00010608186088123462,
"loss": 2.0895,
"step": 4120
},
{
"epoch": 0.46,
"learning_rate": 0.00010596555580407066,
"loss": 2.0976,
"step": 4140
},
{
"epoch": 0.47,
"learning_rate": 0.00010584925072690672,
"loss": 2.1084,
"step": 4160
},
{
"epoch": 0.47,
"learning_rate": 0.00010573294564974278,
"loss": 2.0852,
"step": 4180
},
{
"epoch": 0.47,
"learning_rate": 0.00010561664057257883,
"loss": 2.0955,
"step": 4200
},
{
"epoch": 0.47,
"learning_rate": 0.00010550033549541489,
"loss": 2.1188,
"step": 4220
},
{
"epoch": 0.47,
"learning_rate": 0.00010538403041825093,
"loss": 2.1081,
"step": 4240
},
{
"epoch": 0.48,
"learning_rate": 0.000105267725341087,
"loss": 2.0932,
"step": 4260
},
{
"epoch": 0.48,
"learning_rate": 0.00010515142026392305,
"loss": 2.0762,
"step": 4280
},
{
"epoch": 0.48,
"learning_rate": 0.0001050351151867591,
"loss": 2.1456,
"step": 4300
},
{
"epoch": 0.48,
"learning_rate": 0.00010491881010959517,
"loss": 2.0773,
"step": 4320
},
{
"epoch": 0.49,
"learning_rate": 0.00010480250503243122,
"loss": 2.1198,
"step": 4340
},
{
"epoch": 0.49,
"learning_rate": 0.00010468619995526726,
"loss": 2.0874,
"step": 4360
},
{
"epoch": 0.49,
"learning_rate": 0.00010456989487810332,
"loss": 2.1002,
"step": 4380
},
{
"epoch": 0.49,
"learning_rate": 0.00010445358980093938,
"loss": 2.1528,
"step": 4400
},
{
"epoch": 0.49,
"learning_rate": 0.00010433728472377543,
"loss": 2.1448,
"step": 4420
},
{
"epoch": 0.5,
"learning_rate": 0.00010422097964661149,
"loss": 2.1091,
"step": 4440
},
{
"epoch": 0.5,
"learning_rate": 0.00010410467456944753,
"loss": 2.098,
"step": 4460
},
{
"epoch": 0.5,
"learning_rate": 0.0001039883694922836,
"loss": 2.0977,
"step": 4480
},
{
"epoch": 0.5,
"learning_rate": 0.00010387206441511965,
"loss": 2.0686,
"step": 4500
},
{
"epoch": 0.51,
"learning_rate": 0.0001037557593379557,
"loss": 2.0873,
"step": 4520
},
{
"epoch": 0.51,
"learning_rate": 0.00010363945426079177,
"loss": 2.0965,
"step": 4540
},
{
"epoch": 0.51,
"learning_rate": 0.00010352314918362782,
"loss": 2.1141,
"step": 4560
},
{
"epoch": 0.51,
"learning_rate": 0.00010340684410646386,
"loss": 2.1144,
"step": 4580
},
{
"epoch": 0.51,
"learning_rate": 0.00010329053902929992,
"loss": 2.0991,
"step": 4600
},
{
"epoch": 0.52,
"learning_rate": 0.00010317423395213598,
"loss": 2.1212,
"step": 4620
},
{
"epoch": 0.52,
"learning_rate": 0.00010305792887497203,
"loss": 2.065,
"step": 4640
},
{
"epoch": 0.52,
"learning_rate": 0.00010294162379780809,
"loss": 2.1417,
"step": 4660
},
{
"epoch": 0.52,
"learning_rate": 0.00010282531872064413,
"loss": 2.0534,
"step": 4680
},
{
"epoch": 0.53,
"learning_rate": 0.00010270901364348021,
"loss": 2.0935,
"step": 4700
},
{
"epoch": 0.53,
"learning_rate": 0.00010259270856631625,
"loss": 2.0936,
"step": 4720
},
{
"epoch": 0.53,
"learning_rate": 0.0001024764034891523,
"loss": 2.1046,
"step": 4740
},
{
"epoch": 0.53,
"learning_rate": 0.00010236009841198837,
"loss": 2.1202,
"step": 4760
},
{
"epoch": 0.53,
"learning_rate": 0.00010224379333482442,
"loss": 2.1168,
"step": 4780
},
{
"epoch": 0.54,
"learning_rate": 0.00010212748825766046,
"loss": 2.1452,
"step": 4800
},
{
"epoch": 0.54,
"learning_rate": 0.00010201118318049652,
"loss": 2.083,
"step": 4820
},
{
"epoch": 0.54,
"learning_rate": 0.00010189487810333258,
"loss": 2.0586,
"step": 4840
},
{
"epoch": 0.54,
"learning_rate": 0.00010177857302616863,
"loss": 2.0866,
"step": 4860
},
{
"epoch": 0.55,
"learning_rate": 0.00010166226794900469,
"loss": 2.1073,
"step": 4880
},
{
"epoch": 0.55,
"learning_rate": 0.00010154596287184074,
"loss": 2.085,
"step": 4900
},
{
"epoch": 0.55,
"learning_rate": 0.00010142965779467681,
"loss": 2.1288,
"step": 4920
},
{
"epoch": 0.55,
"learning_rate": 0.00010131335271751285,
"loss": 2.095,
"step": 4940
},
{
"epoch": 0.55,
"learning_rate": 0.0001011970476403489,
"loss": 2.0773,
"step": 4960
},
{
"epoch": 0.56,
"learning_rate": 0.00010108074256318496,
"loss": 2.0692,
"step": 4980
},
{
"epoch": 0.56,
"learning_rate": 0.00010096443748602102,
"loss": 2.1083,
"step": 5000
},
{
"epoch": 0.56,
"learning_rate": 0.00010084813240885707,
"loss": 2.0798,
"step": 5020
},
{
"epoch": 0.56,
"learning_rate": 0.00010073182733169313,
"loss": 2.1033,
"step": 5040
},
{
"epoch": 0.57,
"learning_rate": 0.00010061552225452917,
"loss": 2.0525,
"step": 5060
},
{
"epoch": 0.57,
"learning_rate": 0.00010049921717736523,
"loss": 2.0929,
"step": 5080
},
{
"epoch": 0.57,
"learning_rate": 0.00010038291210020129,
"loss": 2.0907,
"step": 5100
},
{
"epoch": 0.57,
"learning_rate": 0.00010026660702303734,
"loss": 2.1307,
"step": 5120
},
{
"epoch": 0.57,
"learning_rate": 0.00010015030194587341,
"loss": 2.0979,
"step": 5140
},
{
"epoch": 0.58,
"learning_rate": 0.00010003399686870946,
"loss": 2.1352,
"step": 5160
},
{
"epoch": 0.58,
"learning_rate": 9.99176917915455e-05,
"loss": 2.1116,
"step": 5180
},
{
"epoch": 0.58,
"learning_rate": 9.980138671438156e-05,
"loss": 2.0916,
"step": 5200
},
{
"epoch": 0.58,
"learning_rate": 9.968508163721762e-05,
"loss": 2.1329,
"step": 5220
},
{
"epoch": 0.59,
"learning_rate": 9.956877656005367e-05,
"loss": 2.105,
"step": 5240
},
{
"epoch": 0.59,
"learning_rate": 9.945247148288973e-05,
"loss": 2.0596,
"step": 5260
},
{
"epoch": 0.59,
"learning_rate": 9.933616640572577e-05,
"loss": 2.1042,
"step": 5280
},
{
"epoch": 0.59,
"learning_rate": 9.921986132856183e-05,
"loss": 2.0807,
"step": 5300
},
{
"epoch": 0.59,
"learning_rate": 9.910355625139789e-05,
"loss": 2.1058,
"step": 5320
},
{
"epoch": 0.6,
"learning_rate": 9.898725117423394e-05,
"loss": 2.0702,
"step": 5340
},
{
"epoch": 0.6,
"learning_rate": 9.887094609707001e-05,
"loss": 2.0739,
"step": 5360
},
{
"epoch": 0.6,
"learning_rate": 9.875464101990606e-05,
"loss": 2.0708,
"step": 5380
},
{
"epoch": 0.6,
"learning_rate": 9.86383359427421e-05,
"loss": 2.0775,
"step": 5400
},
{
"epoch": 0.61,
"learning_rate": 9.852203086557816e-05,
"loss": 2.0901,
"step": 5420
},
{
"epoch": 0.61,
"learning_rate": 9.840572578841422e-05,
"loss": 2.1068,
"step": 5440
},
{
"epoch": 0.61,
"learning_rate": 9.828942071125027e-05,
"loss": 2.0987,
"step": 5460
},
{
"epoch": 0.61,
"learning_rate": 9.817311563408633e-05,
"loss": 2.106,
"step": 5480
},
{
"epoch": 0.62,
"learning_rate": 9.805681055692237e-05,
"loss": 2.0821,
"step": 5500
},
{
"epoch": 0.62,
"learning_rate": 9.794050547975843e-05,
"loss": 2.1149,
"step": 5520
},
{
"epoch": 0.62,
"learning_rate": 9.782420040259449e-05,
"loss": 2.1055,
"step": 5540
},
{
"epoch": 0.62,
"learning_rate": 9.770789532543054e-05,
"loss": 2.0827,
"step": 5560
},
{
"epoch": 0.62,
"learning_rate": 9.759159024826661e-05,
"loss": 2.122,
"step": 5580
},
{
"epoch": 0.63,
"learning_rate": 9.747528517110266e-05,
"loss": 2.1003,
"step": 5600
},
{
"epoch": 0.63,
"learning_rate": 9.73589800939387e-05,
"loss": 2.0936,
"step": 5620
},
{
"epoch": 0.63,
"learning_rate": 9.724267501677476e-05,
"loss": 2.1016,
"step": 5640
},
{
"epoch": 0.63,
"learning_rate": 9.712636993961082e-05,
"loss": 2.0689,
"step": 5660
},
{
"epoch": 0.64,
"learning_rate": 9.701006486244687e-05,
"loss": 2.0836,
"step": 5680
},
{
"epoch": 0.64,
"learning_rate": 9.689375978528293e-05,
"loss": 2.0938,
"step": 5700
},
{
"epoch": 0.64,
"learning_rate": 9.678326996197718e-05,
"loss": 2.0761,
"step": 5720
},
{
"epoch": 0.64,
"learning_rate": 9.666696488481323e-05,
"loss": 2.1049,
"step": 5740
},
{
"epoch": 0.64,
"learning_rate": 9.655065980764929e-05,
"loss": 2.0922,
"step": 5760
},
{
"epoch": 0.65,
"learning_rate": 9.643435473048533e-05,
"loss": 2.1141,
"step": 5780
},
{
"epoch": 0.65,
"learning_rate": 9.631804965332141e-05,
"loss": 2.15,
"step": 5800
},
{
"epoch": 0.65,
"learning_rate": 9.620174457615745e-05,
"loss": 2.1099,
"step": 5820
},
{
"epoch": 0.65,
"learning_rate": 9.60854394989935e-05,
"loss": 2.0849,
"step": 5840
},
{
"epoch": 0.66,
"learning_rate": 9.596913442182956e-05,
"loss": 2.1233,
"step": 5860
},
{
"epoch": 0.66,
"learning_rate": 9.585282934466562e-05,
"loss": 2.0993,
"step": 5880
},
{
"epoch": 0.66,
"learning_rate": 9.573652426750166e-05,
"loss": 2.064,
"step": 5900
},
{
"epoch": 0.66,
"learning_rate": 9.562021919033772e-05,
"loss": 2.0972,
"step": 5920
},
{
"epoch": 0.66,
"learning_rate": 9.550391411317378e-05,
"loss": 2.0913,
"step": 5940
},
{
"epoch": 0.67,
"learning_rate": 9.538760903600983e-05,
"loss": 2.1062,
"step": 5960
},
{
"epoch": 0.67,
"learning_rate": 9.527130395884589e-05,
"loss": 2.0634,
"step": 5980
},
{
"epoch": 0.67,
"learning_rate": 9.515499888168194e-05,
"loss": 2.0826,
"step": 6000
},
{
"epoch": 0.67,
"learning_rate": 9.503869380451801e-05,
"loss": 2.0542,
"step": 6020
},
{
"epoch": 0.68,
"learning_rate": 9.492238872735405e-05,
"loss": 2.1298,
"step": 6040
},
{
"epoch": 0.68,
"learning_rate": 9.48060836501901e-05,
"loss": 2.0832,
"step": 6060
},
{
"epoch": 0.68,
"learning_rate": 9.468977857302616e-05,
"loss": 2.0809,
"step": 6080
},
{
"epoch": 0.68,
"learning_rate": 9.457347349586222e-05,
"loss": 2.0953,
"step": 6100
},
{
"epoch": 0.68,
"learning_rate": 9.445716841869827e-05,
"loss": 2.1176,
"step": 6120
},
{
"epoch": 0.69,
"learning_rate": 9.434086334153433e-05,
"loss": 2.1116,
"step": 6140
},
{
"epoch": 0.69,
"learning_rate": 9.422455826437037e-05,
"loss": 2.0793,
"step": 6160
},
{
"epoch": 0.69,
"learning_rate": 9.410825318720643e-05,
"loss": 2.1492,
"step": 6180
},
{
"epoch": 0.69,
"learning_rate": 9.399194811004249e-05,
"loss": 2.1025,
"step": 6200
},
{
"epoch": 0.7,
"learning_rate": 9.387564303287854e-05,
"loss": 2.1206,
"step": 6220
},
{
"epoch": 0.7,
"learning_rate": 9.375933795571461e-05,
"loss": 2.0791,
"step": 6240
},
{
"epoch": 0.7,
"learning_rate": 9.364303287855066e-05,
"loss": 2.1058,
"step": 6260
},
{
"epoch": 0.7,
"learning_rate": 9.35267278013867e-05,
"loss": 2.0744,
"step": 6280
},
{
"epoch": 0.7,
"learning_rate": 9.341042272422276e-05,
"loss": 2.1379,
"step": 6300
},
{
"epoch": 0.71,
"learning_rate": 9.329411764705882e-05,
"loss": 2.0771,
"step": 6320
},
{
"epoch": 0.71,
"learning_rate": 9.317781256989487e-05,
"loss": 2.0726,
"step": 6340
},
{
"epoch": 0.71,
"learning_rate": 9.306150749273093e-05,
"loss": 2.0929,
"step": 6360
},
{
"epoch": 0.71,
"learning_rate": 9.294520241556697e-05,
"loss": 2.0805,
"step": 6380
},
{
"epoch": 0.72,
"learning_rate": 9.282889733840303e-05,
"loss": 2.0669,
"step": 6400
},
{
"epoch": 0.72,
"learning_rate": 9.271259226123909e-05,
"loss": 2.0981,
"step": 6420
},
{
"epoch": 0.72,
"learning_rate": 9.259628718407514e-05,
"loss": 2.0965,
"step": 6440
},
{
"epoch": 0.72,
"learning_rate": 9.247998210691121e-05,
"loss": 2.0624,
"step": 6460
},
{
"epoch": 0.72,
"learning_rate": 9.236367702974726e-05,
"loss": 2.0785,
"step": 6480
},
{
"epoch": 0.73,
"learning_rate": 9.22473719525833e-05,
"loss": 2.0725,
"step": 6500
},
{
"epoch": 0.73,
"learning_rate": 9.213106687541936e-05,
"loss": 2.1556,
"step": 6520
},
{
"epoch": 0.73,
"learning_rate": 9.201476179825542e-05,
"loss": 2.0998,
"step": 6540
},
{
"epoch": 0.73,
"learning_rate": 9.189845672109147e-05,
"loss": 2.0678,
"step": 6560
},
{
"epoch": 0.74,
"learning_rate": 9.178215164392753e-05,
"loss": 2.0746,
"step": 6580
},
{
"epoch": 0.74,
"learning_rate": 9.166584656676357e-05,
"loss": 2.062,
"step": 6600
},
{
"epoch": 0.74,
"learning_rate": 9.154954148959963e-05,
"loss": 2.0578,
"step": 6620
},
{
"epoch": 0.74,
"learning_rate": 9.143323641243569e-05,
"loss": 2.0694,
"step": 6640
},
{
"epoch": 0.74,
"learning_rate": 9.131693133527174e-05,
"loss": 2.1157,
"step": 6660
},
{
"epoch": 0.75,
"learning_rate": 9.120062625810778e-05,
"loss": 2.0948,
"step": 6680
},
{
"epoch": 0.75,
"learning_rate": 9.108432118094386e-05,
"loss": 2.1122,
"step": 6700
},
{
"epoch": 0.75,
"learning_rate": 9.097964661149629e-05,
"loss": 2.0849,
"step": 6720
},
{
"epoch": 0.75,
"learning_rate": 9.086334153433236e-05,
"loss": 2.1186,
"step": 6740
},
{
"epoch": 0.76,
"learning_rate": 9.074703645716841e-05,
"loss": 2.113,
"step": 6760
},
{
"epoch": 0.76,
"learning_rate": 9.063073138000446e-05,
"loss": 2.1269,
"step": 6780
},
{
"epoch": 0.76,
"learning_rate": 9.051442630284053e-05,
"loss": 2.0888,
"step": 6800
},
{
"epoch": 0.76,
"learning_rate": 9.039812122567658e-05,
"loss": 2.1094,
"step": 6820
},
{
"epoch": 0.76,
"learning_rate": 9.028181614851262e-05,
"loss": 2.1165,
"step": 6840
},
{
"epoch": 0.77,
"learning_rate": 9.016551107134868e-05,
"loss": 2.0779,
"step": 6860
},
{
"epoch": 0.77,
"learning_rate": 9.004920599418474e-05,
"loss": 2.1145,
"step": 6880
},
{
"epoch": 0.77,
"learning_rate": 8.99329009170208e-05,
"loss": 2.0862,
"step": 6900
},
{
"epoch": 0.77,
"learning_rate": 8.981659583985685e-05,
"loss": 2.079,
"step": 6920
},
{
"epoch": 0.78,
"learning_rate": 8.970029076269289e-05,
"loss": 2.0633,
"step": 6940
},
{
"epoch": 0.78,
"learning_rate": 8.958398568552897e-05,
"loss": 2.1121,
"step": 6960
},
{
"epoch": 0.78,
"learning_rate": 8.946768060836501e-05,
"loss": 2.0693,
"step": 6980
},
{
"epoch": 0.78,
"learning_rate": 8.935137553120106e-05,
"loss": 2.1111,
"step": 7000
},
{
"epoch": 0.79,
"learning_rate": 8.923507045403713e-05,
"loss": 2.0946,
"step": 7020
},
{
"epoch": 0.79,
"learning_rate": 8.911876537687318e-05,
"loss": 2.0797,
"step": 7040
},
{
"epoch": 0.79,
"learning_rate": 8.900246029970922e-05,
"loss": 2.0911,
"step": 7060
},
{
"epoch": 0.79,
"learning_rate": 8.888615522254528e-05,
"loss": 2.1028,
"step": 7080
},
{
"epoch": 0.79,
"learning_rate": 8.876985014538134e-05,
"loss": 2.0305,
"step": 7100
},
{
"epoch": 0.8,
"learning_rate": 8.86535450682174e-05,
"loss": 2.1243,
"step": 7120
},
{
"epoch": 0.8,
"learning_rate": 8.853723999105345e-05,
"loss": 2.0735,
"step": 7140
},
{
"epoch": 0.8,
"learning_rate": 8.84209349138895e-05,
"loss": 2.1153,
"step": 7160
},
{
"epoch": 0.8,
"learning_rate": 8.830462983672557e-05,
"loss": 2.123,
"step": 7180
},
{
"epoch": 0.81,
"learning_rate": 8.818832475956161e-05,
"loss": 2.1111,
"step": 7200
},
{
"epoch": 0.81,
"learning_rate": 8.807201968239766e-05,
"loss": 2.1151,
"step": 7220
},
{
"epoch": 0.81,
"learning_rate": 8.795571460523372e-05,
"loss": 2.0732,
"step": 7240
},
{
"epoch": 0.81,
"learning_rate": 8.783940952806978e-05,
"loss": 2.0776,
"step": 7260
},
{
"epoch": 0.81,
"learning_rate": 8.772310445090582e-05,
"loss": 2.1173,
"step": 7280
},
{
"epoch": 0.82,
"learning_rate": 8.760679937374188e-05,
"loss": 2.109,
"step": 7300
},
{
"epoch": 0.82,
"learning_rate": 8.749049429657794e-05,
"loss": 2.0947,
"step": 7320
},
{
"epoch": 0.82,
"learning_rate": 8.7374189219414e-05,
"loss": 2.1035,
"step": 7340
},
{
"epoch": 0.82,
"learning_rate": 8.725788414225005e-05,
"loss": 2.0857,
"step": 7360
},
{
"epoch": 0.83,
"learning_rate": 8.71473943189443e-05,
"loss": 2.1273,
"step": 7380
},
{
"epoch": 0.83,
"learning_rate": 8.703108924178036e-05,
"loss": 2.083,
"step": 7400
},
{
"epoch": 0.83,
"learning_rate": 8.691478416461641e-05,
"loss": 2.0879,
"step": 7420
},
{
"epoch": 0.83,
"learning_rate": 8.679847908745245e-05,
"loss": 2.0691,
"step": 7440
},
{
"epoch": 0.83,
"learning_rate": 8.668217401028853e-05,
"loss": 2.0915,
"step": 7460
},
{
"epoch": 0.84,
"learning_rate": 8.656586893312457e-05,
"loss": 2.0725,
"step": 7480
},
{
"epoch": 0.84,
"learning_rate": 8.644956385596062e-05,
"loss": 2.0862,
"step": 7500
},
{
"epoch": 0.84,
"learning_rate": 8.633325877879668e-05,
"loss": 2.0728,
"step": 7520
},
{
"epoch": 0.84,
"learning_rate": 8.621695370163274e-05,
"loss": 2.0855,
"step": 7540
},
{
"epoch": 0.85,
"learning_rate": 8.61006486244688e-05,
"loss": 2.0736,
"step": 7560
},
{
"epoch": 0.85,
"learning_rate": 8.598434354730484e-05,
"loss": 2.0767,
"step": 7580
},
{
"epoch": 0.85,
"learning_rate": 8.586803847014089e-05,
"loss": 2.0838,
"step": 7600
},
{
"epoch": 0.85,
"learning_rate": 8.575173339297696e-05,
"loss": 2.108,
"step": 7620
},
{
"epoch": 0.85,
"learning_rate": 8.563542831581301e-05,
"loss": 2.0527,
"step": 7640
},
{
"epoch": 0.86,
"learning_rate": 8.551912323864905e-05,
"loss": 2.0648,
"step": 7660
},
{
"epoch": 0.86,
"learning_rate": 8.540281816148513e-05,
"loss": 2.0727,
"step": 7680
},
{
"epoch": 0.86,
"learning_rate": 8.528651308432117e-05,
"loss": 2.067,
"step": 7700
},
{
"epoch": 0.86,
"learning_rate": 8.517020800715722e-05,
"loss": 2.1203,
"step": 7720
},
{
"epoch": 0.87,
"learning_rate": 8.505390292999328e-05,
"loss": 2.0996,
"step": 7740
},
{
"epoch": 0.87,
"learning_rate": 8.493759785282934e-05,
"loss": 2.0957,
"step": 7760
},
{
"epoch": 0.87,
"learning_rate": 8.48212927756654e-05,
"loss": 2.0995,
"step": 7780
},
{
"epoch": 0.87,
"learning_rate": 8.470498769850144e-05,
"loss": 2.1292,
"step": 7800
},
{
"epoch": 0.87,
"learning_rate": 8.458868262133749e-05,
"loss": 2.0849,
"step": 7820
},
{
"epoch": 0.88,
"learning_rate": 8.447237754417356e-05,
"loss": 2.0608,
"step": 7840
},
{
"epoch": 0.88,
"learning_rate": 8.435607246700961e-05,
"loss": 2.0869,
"step": 7860
},
{
"epoch": 0.88,
"learning_rate": 8.423976738984566e-05,
"loss": 2.1041,
"step": 7880
},
{
"epoch": 0.88,
"learning_rate": 8.412346231268173e-05,
"loss": 2.1176,
"step": 7900
},
{
"epoch": 0.89,
"learning_rate": 8.400715723551778e-05,
"loss": 2.0637,
"step": 7920
},
{
"epoch": 0.89,
"learning_rate": 8.389085215835382e-05,
"loss": 2.0899,
"step": 7940
},
{
"epoch": 0.89,
"learning_rate": 8.377454708118988e-05,
"loss": 2.1238,
"step": 7960
},
{
"epoch": 0.89,
"learning_rate": 8.365824200402594e-05,
"loss": 2.074,
"step": 7980
},
{
"epoch": 0.89,
"learning_rate": 8.3541936926862e-05,
"loss": 2.0879,
"step": 8000
},
{
"epoch": 0.9,
"learning_rate": 8.342563184969805e-05,
"loss": 2.054,
"step": 8020
},
{
"epoch": 0.9,
"learning_rate": 8.330932677253409e-05,
"loss": 2.099,
"step": 8040
},
{
"epoch": 0.9,
"learning_rate": 8.319302169537017e-05,
"loss": 2.0973,
"step": 8060
},
{
"epoch": 0.9,
"learning_rate": 8.307671661820621e-05,
"loss": 2.0318,
"step": 8080
},
{
"epoch": 0.91,
"learning_rate": 8.296041154104226e-05,
"loss": 2.0945,
"step": 8100
},
{
"epoch": 0.91,
"learning_rate": 8.284410646387832e-05,
"loss": 2.0752,
"step": 8120
},
{
"epoch": 0.91,
"learning_rate": 8.272780138671438e-05,
"loss": 2.0991,
"step": 8140
},
{
"epoch": 0.91,
"learning_rate": 8.261149630955042e-05,
"loss": 2.0894,
"step": 8160
},
{
"epoch": 0.91,
"learning_rate": 8.249519123238648e-05,
"loss": 2.1152,
"step": 8180
},
{
"epoch": 0.92,
"learning_rate": 8.237888615522254e-05,
"loss": 2.0758,
"step": 8200
},
{
"epoch": 0.92,
"learning_rate": 8.22625810780586e-05,
"loss": 2.0666,
"step": 8220
},
{
"epoch": 0.92,
"learning_rate": 8.214627600089465e-05,
"loss": 2.1372,
"step": 8240
},
{
"epoch": 0.92,
"learning_rate": 8.20299709237307e-05,
"loss": 2.0993,
"step": 8260
},
{
"epoch": 0.93,
"learning_rate": 8.191366584656677e-05,
"loss": 2.0746,
"step": 8280
},
{
"epoch": 0.93,
"learning_rate": 8.179736076940281e-05,
"loss": 2.0846,
"step": 8300
},
{
"epoch": 0.93,
"learning_rate": 8.168105569223886e-05,
"loss": 2.1056,
"step": 8320
},
{
"epoch": 0.93,
"learning_rate": 8.156475061507492e-05,
"loss": 2.0845,
"step": 8340
},
{
"epoch": 0.93,
"learning_rate": 8.144844553791098e-05,
"loss": 2.0659,
"step": 8360
},
{
"epoch": 0.94,
"learning_rate": 8.133214046074702e-05,
"loss": 2.1159,
"step": 8380
},
{
"epoch": 0.94,
"learning_rate": 8.121583538358308e-05,
"loss": 2.0651,
"step": 8400
},
{
"epoch": 0.94,
"learning_rate": 8.109953030641913e-05,
"loss": 2.0937,
"step": 8420
},
{
"epoch": 0.94,
"learning_rate": 8.09832252292552e-05,
"loss": 2.0918,
"step": 8440
},
{
"epoch": 0.95,
"learning_rate": 8.086692015209125e-05,
"loss": 2.0971,
"step": 8460
},
{
"epoch": 0.95,
"learning_rate": 8.07506150749273e-05,
"loss": 2.1071,
"step": 8480
},
{
"epoch": 0.95,
"learning_rate": 8.063430999776337e-05,
"loss": 2.0947,
"step": 8500
},
{
"epoch": 0.95,
"learning_rate": 8.051800492059941e-05,
"loss": 2.1176,
"step": 8520
},
{
"epoch": 0.96,
"learning_rate": 8.040169984343546e-05,
"loss": 2.0751,
"step": 8540
},
{
"epoch": 0.96,
"learning_rate": 8.028539476627152e-05,
"loss": 2.1119,
"step": 8560
},
{
"epoch": 0.96,
"learning_rate": 8.016908968910758e-05,
"loss": 2.0632,
"step": 8580
},
{
"epoch": 0.96,
"learning_rate": 8.005278461194362e-05,
"loss": 2.0783,
"step": 8600
},
{
"epoch": 0.96,
"learning_rate": 7.993647953477968e-05,
"loss": 2.0886,
"step": 8620
},
{
"epoch": 0.97,
"learning_rate": 7.982017445761573e-05,
"loss": 2.0987,
"step": 8640
},
{
"epoch": 0.97,
"learning_rate": 7.97038693804518e-05,
"loss": 2.1099,
"step": 8660
},
{
"epoch": 0.97,
"learning_rate": 7.958756430328785e-05,
"loss": 2.1042,
"step": 8680
},
{
"epoch": 0.97,
"learning_rate": 7.94712592261239e-05,
"loss": 2.0701,
"step": 8700
},
{
"epoch": 0.98,
"learning_rate": 7.935495414895997e-05,
"loss": 2.095,
"step": 8720
},
{
"epoch": 0.98,
"learning_rate": 7.923864907179601e-05,
"loss": 2.082,
"step": 8740
},
{
"epoch": 0.98,
"learning_rate": 7.912234399463206e-05,
"loss": 2.0991,
"step": 8760
},
{
"epoch": 0.98,
"learning_rate": 7.901185417132633e-05,
"loss": 2.1346,
"step": 8780
},
{
"epoch": 0.98,
"learning_rate": 7.889554909416237e-05,
"loss": 2.0537,
"step": 8800
},
{
"epoch": 0.99,
"learning_rate": 7.877924401699842e-05,
"loss": 2.0679,
"step": 8820
},
{
"epoch": 0.99,
"learning_rate": 7.866293893983448e-05,
"loss": 2.0561,
"step": 8840
},
{
"epoch": 0.99,
"learning_rate": 7.854663386267054e-05,
"loss": 2.0824,
"step": 8860
},
{
"epoch": 0.99,
"learning_rate": 7.84303287855066e-05,
"loss": 2.0671,
"step": 8880
},
{
"epoch": 1.0,
"learning_rate": 7.831402370834264e-05,
"loss": 2.0845,
"step": 8900
},
{
"epoch": 1.0,
"learning_rate": 7.819771863117869e-05,
"loss": 2.0639,
"step": 8920
},
{
"epoch": 1.0,
"learning_rate": 7.808141355401476e-05,
"loss": 2.0743,
"step": 8940
},
{
"epoch": 1.0,
"learning_rate": 7.796510847685081e-05,
"loss": 2.073,
"step": 8960
},
{
"epoch": 1.0,
"learning_rate": 7.784880339968686e-05,
"loss": 2.0485,
"step": 8980
},
{
"epoch": 1.01,
"learning_rate": 7.773249832252293e-05,
"loss": 2.0882,
"step": 9000
},
{
"epoch": 1.01,
"learning_rate": 7.761619324535897e-05,
"loss": 2.0788,
"step": 9020
},
{
"epoch": 1.01,
"learning_rate": 7.749988816819502e-05,
"loss": 2.1103,
"step": 9040
},
{
"epoch": 1.01,
"learning_rate": 7.738358309103108e-05,
"loss": 2.0259,
"step": 9060
},
{
"epoch": 1.02,
"learning_rate": 7.726727801386714e-05,
"loss": 2.0938,
"step": 9080
},
{
"epoch": 1.02,
"learning_rate": 7.71509729367032e-05,
"loss": 2.0847,
"step": 9100
},
{
"epoch": 1.02,
"learning_rate": 7.703466785953925e-05,
"loss": 2.071,
"step": 9120
},
{
"epoch": 1.02,
"learning_rate": 7.691836278237529e-05,
"loss": 2.0918,
"step": 9140
},
{
"epoch": 1.02,
"learning_rate": 7.680205770521136e-05,
"loss": 2.0931,
"step": 9160
},
{
"epoch": 1.03,
"learning_rate": 7.668575262804741e-05,
"loss": 2.0808,
"step": 9180
},
{
"epoch": 1.03,
"learning_rate": 7.656944755088346e-05,
"loss": 2.0443,
"step": 9200
},
{
"epoch": 1.03,
"learning_rate": 7.645314247371952e-05,
"loss": 2.0689,
"step": 9220
},
{
"epoch": 1.03,
"learning_rate": 7.633683739655558e-05,
"loss": 2.0699,
"step": 9240
},
{
"epoch": 1.04,
"learning_rate": 7.622053231939162e-05,
"loss": 2.0754,
"step": 9260
},
{
"epoch": 1.04,
"learning_rate": 7.610422724222768e-05,
"loss": 2.0562,
"step": 9280
},
{
"epoch": 1.04,
"learning_rate": 7.598792216506374e-05,
"loss": 2.1127,
"step": 9300
},
{
"epoch": 1.04,
"learning_rate": 7.58716170878998e-05,
"loss": 2.0445,
"step": 9320
},
{
"epoch": 1.04,
"learning_rate": 7.575531201073585e-05,
"loss": 2.0969,
"step": 9340
},
{
"epoch": 1.05,
"learning_rate": 7.563900693357189e-05,
"loss": 2.0689,
"step": 9360
},
{
"epoch": 1.05,
"learning_rate": 7.552270185640797e-05,
"loss": 2.0759,
"step": 9380
},
{
"epoch": 1.05,
"learning_rate": 7.540639677924401e-05,
"loss": 2.0992,
"step": 9400
},
{
"epoch": 1.05,
"learning_rate": 7.529009170208006e-05,
"loss": 2.0964,
"step": 9420
},
{
"epoch": 1.06,
"learning_rate": 7.517378662491612e-05,
"loss": 2.124,
"step": 9440
},
{
"epoch": 1.06,
"learning_rate": 7.505748154775218e-05,
"loss": 2.0637,
"step": 9460
},
{
"epoch": 1.06,
"learning_rate": 7.494117647058822e-05,
"loss": 2.0321,
"step": 9480
},
{
"epoch": 1.06,
"learning_rate": 7.482487139342428e-05,
"loss": 2.0486,
"step": 9500
},
{
"epoch": 1.06,
"learning_rate": 7.470856631626033e-05,
"loss": 2.0797,
"step": 9520
},
{
"epoch": 1.07,
"learning_rate": 7.45922612390964e-05,
"loss": 2.0519,
"step": 9540
},
{
"epoch": 1.07,
"learning_rate": 7.447595616193245e-05,
"loss": 2.0909,
"step": 9560
},
{
"epoch": 1.07,
"learning_rate": 7.43596510847685e-05,
"loss": 2.1264,
"step": 9580
},
{
"epoch": 1.07,
"learning_rate": 7.424334600760457e-05,
"loss": 2.0636,
"step": 9600
},
{
"epoch": 1.08,
"learning_rate": 7.412704093044061e-05,
"loss": 2.0728,
"step": 9620
},
{
"epoch": 1.08,
"learning_rate": 7.401073585327666e-05,
"loss": 2.1126,
"step": 9640
},
{
"epoch": 1.08,
"learning_rate": 7.389443077611272e-05,
"loss": 2.0674,
"step": 9660
},
{
"epoch": 1.08,
"learning_rate": 7.377812569894878e-05,
"loss": 2.0758,
"step": 9680
},
{
"epoch": 1.08,
"learning_rate": 7.366182062178482e-05,
"loss": 2.1052,
"step": 9700
},
{
"epoch": 1.09,
"learning_rate": 7.354551554462088e-05,
"loss": 2.1103,
"step": 9720
},
{
"epoch": 1.09,
"learning_rate": 7.342921046745693e-05,
"loss": 2.0949,
"step": 9740
},
{
"epoch": 1.09,
"learning_rate": 7.3312905390293e-05,
"loss": 2.0553,
"step": 9760
},
{
"epoch": 1.09,
"learning_rate": 7.320823082084544e-05,
"loss": 2.1142,
"step": 9780
},
{
"epoch": 1.1,
"learning_rate": 7.30919257436815e-05,
"loss": 2.0894,
"step": 9800
},
{
"epoch": 1.1,
"learning_rate": 7.297562066651756e-05,
"loss": 2.0715,
"step": 9820
},
{
"epoch": 1.1,
"learning_rate": 7.28593155893536e-05,
"loss": 2.0851,
"step": 9840
},
{
"epoch": 1.1,
"learning_rate": 7.274301051218965e-05,
"loss": 2.0747,
"step": 9860
},
{
"epoch": 1.1,
"learning_rate": 7.262670543502572e-05,
"loss": 2.0983,
"step": 9880
},
{
"epoch": 1.11,
"learning_rate": 7.251040035786177e-05,
"loss": 2.0738,
"step": 9900
},
{
"epoch": 1.11,
"learning_rate": 7.239409528069781e-05,
"loss": 2.0499,
"step": 9920
},
{
"epoch": 1.11,
"learning_rate": 7.227779020353389e-05,
"loss": 2.0457,
"step": 9940
},
{
"epoch": 1.11,
"learning_rate": 7.216148512636993e-05,
"loss": 2.1105,
"step": 9960
},
{
"epoch": 1.12,
"learning_rate": 7.204518004920599e-05,
"loss": 2.0765,
"step": 9980
},
{
"epoch": 1.12,
"learning_rate": 7.192887497204204e-05,
"loss": 2.0242,
"step": 10000
},
{
"epoch": 1.12,
"learning_rate": 7.18125698948781e-05,
"loss": 2.0528,
"step": 10020
},
{
"epoch": 1.12,
"learning_rate": 7.169626481771416e-05,
"loss": 2.1105,
"step": 10040
},
{
"epoch": 1.13,
"learning_rate": 7.15799597405502e-05,
"loss": 2.0939,
"step": 10060
},
{
"epoch": 1.13,
"learning_rate": 7.146365466338625e-05,
"loss": 2.0825,
"step": 10080
},
{
"epoch": 1.13,
"learning_rate": 7.134734958622232e-05,
"loss": 2.0786,
"step": 10100
},
{
"epoch": 1.13,
"learning_rate": 7.123104450905837e-05,
"loss": 2.0824,
"step": 10120
},
{
"epoch": 1.13,
"learning_rate": 7.111473943189441e-05,
"loss": 2.0414,
"step": 10140
},
{
"epoch": 1.14,
"learning_rate": 7.099843435473049e-05,
"loss": 2.0486,
"step": 10160
},
{
"epoch": 1.14,
"learning_rate": 7.088212927756653e-05,
"loss": 2.0729,
"step": 10180
},
{
"epoch": 1.14,
"learning_rate": 7.076582420040259e-05,
"loss": 2.0891,
"step": 10200
},
{
"epoch": 1.14,
"learning_rate": 7.064951912323864e-05,
"loss": 2.0465,
"step": 10220
},
{
"epoch": 1.15,
"learning_rate": 7.05332140460747e-05,
"loss": 2.0474,
"step": 10240
},
{
"epoch": 1.15,
"learning_rate": 7.041690896891076e-05,
"loss": 2.1061,
"step": 10260
},
{
"epoch": 1.15,
"learning_rate": 7.03006038917468e-05,
"loss": 2.0628,
"step": 10280
},
{
"epoch": 1.15,
"learning_rate": 7.018429881458285e-05,
"loss": 2.1327,
"step": 10300
},
{
"epoch": 1.15,
"learning_rate": 7.006799373741892e-05,
"loss": 2.0707,
"step": 10320
},
{
"epoch": 1.16,
"learning_rate": 6.995168866025497e-05,
"loss": 2.0648,
"step": 10340
},
{
"epoch": 1.16,
"learning_rate": 6.983538358309102e-05,
"loss": 2.0567,
"step": 10360
},
{
"epoch": 1.16,
"learning_rate": 6.971907850592709e-05,
"loss": 2.0765,
"step": 10380
},
{
"epoch": 1.16,
"learning_rate": 6.960277342876313e-05,
"loss": 2.0661,
"step": 10400
},
{
"epoch": 1.17,
"learning_rate": 6.94864683515992e-05,
"loss": 2.1056,
"step": 10420
},
{
"epoch": 1.17,
"learning_rate": 6.937016327443524e-05,
"loss": 2.0758,
"step": 10440
},
{
"epoch": 1.17,
"learning_rate": 6.92538581972713e-05,
"loss": 2.0754,
"step": 10460
},
{
"epoch": 1.17,
"learning_rate": 6.913755312010736e-05,
"loss": 2.067,
"step": 10480
},
{
"epoch": 1.17,
"learning_rate": 6.90212480429434e-05,
"loss": 2.0442,
"step": 10500
},
{
"epoch": 1.18,
"learning_rate": 6.890494296577945e-05,
"loss": 2.1316,
"step": 10520
},
{
"epoch": 1.18,
"learning_rate": 6.878863788861552e-05,
"loss": 2.1042,
"step": 10540
},
{
"epoch": 1.18,
"learning_rate": 6.867233281145157e-05,
"loss": 2.0781,
"step": 10560
},
{
"epoch": 1.18,
"learning_rate": 6.855602773428762e-05,
"loss": 2.0834,
"step": 10580
},
{
"epoch": 1.19,
"learning_rate": 6.843972265712368e-05,
"loss": 2.0664,
"step": 10600
},
{
"epoch": 1.19,
"learning_rate": 6.832341757995974e-05,
"loss": 2.0874,
"step": 10620
},
{
"epoch": 1.19,
"learning_rate": 6.82071125027958e-05,
"loss": 2.068,
"step": 10640
},
{
"epoch": 1.19,
"learning_rate": 6.809080742563184e-05,
"loss": 2.1073,
"step": 10660
},
{
"epoch": 1.19,
"learning_rate": 6.797450234846789e-05,
"loss": 2.092,
"step": 10680
},
{
"epoch": 1.2,
"learning_rate": 6.785819727130396e-05,
"loss": 2.124,
"step": 10700
},
{
"epoch": 1.2,
"learning_rate": 6.774189219414e-05,
"loss": 2.0616,
"step": 10720
},
{
"epoch": 1.2,
"learning_rate": 6.762558711697605e-05,
"loss": 2.0983,
"step": 10740
},
{
"epoch": 1.2,
"learning_rate": 6.750928203981213e-05,
"loss": 2.0409,
"step": 10760
},
{
"epoch": 1.21,
"learning_rate": 6.740460747036456e-05,
"loss": 2.081,
"step": 10780
},
{
"epoch": 1.21,
"learning_rate": 6.728830239320063e-05,
"loss": 2.0685,
"step": 10800
},
{
"epoch": 1.21,
"learning_rate": 6.717199731603668e-05,
"loss": 2.0529,
"step": 10820
},
{
"epoch": 1.21,
"learning_rate": 6.705569223887272e-05,
"loss": 2.0319,
"step": 10840
},
{
"epoch": 1.21,
"learning_rate": 6.694520241556699e-05,
"loss": 2.0722,
"step": 10860
},
{
"epoch": 1.22,
"learning_rate": 6.682889733840304e-05,
"loss": 2.0737,
"step": 10880
},
{
"epoch": 1.22,
"learning_rate": 6.671259226123908e-05,
"loss": 2.0686,
"step": 10900
},
{
"epoch": 1.22,
"learning_rate": 6.659628718407514e-05,
"loss": 2.0804,
"step": 10920
},
{
"epoch": 1.22,
"learning_rate": 6.64799821069112e-05,
"loss": 2.0479,
"step": 10940
},
{
"epoch": 1.23,
"learning_rate": 6.636367702974725e-05,
"loss": 2.0999,
"step": 10960
},
{
"epoch": 1.23,
"learning_rate": 6.624737195258331e-05,
"loss": 2.0414,
"step": 10980
},
{
"epoch": 1.23,
"learning_rate": 6.613106687541937e-05,
"loss": 2.0921,
"step": 11000
},
{
"epoch": 1.23,
"learning_rate": 6.601476179825543e-05,
"loss": 2.0881,
"step": 11020
},
{
"epoch": 1.23,
"learning_rate": 6.589845672109147e-05,
"loss": 2.0595,
"step": 11040
},
{
"epoch": 1.24,
"learning_rate": 6.578215164392752e-05,
"loss": 2.0746,
"step": 11060
},
{
"epoch": 1.24,
"learning_rate": 6.566584656676359e-05,
"loss": 2.0711,
"step": 11080
},
{
"epoch": 1.24,
"learning_rate": 6.554954148959964e-05,
"loss": 2.0837,
"step": 11100
},
{
"epoch": 1.24,
"learning_rate": 6.543323641243568e-05,
"loss": 2.1156,
"step": 11120
},
{
"epoch": 1.25,
"learning_rate": 6.531693133527174e-05,
"loss": 2.0635,
"step": 11140
},
{
"epoch": 1.25,
"learning_rate": 6.52006262581078e-05,
"loss": 2.1057,
"step": 11160
},
{
"epoch": 1.25,
"learning_rate": 6.508432118094385e-05,
"loss": 2.0678,
"step": 11180
},
{
"epoch": 1.25,
"learning_rate": 6.496801610377991e-05,
"loss": 2.0829,
"step": 11200
},
{
"epoch": 1.25,
"learning_rate": 6.485171102661596e-05,
"loss": 2.0898,
"step": 11220
},
{
"epoch": 1.26,
"learning_rate": 6.473540594945201e-05,
"loss": 2.0493,
"step": 11240
},
{
"epoch": 1.26,
"learning_rate": 6.461910087228807e-05,
"loss": 2.0973,
"step": 11260
},
{
"epoch": 1.26,
"learning_rate": 6.450279579512413e-05,
"loss": 2.0676,
"step": 11280
},
{
"epoch": 1.26,
"learning_rate": 6.438649071796018e-05,
"loss": 2.0913,
"step": 11300
},
{
"epoch": 1.27,
"learning_rate": 6.427018564079624e-05,
"loss": 2.0759,
"step": 11320
},
{
"epoch": 1.27,
"learning_rate": 6.41538805636323e-05,
"loss": 2.0771,
"step": 11340
},
{
"epoch": 1.27,
"learning_rate": 6.403757548646834e-05,
"loss": 2.0751,
"step": 11360
},
{
"epoch": 1.27,
"learning_rate": 6.39212704093044e-05,
"loss": 2.0917,
"step": 11380
},
{
"epoch": 1.27,
"learning_rate": 6.380496533214045e-05,
"loss": 2.0849,
"step": 11400
},
{
"epoch": 1.28,
"learning_rate": 6.368866025497651e-05,
"loss": 2.1137,
"step": 11420
},
{
"epoch": 1.28,
"learning_rate": 6.357235517781256e-05,
"loss": 2.0939,
"step": 11440
},
{
"epoch": 1.28,
"learning_rate": 6.345605010064862e-05,
"loss": 2.0741,
"step": 11460
},
{
"epoch": 1.28,
"learning_rate": 6.333974502348468e-05,
"loss": 2.0774,
"step": 11480
},
{
"epoch": 1.29,
"learning_rate": 6.322343994632073e-05,
"loss": 2.0635,
"step": 11500
},
{
"epoch": 1.29,
"learning_rate": 6.310713486915678e-05,
"loss": 2.0485,
"step": 11520
},
{
"epoch": 1.29,
"learning_rate": 6.299082979199284e-05,
"loss": 2.039,
"step": 11540
},
{
"epoch": 1.29,
"learning_rate": 6.28745247148289e-05,
"loss": 2.0711,
"step": 11560
},
{
"epoch": 1.3,
"learning_rate": 6.275821963766495e-05,
"loss": 2.0571,
"step": 11580
},
{
"epoch": 1.3,
"learning_rate": 6.2641914560501e-05,
"loss": 2.0698,
"step": 11600
},
{
"epoch": 1.3,
"learning_rate": 6.252560948333705e-05,
"loss": 2.0765,
"step": 11620
},
{
"epoch": 1.3,
"learning_rate": 6.240930440617311e-05,
"loss": 2.0773,
"step": 11640
},
{
"epoch": 1.3,
"learning_rate": 6.229299932900916e-05,
"loss": 2.0709,
"step": 11660
},
{
"epoch": 1.31,
"learning_rate": 6.217669425184522e-05,
"loss": 2.1052,
"step": 11680
},
{
"epoch": 1.31,
"learning_rate": 6.206038917468128e-05,
"loss": 2.1026,
"step": 11700
},
{
"epoch": 1.31,
"learning_rate": 6.194408409751734e-05,
"loss": 2.1083,
"step": 11720
},
{
"epoch": 1.31,
"learning_rate": 6.182777902035338e-05,
"loss": 2.0498,
"step": 11740
},
{
"epoch": 1.32,
"learning_rate": 6.171147394318944e-05,
"loss": 2.0841,
"step": 11760
},
{
"epoch": 1.32,
"learning_rate": 6.159516886602549e-05,
"loss": 2.0496,
"step": 11780
},
{
"epoch": 1.32,
"learning_rate": 6.147886378886155e-05,
"loss": 2.0787,
"step": 11800
},
{
"epoch": 1.32,
"learning_rate": 6.136255871169761e-05,
"loss": 2.0895,
"step": 11820
},
{
"epoch": 1.32,
"learning_rate": 6.124625363453365e-05,
"loss": 2.0626,
"step": 11840
},
{
"epoch": 1.33,
"learning_rate": 6.112994855736971e-05,
"loss": 2.0783,
"step": 11860
},
{
"epoch": 1.33,
"learning_rate": 6.1013643480205765e-05,
"loss": 2.0455,
"step": 11880
},
{
"epoch": 1.33,
"learning_rate": 6.089733840304182e-05,
"loss": 2.1121,
"step": 11900
},
{
"epoch": 1.33,
"learning_rate": 6.078103332587788e-05,
"loss": 2.0669,
"step": 11920
},
{
"epoch": 1.34,
"learning_rate": 6.066472824871393e-05,
"loss": 2.0762,
"step": 11940
},
{
"epoch": 1.34,
"learning_rate": 6.0548423171549983e-05,
"loss": 2.0947,
"step": 11960
},
{
"epoch": 1.34,
"learning_rate": 6.0432118094386036e-05,
"loss": 2.0841,
"step": 11980
},
{
"epoch": 1.34,
"learning_rate": 6.0315813017222096e-05,
"loss": 2.011,
"step": 12000
},
{
"epoch": 1.34,
"learning_rate": 6.019950794005814e-05,
"loss": 2.0809,
"step": 12020
},
{
"epoch": 1.35,
"learning_rate": 6.00832028628942e-05,
"loss": 2.0777,
"step": 12040
},
{
"epoch": 1.35,
"learning_rate": 5.9966897785730254e-05,
"loss": 2.0817,
"step": 12060
},
{
"epoch": 1.35,
"learning_rate": 5.9850592708566314e-05,
"loss": 2.0892,
"step": 12080
},
{
"epoch": 1.35,
"learning_rate": 5.9734287631402367e-05,
"loss": 2.0662,
"step": 12100
},
{
"epoch": 1.36,
"learning_rate": 5.961798255423842e-05,
"loss": 2.0668,
"step": 12120
},
{
"epoch": 1.36,
"learning_rate": 5.950167747707448e-05,
"loss": 2.0867,
"step": 12140
},
{
"epoch": 1.36,
"learning_rate": 5.9385372399910525e-05,
"loss": 2.0867,
"step": 12160
},
{
"epoch": 1.36,
"learning_rate": 5.9269067322746585e-05,
"loss": 2.0552,
"step": 12180
},
{
"epoch": 1.36,
"learning_rate": 5.915276224558264e-05,
"loss": 2.0824,
"step": 12200
},
{
"epoch": 1.37,
"learning_rate": 5.90364571684187e-05,
"loss": 2.0578,
"step": 12220
},
{
"epoch": 1.37,
"learning_rate": 5.892015209125474e-05,
"loss": 2.0963,
"step": 12240
},
{
"epoch": 1.37,
"learning_rate": 5.88038470140908e-05,
"loss": 2.0727,
"step": 12260
},
{
"epoch": 1.37,
"learning_rate": 5.8687541936926855e-05,
"loss": 2.0608,
"step": 12280
},
{
"epoch": 1.38,
"learning_rate": 5.8571236859762915e-05,
"loss": 2.0881,
"step": 12300
},
{
"epoch": 1.38,
"learning_rate": 5.846074703645716e-05,
"loss": 2.0888,
"step": 12320
},
{
"epoch": 1.38,
"learning_rate": 5.8344441959293214e-05,
"loss": 2.0465,
"step": 12340
},
{
"epoch": 1.38,
"learning_rate": 5.823395213598747e-05,
"loss": 2.1094,
"step": 12360
},
{
"epoch": 1.38,
"learning_rate": 5.811764705882353e-05,
"loss": 2.0192,
"step": 12380
},
{
"epoch": 1.39,
"learning_rate": 5.800134198165958e-05,
"loss": 2.0697,
"step": 12400
},
{
"epoch": 1.39,
"learning_rate": 5.788503690449563e-05,
"loss": 2.0691,
"step": 12420
},
{
"epoch": 1.39,
"learning_rate": 5.776873182733169e-05,
"loss": 2.124,
"step": 12440
},
{
"epoch": 1.39,
"learning_rate": 5.7652426750167745e-05,
"loss": 2.0787,
"step": 12460
},
{
"epoch": 1.4,
"learning_rate": 5.75361216730038e-05,
"loss": 2.0961,
"step": 12480
},
{
"epoch": 1.4,
"learning_rate": 5.741981659583985e-05,
"loss": 2.0777,
"step": 12500
},
{
"epoch": 1.4,
"learning_rate": 5.730351151867591e-05,
"loss": 2.1276,
"step": 12520
},
{
"epoch": 1.4,
"learning_rate": 5.7187206441511956e-05,
"loss": 2.0695,
"step": 12540
},
{
"epoch": 1.4,
"learning_rate": 5.7070901364348016e-05,
"loss": 2.1114,
"step": 12560
},
{
"epoch": 1.41,
"learning_rate": 5.695459628718407e-05,
"loss": 2.0385,
"step": 12580
},
{
"epoch": 1.41,
"learning_rate": 5.683829121002013e-05,
"loss": 2.0712,
"step": 12600
},
{
"epoch": 1.41,
"learning_rate": 5.6721986132856174e-05,
"loss": 2.0475,
"step": 12620
},
{
"epoch": 1.41,
"learning_rate": 5.6605681055692234e-05,
"loss": 2.0444,
"step": 12640
},
{
"epoch": 1.42,
"learning_rate": 5.6489375978528293e-05,
"loss": 2.0797,
"step": 12660
},
{
"epoch": 1.42,
"learning_rate": 5.6373070901364346e-05,
"loss": 2.0404,
"step": 12680
},
{
"epoch": 1.42,
"learning_rate": 5.62567658242004e-05,
"loss": 2.0824,
"step": 12700
},
{
"epoch": 1.42,
"learning_rate": 5.614046074703645e-05,
"loss": 2.0749,
"step": 12720
},
{
"epoch": 1.42,
"learning_rate": 5.602415566987251e-05,
"loss": 2.0788,
"step": 12740
},
{
"epoch": 1.43,
"learning_rate": 5.590785059270856e-05,
"loss": 2.0504,
"step": 12760
},
{
"epoch": 1.43,
"learning_rate": 5.579154551554462e-05,
"loss": 2.0855,
"step": 12780
},
{
"epoch": 1.43,
"learning_rate": 5.567524043838067e-05,
"loss": 2.054,
"step": 12800
},
{
"epoch": 1.43,
"learning_rate": 5.555893536121673e-05,
"loss": 2.0847,
"step": 12820
},
{
"epoch": 1.44,
"learning_rate": 5.5442630284052775e-05,
"loss": 2.0575,
"step": 12840
},
{
"epoch": 1.44,
"learning_rate": 5.5326325206888835e-05,
"loss": 2.0579,
"step": 12860
},
{
"epoch": 1.44,
"learning_rate": 5.521002012972489e-05,
"loss": 2.0777,
"step": 12880
},
{
"epoch": 1.44,
"learning_rate": 5.509371505256094e-05,
"loss": 2.048,
"step": 12900
},
{
"epoch": 1.44,
"learning_rate": 5.4977409975396993e-05,
"loss": 2.0861,
"step": 12920
},
{
"epoch": 1.45,
"learning_rate": 5.486110489823305e-05,
"loss": 2.0703,
"step": 12940
},
{
"epoch": 1.45,
"learning_rate": 5.474479982106911e-05,
"loss": 2.0672,
"step": 12960
},
{
"epoch": 1.45,
"learning_rate": 5.462849474390516e-05,
"loss": 2.072,
"step": 12980
},
{
"epoch": 1.45,
"learning_rate": 5.451218966674122e-05,
"loss": 2.0926,
"step": 13000
},
{
"epoch": 1.46,
"learning_rate": 5.439588458957727e-05,
"loss": 2.1006,
"step": 13020
},
{
"epoch": 1.46,
"learning_rate": 5.427957951241333e-05,
"loss": 2.069,
"step": 13040
},
{
"epoch": 1.46,
"learning_rate": 5.416327443524938e-05,
"loss": 2.0707,
"step": 13060
},
{
"epoch": 1.46,
"learning_rate": 5.4046969358085436e-05,
"loss": 2.0648,
"step": 13080
},
{
"epoch": 1.46,
"learning_rate": 5.393066428092149e-05,
"loss": 2.083,
"step": 13100
},
{
"epoch": 1.47,
"learning_rate": 5.381435920375754e-05,
"loss": 2.0851,
"step": 13120
},
{
"epoch": 1.47,
"learning_rate": 5.3698054126593595e-05,
"loss": 2.0987,
"step": 13140
},
{
"epoch": 1.47,
"learning_rate": 5.3581749049429654e-05,
"loss": 2.0758,
"step": 13160
},
{
"epoch": 1.47,
"learning_rate": 5.346544397226571e-05,
"loss": 2.0657,
"step": 13180
},
{
"epoch": 1.48,
"learning_rate": 5.334913889510176e-05,
"loss": 2.0461,
"step": 13200
},
{
"epoch": 1.48,
"learning_rate": 5.323283381793782e-05,
"loss": 2.0693,
"step": 13220
},
{
"epoch": 1.48,
"learning_rate": 5.311652874077387e-05,
"loss": 2.0812,
"step": 13240
},
{
"epoch": 1.48,
"learning_rate": 5.300022366360993e-05,
"loss": 2.0818,
"step": 13260
},
{
"epoch": 1.49,
"learning_rate": 5.288391858644598e-05,
"loss": 2.0236,
"step": 13280
},
{
"epoch": 1.49,
"learning_rate": 5.276761350928204e-05,
"loss": 2.076,
"step": 13300
},
{
"epoch": 1.49,
"learning_rate": 5.265130843211809e-05,
"loss": 2.0957,
"step": 13320
},
{
"epoch": 1.49,
"learning_rate": 5.253500335495414e-05,
"loss": 2.129,
"step": 13340
},
{
"epoch": 1.49,
"learning_rate": 5.2418698277790196e-05,
"loss": 2.1143,
"step": 13360
},
{
"epoch": 1.5,
"learning_rate": 5.2302393200626255e-05,
"loss": 2.0316,
"step": 13380
},
{
"epoch": 1.5,
"learning_rate": 5.218608812346231e-05,
"loss": 2.0659,
"step": 13400
},
{
"epoch": 1.5,
"learning_rate": 5.206978304629836e-05,
"loss": 2.0619,
"step": 13420
},
{
"epoch": 1.5,
"learning_rate": 5.1953477969134414e-05,
"loss": 2.0418,
"step": 13440
},
{
"epoch": 1.51,
"learning_rate": 5.1837172891970473e-05,
"loss": 2.0524,
"step": 13460
},
{
"epoch": 1.51,
"learning_rate": 5.172086781480653e-05,
"loss": 2.1071,
"step": 13480
},
{
"epoch": 1.51,
"learning_rate": 5.160456273764258e-05,
"loss": 2.0871,
"step": 13500
},
{
"epoch": 1.51,
"learning_rate": 5.148825766047864e-05,
"loss": 2.0552,
"step": 13520
},
{
"epoch": 1.51,
"learning_rate": 5.137195258331469e-05,
"loss": 2.0746,
"step": 13540
},
{
"epoch": 1.52,
"learning_rate": 5.1255647506150744e-05,
"loss": 2.1019,
"step": 13560
},
{
"epoch": 1.52,
"learning_rate": 5.11393424289868e-05,
"loss": 2.0722,
"step": 13580
},
{
"epoch": 1.52,
"learning_rate": 5.102303735182286e-05,
"loss": 2.0436,
"step": 13600
},
{
"epoch": 1.52,
"learning_rate": 5.090673227465891e-05,
"loss": 2.0747,
"step": 13620
},
{
"epoch": 1.53,
"learning_rate": 5.079042719749496e-05,
"loss": 2.0315,
"step": 13640
},
{
"epoch": 1.53,
"learning_rate": 5.0674122120331015e-05,
"loss": 2.0546,
"step": 13660
},
{
"epoch": 1.53,
"learning_rate": 5.0557817043167075e-05,
"loss": 2.0612,
"step": 13680
},
{
"epoch": 1.53,
"learning_rate": 5.044151196600313e-05,
"loss": 2.0716,
"step": 13700
},
{
"epoch": 1.53,
"learning_rate": 5.032520688883918e-05,
"loss": 2.0419,
"step": 13720
},
{
"epoch": 1.54,
"learning_rate": 5.020890181167523e-05,
"loss": 2.0704,
"step": 13740
},
{
"epoch": 1.54,
"learning_rate": 5.009259673451129e-05,
"loss": 2.0328,
"step": 13760
},
{
"epoch": 1.54,
"learning_rate": 4.9976291657347345e-05,
"loss": 2.0596,
"step": 13780
},
{
"epoch": 1.54,
"learning_rate": 4.98599865801834e-05,
"loss": 2.0896,
"step": 13800
},
{
"epoch": 1.55,
"learning_rate": 4.974368150301946e-05,
"loss": 2.0619,
"step": 13820
},
{
"epoch": 1.55,
"learning_rate": 4.963319167971371e-05,
"loss": 2.1666,
"step": 13840
},
{
"epoch": 1.55,
"learning_rate": 4.951688660254976e-05,
"loss": 2.0784,
"step": 13860
},
{
"epoch": 1.55,
"learning_rate": 4.940058152538582e-05,
"loss": 2.0693,
"step": 13880
},
{
"epoch": 1.55,
"learning_rate": 4.928427644822187e-05,
"loss": 2.0624,
"step": 13900
},
{
"epoch": 1.56,
"learning_rate": 4.916797137105793e-05,
"loss": 2.0706,
"step": 13920
},
{
"epoch": 1.56,
"learning_rate": 4.9051666293893975e-05,
"loss": 2.0472,
"step": 13940
},
{
"epoch": 1.56,
"learning_rate": 4.8935361216730035e-05,
"loss": 2.1166,
"step": 13960
},
{
"epoch": 1.56,
"learning_rate": 4.881905613956609e-05,
"loss": 2.0927,
"step": 13980
},
{
"epoch": 1.57,
"learning_rate": 4.870275106240214e-05,
"loss": 2.0265,
"step": 14000
},
{
"epoch": 1.57,
"learning_rate": 4.858644598523819e-05,
"loss": 2.0714,
"step": 14020
},
{
"epoch": 1.57,
"learning_rate": 4.847014090807425e-05,
"loss": 2.052,
"step": 14040
},
{
"epoch": 1.57,
"learning_rate": 4.835383583091031e-05,
"loss": 2.1046,
"step": 14060
},
{
"epoch": 1.57,
"learning_rate": 4.823753075374636e-05,
"loss": 2.0489,
"step": 14080
},
{
"epoch": 1.58,
"learning_rate": 4.812122567658242e-05,
"loss": 2.0593,
"step": 14100
},
{
"epoch": 1.58,
"learning_rate": 4.800492059941847e-05,
"loss": 2.065,
"step": 14120
},
{
"epoch": 1.58,
"learning_rate": 4.788861552225453e-05,
"loss": 2.1017,
"step": 14140
},
{
"epoch": 1.58,
"learning_rate": 4.7772310445090576e-05,
"loss": 2.0644,
"step": 14160
},
{
"epoch": 1.59,
"learning_rate": 4.7656005367926636e-05,
"loss": 2.0813,
"step": 14180
},
{
"epoch": 1.59,
"learning_rate": 4.753970029076269e-05,
"loss": 2.0849,
"step": 14200
},
{
"epoch": 1.59,
"learning_rate": 4.742339521359874e-05,
"loss": 2.101,
"step": 14220
},
{
"epoch": 1.59,
"learning_rate": 4.7307090136434794e-05,
"loss": 2.0588,
"step": 14240
},
{
"epoch": 1.59,
"learning_rate": 4.7190785059270854e-05,
"loss": 2.0628,
"step": 14260
},
{
"epoch": 1.6,
"learning_rate": 4.707447998210691e-05,
"loss": 2.0487,
"step": 14280
},
{
"epoch": 1.6,
"learning_rate": 4.695817490494296e-05,
"loss": 2.1002,
"step": 14300
},
{
"epoch": 1.6,
"learning_rate": 4.684186982777901e-05,
"loss": 2.0855,
"step": 14320
},
{
"epoch": 1.6,
"learning_rate": 4.672556475061507e-05,
"loss": 2.0458,
"step": 14340
},
{
"epoch": 1.61,
"learning_rate": 4.660925967345113e-05,
"loss": 2.082,
"step": 14360
},
{
"epoch": 1.61,
"learning_rate": 4.649295459628718e-05,
"loss": 2.1003,
"step": 14380
},
{
"epoch": 1.61,
"learning_rate": 4.637664951912324e-05,
"loss": 2.0829,
"step": 14400
},
{
"epoch": 1.61,
"learning_rate": 4.626034444195929e-05,
"loss": 2.0158,
"step": 14420
},
{
"epoch": 1.61,
"learning_rate": 4.614403936479534e-05,
"loss": 2.0687,
"step": 14440
},
{
"epoch": 1.62,
"learning_rate": 4.6027734287631395e-05,
"loss": 2.1068,
"step": 14460
},
{
"epoch": 1.62,
"learning_rate": 4.5911429210467455e-05,
"loss": 2.0875,
"step": 14480
},
{
"epoch": 1.62,
"learning_rate": 4.579512413330351e-05,
"loss": 2.0998,
"step": 14500
},
{
"epoch": 1.62,
"learning_rate": 4.567881905613956e-05,
"loss": 2.0661,
"step": 14520
},
{
"epoch": 1.63,
"learning_rate": 4.5562513978975613e-05,
"loss": 2.0509,
"step": 14540
},
{
"epoch": 1.63,
"learning_rate": 4.544620890181167e-05,
"loss": 2.0392,
"step": 14560
},
{
"epoch": 1.63,
"learning_rate": 4.5329903824647726e-05,
"loss": 2.0608,
"step": 14580
},
{
"epoch": 1.63,
"learning_rate": 4.521359874748378e-05,
"loss": 2.0562,
"step": 14600
},
{
"epoch": 1.63,
"learning_rate": 4.509729367031984e-05,
"loss": 2.0673,
"step": 14620
},
{
"epoch": 1.64,
"learning_rate": 4.498098859315589e-05,
"loss": 2.0618,
"step": 14640
},
{
"epoch": 1.64,
"learning_rate": 4.4864683515991944e-05,
"loss": 2.0877,
"step": 14660
},
{
"epoch": 1.64,
"learning_rate": 4.4748378438828e-05,
"loss": 2.067,
"step": 14680
},
{
"epoch": 1.64,
"learning_rate": 4.4632073361664056e-05,
"loss": 2.0975,
"step": 14700
},
{
"epoch": 1.65,
"learning_rate": 4.451576828450011e-05,
"loss": 2.0698,
"step": 14720
},
{
"epoch": 1.65,
"learning_rate": 4.439946320733616e-05,
"loss": 2.0984,
"step": 14740
},
{
"epoch": 1.65,
"learning_rate": 4.4283158130172215e-05,
"loss": 2.0577,
"step": 14760
},
{
"epoch": 1.65,
"learning_rate": 4.4166853053008274e-05,
"loss": 2.1047,
"step": 14780
},
{
"epoch": 1.66,
"learning_rate": 4.405054797584433e-05,
"loss": 2.0432,
"step": 14800
},
{
"epoch": 1.66,
"learning_rate": 4.393424289868038e-05,
"loss": 2.0983,
"step": 14820
},
{
"epoch": 1.66,
"learning_rate": 4.382956832923283e-05,
"loss": 2.0853,
"step": 14840
},
{
"epoch": 1.66,
"learning_rate": 4.3713263252068886e-05,
"loss": 2.09,
"step": 14860
},
{
"epoch": 1.66,
"learning_rate": 4.359695817490494e-05,
"loss": 2.1009,
"step": 14880
},
{
"epoch": 1.67,
"learning_rate": 4.348065309774099e-05,
"loss": 2.0858,
"step": 14900
},
{
"epoch": 1.67,
"learning_rate": 4.336434802057705e-05,
"loss": 2.0515,
"step": 14920
},
{
"epoch": 1.67,
"learning_rate": 4.3248042943413104e-05,
"loss": 2.0471,
"step": 14940
},
{
"epoch": 1.67,
"learning_rate": 4.313173786624916e-05,
"loss": 2.1183,
"step": 14960
},
{
"epoch": 1.68,
"learning_rate": 4.301543278908521e-05,
"loss": 2.1014,
"step": 14980
},
{
"epoch": 1.68,
"learning_rate": 4.289912771192127e-05,
"loss": 2.0757,
"step": 15000
},
{
"epoch": 1.68,
"learning_rate": 4.278282263475732e-05,
"loss": 2.082,
"step": 15020
},
{
"epoch": 1.68,
"learning_rate": 4.2666517557593375e-05,
"loss": 2.0863,
"step": 15040
},
{
"epoch": 1.68,
"learning_rate": 4.255021248042943e-05,
"loss": 2.0706,
"step": 15060
},
{
"epoch": 1.69,
"learning_rate": 4.243390740326549e-05,
"loss": 2.0888,
"step": 15080
},
{
"epoch": 1.69,
"learning_rate": 4.231760232610154e-05,
"loss": 2.0727,
"step": 15100
},
{
"epoch": 1.69,
"learning_rate": 4.220129724893759e-05,
"loss": 2.0461,
"step": 15120
},
{
"epoch": 1.69,
"learning_rate": 4.2084992171773646e-05,
"loss": 2.0925,
"step": 15140
},
{
"epoch": 1.7,
"learning_rate": 4.1968687094609706e-05,
"loss": 2.0566,
"step": 15160
},
{
"epoch": 1.7,
"learning_rate": 4.185238201744575e-05,
"loss": 2.0723,
"step": 15180
},
{
"epoch": 1.7,
"learning_rate": 4.1741892194140005e-05,
"loss": 2.1198,
"step": 15200
},
{
"epoch": 1.7,
"learning_rate": 4.1625587116976064e-05,
"loss": 2.0785,
"step": 15220
},
{
"epoch": 1.7,
"learning_rate": 4.1509282039812124e-05,
"loss": 2.0565,
"step": 15240
},
{
"epoch": 1.71,
"learning_rate": 4.139297696264817e-05,
"loss": 2.0527,
"step": 15260
},
{
"epoch": 1.71,
"learning_rate": 4.127667188548423e-05,
"loss": 2.0889,
"step": 15280
},
{
"epoch": 1.71,
"learning_rate": 4.116036680832028e-05,
"loss": 2.0971,
"step": 15300
},
{
"epoch": 1.71,
"learning_rate": 4.104406173115634e-05,
"loss": 2.115,
"step": 15320
},
{
"epoch": 1.72,
"learning_rate": 4.092775665399239e-05,
"loss": 2.0408,
"step": 15340
},
{
"epoch": 1.72,
"learning_rate": 4.081145157682845e-05,
"loss": 2.0875,
"step": 15360
},
{
"epoch": 1.72,
"learning_rate": 4.06951464996645e-05,
"loss": 2.0583,
"step": 15380
},
{
"epoch": 1.72,
"learning_rate": 4.057884142250055e-05,
"loss": 2.0844,
"step": 15400
},
{
"epoch": 1.72,
"learning_rate": 4.0462536345336606e-05,
"loss": 2.1288,
"step": 15420
},
{
"epoch": 1.73,
"learning_rate": 4.0346231268172666e-05,
"loss": 2.0507,
"step": 15440
},
{
"epoch": 1.73,
"learning_rate": 4.022992619100872e-05,
"loss": 2.0735,
"step": 15460
},
{
"epoch": 1.73,
"learning_rate": 4.011362111384477e-05,
"loss": 2.1198,
"step": 15480
},
{
"epoch": 1.73,
"learning_rate": 3.999731603668083e-05,
"loss": 2.0738,
"step": 15500
},
{
"epoch": 1.74,
"learning_rate": 3.9881010959516884e-05,
"loss": 2.0293,
"step": 15520
},
{
"epoch": 1.74,
"learning_rate": 3.976470588235294e-05,
"loss": 2.0787,
"step": 15540
},
{
"epoch": 1.74,
"learning_rate": 3.964840080518899e-05,
"loss": 2.0727,
"step": 15560
},
{
"epoch": 1.74,
"learning_rate": 3.953209572802505e-05,
"loss": 2.0838,
"step": 15580
},
{
"epoch": 1.74,
"learning_rate": 3.94157906508611e-05,
"loss": 2.0455,
"step": 15600
},
{
"epoch": 1.75,
"learning_rate": 3.9299485573697154e-05,
"loss": 2.0603,
"step": 15620
},
{
"epoch": 1.75,
"learning_rate": 3.918318049653321e-05,
"loss": 2.0766,
"step": 15640
},
{
"epoch": 1.75,
"learning_rate": 3.906687541936927e-05,
"loss": 2.101,
"step": 15660
},
{
"epoch": 1.75,
"learning_rate": 3.895057034220532e-05,
"loss": 2.0211,
"step": 15680
},
{
"epoch": 1.76,
"learning_rate": 3.883426526504137e-05,
"loss": 2.0948,
"step": 15700
},
{
"epoch": 1.76,
"learning_rate": 3.8717960187877425e-05,
"loss": 2.0982,
"step": 15720
},
{
"epoch": 1.76,
"learning_rate": 3.8601655110713485e-05,
"loss": 2.0548,
"step": 15740
},
{
"epoch": 1.76,
"learning_rate": 3.8485350033549544e-05,
"loss": 2.0852,
"step": 15760
},
{
"epoch": 1.76,
"learning_rate": 3.836904495638559e-05,
"loss": 2.0868,
"step": 15780
},
{
"epoch": 1.77,
"learning_rate": 3.825273987922165e-05,
"loss": 2.0769,
"step": 15800
},
{
"epoch": 1.77,
"learning_rate": 3.81364348020577e-05,
"loss": 2.0442,
"step": 15820
},
{
"epoch": 1.77,
"learning_rate": 3.8020129724893756e-05,
"loss": 2.0668,
"step": 15840
},
{
"epoch": 1.77,
"learning_rate": 3.790382464772981e-05,
"loss": 2.0529,
"step": 15860
},
{
"epoch": 1.78,
"learning_rate": 3.778751957056587e-05,
"loss": 2.079,
"step": 15880
},
{
"epoch": 1.78,
"learning_rate": 3.767121449340192e-05,
"loss": 2.1526,
"step": 15900
},
{
"epoch": 1.78,
"learning_rate": 3.7554909416237974e-05,
"loss": 2.0635,
"step": 15920
},
{
"epoch": 1.78,
"learning_rate": 3.7438604339074026e-05,
"loss": 2.0773,
"step": 15940
},
{
"epoch": 1.78,
"learning_rate": 3.7322299261910086e-05,
"loss": 2.0929,
"step": 15960
},
{
"epoch": 1.79,
"learning_rate": 3.720599418474614e-05,
"loss": 2.0554,
"step": 15980
},
{
"epoch": 1.79,
"learning_rate": 3.708968910758219e-05,
"loss": 2.1168,
"step": 16000
},
{
"epoch": 1.79,
"learning_rate": 3.6973384030418244e-05,
"loss": 2.0428,
"step": 16020
},
{
"epoch": 1.79,
"learning_rate": 3.6857078953254304e-05,
"loss": 2.0781,
"step": 16040
},
{
"epoch": 1.8,
"learning_rate": 3.674077387609036e-05,
"loss": 2.0755,
"step": 16060
},
{
"epoch": 1.8,
"learning_rate": 3.662446879892641e-05,
"loss": 2.0364,
"step": 16080
},
{
"epoch": 1.8,
"learning_rate": 3.650816372176247e-05,
"loss": 2.1061,
"step": 16100
},
{
"epoch": 1.8,
"learning_rate": 3.639185864459852e-05,
"loss": 2.0723,
"step": 16120
},
{
"epoch": 1.8,
"learning_rate": 3.6275553567434575e-05,
"loss": 2.0164,
"step": 16140
},
{
"epoch": 1.81,
"learning_rate": 3.615924849027063e-05,
"loss": 2.0676,
"step": 16160
},
{
"epoch": 1.81,
"learning_rate": 3.604294341310669e-05,
"loss": 2.0667,
"step": 16180
},
{
"epoch": 1.81,
"learning_rate": 3.592663833594274e-05,
"loss": 2.0623,
"step": 16200
},
{
"epoch": 1.81,
"learning_rate": 3.581033325877879e-05,
"loss": 2.0801,
"step": 16220
},
{
"epoch": 1.82,
"learning_rate": 3.5694028181614846e-05,
"loss": 2.0449,
"step": 16240
},
{
"epoch": 1.82,
"learning_rate": 3.5577723104450905e-05,
"loss": 2.0812,
"step": 16260
},
{
"epoch": 1.82,
"learning_rate": 3.546141802728695e-05,
"loss": 2.0907,
"step": 16280
},
{
"epoch": 1.82,
"learning_rate": 3.534511295012301e-05,
"loss": 2.0436,
"step": 16300
},
{
"epoch": 1.83,
"learning_rate": 3.522880787295907e-05,
"loss": 2.111,
"step": 16320
},
{
"epoch": 1.83,
"learning_rate": 3.511250279579512e-05,
"loss": 2.0559,
"step": 16340
},
{
"epoch": 1.83,
"learning_rate": 3.4996197718631176e-05,
"loss": 2.1178,
"step": 16360
},
{
"epoch": 1.83,
"learning_rate": 3.487989264146723e-05,
"loss": 2.0414,
"step": 16380
},
{
"epoch": 1.83,
"learning_rate": 3.476358756430329e-05,
"loss": 2.0485,
"step": 16400
},
{
"epoch": 1.84,
"learning_rate": 3.464728248713934e-05,
"loss": 2.0785,
"step": 16420
},
{
"epoch": 1.84,
"learning_rate": 3.4530977409975394e-05,
"loss": 2.0779,
"step": 16440
},
{
"epoch": 1.84,
"learning_rate": 3.441467233281145e-05,
"loss": 2.0431,
"step": 16460
},
{
"epoch": 1.84,
"learning_rate": 3.4298367255647506e-05,
"loss": 2.1142,
"step": 16480
},
{
"epoch": 1.85,
"learning_rate": 3.418206217848355e-05,
"loss": 2.0406,
"step": 16500
},
{
"epoch": 1.85,
"learning_rate": 3.406575710131961e-05,
"loss": 2.0579,
"step": 16520
},
{
"epoch": 1.85,
"learning_rate": 3.3949452024155665e-05,
"loss": 2.0933,
"step": 16540
},
{
"epoch": 1.85,
"learning_rate": 3.3833146946991724e-05,
"loss": 2.0477,
"step": 16560
},
{
"epoch": 1.85,
"learning_rate": 3.371684186982777e-05,
"loss": 2.0948,
"step": 16580
},
{
"epoch": 1.86,
"learning_rate": 3.360053679266383e-05,
"loss": 2.0587,
"step": 16600
},
{
"epoch": 1.86,
"learning_rate": 3.348423171549989e-05,
"loss": 2.0628,
"step": 16620
},
{
"epoch": 1.86,
"learning_rate": 3.336792663833594e-05,
"loss": 2.0993,
"step": 16640
},
{
"epoch": 1.86,
"learning_rate": 3.3251621561171995e-05,
"loss": 2.0701,
"step": 16660
},
{
"epoch": 1.87,
"learning_rate": 3.313531648400805e-05,
"loss": 2.0582,
"step": 16680
},
{
"epoch": 1.87,
"learning_rate": 3.301901140684411e-05,
"loss": 2.044,
"step": 16700
},
{
"epoch": 1.87,
"learning_rate": 3.2902706329680154e-05,
"loss": 2.0492,
"step": 16720
},
{
"epoch": 1.87,
"learning_rate": 3.278640125251621e-05,
"loss": 2.0256,
"step": 16740
},
{
"epoch": 1.87,
"learning_rate": 3.2670096175352266e-05,
"loss": 2.0283,
"step": 16760
},
{
"epoch": 1.88,
"learning_rate": 3.2553791098188326e-05,
"loss": 2.0441,
"step": 16780
},
{
"epoch": 1.88,
"learning_rate": 3.243748602102438e-05,
"loss": 2.071,
"step": 16800
},
{
"epoch": 1.88,
"learning_rate": 3.232118094386043e-05,
"loss": 2.089,
"step": 16820
},
{
"epoch": 1.88,
"learning_rate": 3.220487586669649e-05,
"loss": 2.0854,
"step": 16840
},
{
"epoch": 1.89,
"learning_rate": 3.2088570789532544e-05,
"loss": 2.053,
"step": 16860
},
{
"epoch": 1.89,
"learning_rate": 3.1972265712368596e-05,
"loss": 2.0928,
"step": 16880
},
{
"epoch": 1.89,
"learning_rate": 3.185596063520465e-05,
"loss": 2.0675,
"step": 16900
},
{
"epoch": 1.89,
"learning_rate": 3.17396555580407e-05,
"loss": 2.0425,
"step": 16920
},
{
"epoch": 1.89,
"learning_rate": 3.162335048087676e-05,
"loss": 2.056,
"step": 16940
},
{
"epoch": 1.9,
"learning_rate": 3.1507045403712814e-05,
"loss": 2.0542,
"step": 16960
},
{
"epoch": 1.9,
"learning_rate": 3.139074032654887e-05,
"loss": 2.0871,
"step": 16980
},
{
"epoch": 1.9,
"learning_rate": 3.127443524938492e-05,
"loss": 2.1014,
"step": 17000
},
{
"epoch": 1.9,
"learning_rate": 3.115813017222098e-05,
"loss": 2.0285,
"step": 17020
},
{
"epoch": 1.91,
"learning_rate": 3.104182509505703e-05,
"loss": 2.0229,
"step": 17040
},
{
"epoch": 1.91,
"learning_rate": 3.0925520017893085e-05,
"loss": 2.0798,
"step": 17060
},
{
"epoch": 1.91,
"learning_rate": 3.080921494072914e-05,
"loss": 2.0637,
"step": 17080
},
{
"epoch": 1.91,
"learning_rate": 3.069290986356519e-05,
"loss": 2.0567,
"step": 17100
},
{
"epoch": 1.91,
"learning_rate": 3.057660478640125e-05,
"loss": 2.0258,
"step": 17120
},
{
"epoch": 1.92,
"learning_rate": 3.0460299709237303e-05,
"loss": 2.0988,
"step": 17140
},
{
"epoch": 1.92,
"learning_rate": 3.034399463207336e-05,
"loss": 2.0889,
"step": 17160
},
{
"epoch": 1.92,
"learning_rate": 3.0227689554909412e-05,
"loss": 2.0466,
"step": 17180
},
{
"epoch": 1.92,
"learning_rate": 3.0123014985461862e-05,
"loss": 2.0953,
"step": 17200
},
{
"epoch": 1.93,
"learning_rate": 3.0006709908297915e-05,
"loss": 2.0807,
"step": 17220
},
{
"epoch": 1.93,
"learning_rate": 2.989040483113397e-05,
"loss": 2.0344,
"step": 17240
},
{
"epoch": 1.93,
"learning_rate": 2.977991500782822e-05,
"loss": 2.1382,
"step": 17260
},
{
"epoch": 1.93,
"learning_rate": 2.9663609930664277e-05,
"loss": 2.0324,
"step": 17280
},
{
"epoch": 1.93,
"learning_rate": 2.954730485350033e-05,
"loss": 2.0842,
"step": 17300
},
{
"epoch": 1.94,
"learning_rate": 2.943099977633639e-05,
"loss": 2.0407,
"step": 17320
},
{
"epoch": 1.94,
"learning_rate": 2.9314694699172443e-05,
"loss": 2.0312,
"step": 17340
},
{
"epoch": 1.94,
"learning_rate": 2.91983896220085e-05,
"loss": 2.0736,
"step": 17360
},
{
"epoch": 1.94,
"learning_rate": 2.908208454484455e-05,
"loss": 2.0568,
"step": 17380
},
{
"epoch": 1.95,
"learning_rate": 2.8965779467680608e-05,
"loss": 2.0744,
"step": 17400
},
{
"epoch": 1.95,
"learning_rate": 2.884947439051666e-05,
"loss": 2.0691,
"step": 17420
},
{
"epoch": 1.95,
"learning_rate": 2.8733169313352713e-05,
"loss": 2.1048,
"step": 17440
},
{
"epoch": 1.95,
"learning_rate": 2.861686423618877e-05,
"loss": 2.0583,
"step": 17460
},
{
"epoch": 1.95,
"learning_rate": 2.8500559159024822e-05,
"loss": 2.0439,
"step": 17480
},
{
"epoch": 1.96,
"learning_rate": 2.838425408186088e-05,
"loss": 2.0741,
"step": 17500
},
{
"epoch": 1.96,
"learning_rate": 2.826794900469693e-05,
"loss": 2.0847,
"step": 17520
},
{
"epoch": 1.96,
"learning_rate": 2.8151643927532988e-05,
"loss": 2.0784,
"step": 17540
},
{
"epoch": 1.96,
"learning_rate": 2.803533885036904e-05,
"loss": 2.0681,
"step": 17560
},
{
"epoch": 1.97,
"learning_rate": 2.79190337732051e-05,
"loss": 2.0736,
"step": 17580
},
{
"epoch": 1.97,
"learning_rate": 2.7802728696041153e-05,
"loss": 2.1105,
"step": 17600
},
{
"epoch": 1.97,
"learning_rate": 2.768642361887721e-05,
"loss": 2.0879,
"step": 17620
},
{
"epoch": 1.97,
"learning_rate": 2.7570118541713262e-05,
"loss": 2.0742,
"step": 17640
},
{
"epoch": 1.97,
"learning_rate": 2.7453813464549315e-05,
"loss": 2.0421,
"step": 17660
},
{
"epoch": 1.98,
"learning_rate": 2.733750838738537e-05,
"loss": 2.0922,
"step": 17680
},
{
"epoch": 1.98,
"learning_rate": 2.7221203310221424e-05,
"loss": 2.0433,
"step": 17700
},
{
"epoch": 1.98,
"learning_rate": 2.710489823305748e-05,
"loss": 2.1093,
"step": 17720
},
{
"epoch": 1.98,
"learning_rate": 2.6988593155893533e-05,
"loss": 2.057,
"step": 17740
},
{
"epoch": 1.99,
"learning_rate": 2.687228807872959e-05,
"loss": 2.0387,
"step": 17760
},
{
"epoch": 1.99,
"learning_rate": 2.675598300156564e-05,
"loss": 2.0967,
"step": 17780
},
{
"epoch": 1.99,
"learning_rate": 2.6639677924401698e-05,
"loss": 2.0828,
"step": 17800
},
{
"epoch": 1.99,
"learning_rate": 2.652337284723775e-05,
"loss": 2.0244,
"step": 17820
},
{
"epoch": 2.0,
"learning_rate": 2.6407067770073807e-05,
"loss": 2.0618,
"step": 17840
},
{
"epoch": 2.0,
"learning_rate": 2.6290762692909863e-05,
"loss": 2.1041,
"step": 17860
},
{
"epoch": 2.0,
"learning_rate": 2.6174457615745916e-05,
"loss": 2.0283,
"step": 17880
},
{
"epoch": 2.0,
"learning_rate": 2.6058152538581972e-05,
"loss": 2.0234,
"step": 17900
},
{
"epoch": 2.0,
"learning_rate": 2.5941847461418025e-05,
"loss": 2.0634,
"step": 17920
},
{
"epoch": 2.01,
"learning_rate": 2.582554238425408e-05,
"loss": 2.0737,
"step": 17940
},
{
"epoch": 2.01,
"learning_rate": 2.5709237307090134e-05,
"loss": 2.0651,
"step": 17960
},
{
"epoch": 2.01,
"learning_rate": 2.559293222992619e-05,
"loss": 2.025,
"step": 17980
},
{
"epoch": 2.01,
"learning_rate": 2.5476627152762243e-05,
"loss": 2.0662,
"step": 18000
},
{
"epoch": 2.02,
"learning_rate": 2.53603220755983e-05,
"loss": 2.0219,
"step": 18020
},
{
"epoch": 2.02,
"learning_rate": 2.5244016998434352e-05,
"loss": 2.0623,
"step": 18040
},
{
"epoch": 2.02,
"learning_rate": 2.5127711921270405e-05,
"loss": 2.059,
"step": 18060
},
{
"epoch": 2.02,
"learning_rate": 2.501140684410646e-05,
"loss": 2.1055,
"step": 18080
},
{
"epoch": 2.02,
"learning_rate": 2.4895101766942514e-05,
"loss": 2.1067,
"step": 18100
},
{
"epoch": 2.03,
"learning_rate": 2.477879668977857e-05,
"loss": 2.0925,
"step": 18120
},
{
"epoch": 2.03,
"learning_rate": 2.4662491612614626e-05,
"loss": 2.0838,
"step": 18140
},
{
"epoch": 2.03,
"learning_rate": 2.4546186535450682e-05,
"loss": 2.0771,
"step": 18160
},
{
"epoch": 2.03,
"learning_rate": 2.4429881458286735e-05,
"loss": 2.0885,
"step": 18180
},
{
"epoch": 2.04,
"learning_rate": 2.431357638112279e-05,
"loss": 2.0648,
"step": 18200
},
{
"epoch": 2.04,
"learning_rate": 2.4197271303958844e-05,
"loss": 2.0681,
"step": 18220
},
{
"epoch": 2.04,
"learning_rate": 2.40809662267949e-05,
"loss": 2.0553,
"step": 18240
},
{
"epoch": 2.04,
"learning_rate": 2.3964661149630953e-05,
"loss": 2.0908,
"step": 18260
},
{
"epoch": 2.04,
"learning_rate": 2.3848356072467006e-05,
"loss": 2.0558,
"step": 18280
},
{
"epoch": 2.05,
"learning_rate": 2.3732050995303062e-05,
"loss": 2.0142,
"step": 18300
},
{
"epoch": 2.05,
"learning_rate": 2.3615745918139115e-05,
"loss": 2.0327,
"step": 18320
},
{
"epoch": 2.05,
"learning_rate": 2.349944084097517e-05,
"loss": 2.1027,
"step": 18340
},
{
"epoch": 2.05,
"learning_rate": 2.3383135763811224e-05,
"loss": 2.0602,
"step": 18360
},
{
"epoch": 2.06,
"learning_rate": 2.326683068664728e-05,
"loss": 2.0362,
"step": 18380
},
{
"epoch": 2.06,
"learning_rate": 2.3150525609483333e-05,
"loss": 2.0628,
"step": 18400
},
{
"epoch": 2.06,
"learning_rate": 2.3034220532319392e-05,
"loss": 2.0663,
"step": 18420
},
{
"epoch": 2.06,
"learning_rate": 2.2917915455155445e-05,
"loss": 2.0567,
"step": 18440
},
{
"epoch": 2.06,
"learning_rate": 2.28016103779915e-05,
"loss": 2.0717,
"step": 18460
},
{
"epoch": 2.07,
"learning_rate": 2.2685305300827554e-05,
"loss": 2.0677,
"step": 18480
},
{
"epoch": 2.07,
"learning_rate": 2.2569000223663607e-05,
"loss": 2.0664,
"step": 18500
},
{
"epoch": 2.07,
"learning_rate": 2.2452695146499663e-05,
"loss": 2.0965,
"step": 18520
},
{
"epoch": 2.07,
"learning_rate": 2.2336390069335716e-05,
"loss": 2.1145,
"step": 18540
},
{
"epoch": 2.08,
"learning_rate": 2.2220084992171772e-05,
"loss": 2.0512,
"step": 18560
},
{
"epoch": 2.08,
"learning_rate": 2.2103779915007825e-05,
"loss": 2.0948,
"step": 18580
},
{
"epoch": 2.08,
"learning_rate": 2.198747483784388e-05,
"loss": 2.1022,
"step": 18600
},
{
"epoch": 2.08,
"learning_rate": 2.1871169760679934e-05,
"loss": 2.0139,
"step": 18620
},
{
"epoch": 2.08,
"learning_rate": 2.175486468351599e-05,
"loss": 2.0366,
"step": 18640
},
{
"epoch": 2.09,
"learning_rate": 2.1638559606352043e-05,
"loss": 2.0667,
"step": 18660
},
{
"epoch": 2.09,
"learning_rate": 2.1522254529188103e-05,
"loss": 2.0617,
"step": 18680
},
{
"epoch": 2.09,
"learning_rate": 2.1405949452024155e-05,
"loss": 2.0557,
"step": 18700
},
{
"epoch": 2.09,
"learning_rate": 2.1289644374860208e-05,
"loss": 2.0373,
"step": 18720
},
{
"epoch": 2.1,
"learning_rate": 2.1173339297696264e-05,
"loss": 2.054,
"step": 18740
},
{
"epoch": 2.1,
"learning_rate": 2.1057034220532317e-05,
"loss": 2.0393,
"step": 18760
},
{
"epoch": 2.1,
"learning_rate": 2.0940729143368373e-05,
"loss": 2.0526,
"step": 18780
},
{
"epoch": 2.1,
"learning_rate": 2.0824424066204426e-05,
"loss": 2.0431,
"step": 18800
},
{
"epoch": 2.1,
"learning_rate": 2.0708118989040482e-05,
"loss": 2.0438,
"step": 18820
},
{
"epoch": 2.11,
"learning_rate": 2.0591813911876535e-05,
"loss": 2.077,
"step": 18840
},
{
"epoch": 2.11,
"learning_rate": 2.047550883471259e-05,
"loss": 2.0744,
"step": 18860
},
{
"epoch": 2.11,
"learning_rate": 2.0359203757548644e-05,
"loss": 2.1086,
"step": 18880
},
{
"epoch": 2.11,
"learning_rate": 2.0242898680384697e-05,
"loss": 2.1172,
"step": 18900
},
{
"epoch": 2.12,
"learning_rate": 2.0126593603220753e-05,
"loss": 2.0531,
"step": 18920
},
{
"epoch": 2.12,
"learning_rate": 2.0010288526056806e-05,
"loss": 2.0392,
"step": 18940
},
{
"epoch": 2.12,
"learning_rate": 1.9893983448892866e-05,
"loss": 2.093,
"step": 18960
},
{
"epoch": 2.12,
"learning_rate": 1.977767837172892e-05,
"loss": 2.0838,
"step": 18980
},
{
"epoch": 2.12,
"learning_rate": 1.9661373294564975e-05,
"loss": 2.0741,
"step": 19000
},
{
"epoch": 2.13,
"learning_rate": 1.9545068217401027e-05,
"loss": 2.0726,
"step": 19020
},
{
"epoch": 2.13,
"learning_rate": 1.9428763140237084e-05,
"loss": 2.0095,
"step": 19040
},
{
"epoch": 2.13,
"learning_rate": 1.9312458063073136e-05,
"loss": 2.0646,
"step": 19060
},
{
"epoch": 2.13,
"learning_rate": 1.9196152985909193e-05,
"loss": 2.051,
"step": 19080
},
{
"epoch": 2.14,
"learning_rate": 1.9079847908745245e-05,
"loss": 2.0164,
"step": 19100
},
{
"epoch": 2.14,
"learning_rate": 1.8963542831581298e-05,
"loss": 2.0951,
"step": 19120
},
{
"epoch": 2.14,
"learning_rate": 1.8847237754417354e-05,
"loss": 2.0782,
"step": 19140
},
{
"epoch": 2.14,
"learning_rate": 1.8730932677253407e-05,
"loss": 2.0496,
"step": 19160
},
{
"epoch": 2.14,
"learning_rate": 1.8614627600089463e-05,
"loss": 2.0653,
"step": 19180
},
{
"epoch": 2.15,
"learning_rate": 1.8498322522925516e-05,
"loss": 2.0701,
"step": 19200
},
{
"epoch": 2.15,
"learning_rate": 1.8382017445761572e-05,
"loss": 2.0712,
"step": 19220
},
{
"epoch": 2.15,
"learning_rate": 1.826571236859763e-05,
"loss": 2.051,
"step": 19240
},
{
"epoch": 2.15,
"learning_rate": 1.816103779915008e-05,
"loss": 2.0425,
"step": 19260
},
{
"epoch": 2.16,
"learning_rate": 1.804473272198613e-05,
"loss": 2.0742,
"step": 19280
},
{
"epoch": 2.16,
"learning_rate": 1.7928427644822188e-05,
"loss": 2.066,
"step": 19300
},
{
"epoch": 2.16,
"learning_rate": 1.781212256765824e-05,
"loss": 2.0443,
"step": 19320
},
{
"epoch": 2.16,
"learning_rate": 1.7695817490494297e-05,
"loss": 2.0841,
"step": 19340
},
{
"epoch": 2.17,
"learning_rate": 1.757951241333035e-05,
"loss": 2.0769,
"step": 19360
},
{
"epoch": 2.17,
"learning_rate": 1.7463207336166406e-05,
"loss": 2.0911,
"step": 19380
},
{
"epoch": 2.17,
"learning_rate": 1.734690225900246e-05,
"loss": 2.032,
"step": 19400
},
{
"epoch": 2.17,
"learning_rate": 1.723059718183851e-05,
"loss": 2.056,
"step": 19420
},
{
"epoch": 2.17,
"learning_rate": 1.7114292104674568e-05,
"loss": 2.0717,
"step": 19440
},
{
"epoch": 2.18,
"learning_rate": 1.699798702751062e-05,
"loss": 2.0455,
"step": 19460
},
{
"epoch": 2.18,
"learning_rate": 1.6881681950346677e-05,
"loss": 2.0697,
"step": 19480
},
{
"epoch": 2.18,
"learning_rate": 1.6771192127040926e-05,
"loss": 2.0498,
"step": 19500
},
{
"epoch": 2.18,
"learning_rate": 1.6654887049876983e-05,
"loss": 2.0415,
"step": 19520
},
{
"epoch": 2.19,
"learning_rate": 1.6538581972713035e-05,
"loss": 2.0745,
"step": 19540
},
{
"epoch": 2.19,
"learning_rate": 1.6422276895549095e-05,
"loss": 2.0936,
"step": 19560
},
{
"epoch": 2.19,
"learning_rate": 1.6305971818385148e-05,
"loss": 2.0495,
"step": 19580
},
{
"epoch": 2.19,
"learning_rate": 1.61896667412212e-05,
"loss": 2.0615,
"step": 19600
},
{
"epoch": 2.19,
"learning_rate": 1.6073361664057257e-05,
"loss": 2.0566,
"step": 19620
},
{
"epoch": 2.2,
"learning_rate": 1.595705658689331e-05,
"loss": 2.064,
"step": 19640
},
{
"epoch": 2.2,
"learning_rate": 1.5840751509729366e-05,
"loss": 2.0304,
"step": 19660
},
{
"epoch": 2.2,
"learning_rate": 1.572444643256542e-05,
"loss": 2.0897,
"step": 19680
},
{
"epoch": 2.2,
"learning_rate": 1.5608141355401475e-05,
"loss": 2.069,
"step": 19700
},
{
"epoch": 2.21,
"learning_rate": 1.549183627823753e-05,
"loss": 2.0497,
"step": 19720
},
{
"epoch": 2.21,
"learning_rate": 1.5375531201073584e-05,
"loss": 2.0427,
"step": 19740
},
{
"epoch": 2.21,
"learning_rate": 1.525922612390964e-05,
"loss": 2.0802,
"step": 19760
},
{
"epoch": 2.21,
"learning_rate": 1.5142921046745693e-05,
"loss": 2.0825,
"step": 19780
},
{
"epoch": 2.21,
"learning_rate": 1.5026615969581747e-05,
"loss": 2.0549,
"step": 19800
},
{
"epoch": 2.22,
"learning_rate": 1.4910310892417802e-05,
"loss": 2.0846,
"step": 19820
},
{
"epoch": 2.22,
"learning_rate": 1.4794005815253856e-05,
"loss": 2.0599,
"step": 19840
},
{
"epoch": 2.22,
"learning_rate": 1.4677700738089911e-05,
"loss": 2.0198,
"step": 19860
},
{
"epoch": 2.22,
"learning_rate": 1.4561395660925965e-05,
"loss": 2.0434,
"step": 19880
},
{
"epoch": 2.23,
"learning_rate": 1.4445090583762022e-05,
"loss": 2.0567,
"step": 19900
},
{
"epoch": 2.23,
"learning_rate": 1.4328785506598076e-05,
"loss": 2.0546,
"step": 19920
},
{
"epoch": 2.23,
"learning_rate": 1.421248042943413e-05,
"loss": 2.0774,
"step": 19940
},
{
"epoch": 2.23,
"learning_rate": 1.4096175352270185e-05,
"loss": 2.0797,
"step": 19960
},
{
"epoch": 2.23,
"learning_rate": 1.397987027510624e-05,
"loss": 2.0676,
"step": 19980
},
{
"epoch": 2.24,
"learning_rate": 1.3863565197942292e-05,
"loss": 2.0514,
"step": 20000
},
{
"epoch": 2.24,
"learning_rate": 1.3747260120778347e-05,
"loss": 2.0721,
"step": 20020
},
{
"epoch": 2.24,
"learning_rate": 1.3630955043614403e-05,
"loss": 2.0641,
"step": 20040
},
{
"epoch": 2.24,
"learning_rate": 1.3514649966450458e-05,
"loss": 2.0452,
"step": 20060
},
{
"epoch": 2.25,
"learning_rate": 1.3398344889286512e-05,
"loss": 2.0922,
"step": 20080
},
{
"epoch": 2.25,
"learning_rate": 1.3282039812122567e-05,
"loss": 2.0634,
"step": 20100
},
{
"epoch": 2.25,
"learning_rate": 1.3165734734958621e-05,
"loss": 2.0843,
"step": 20120
},
{
"epoch": 2.25,
"learning_rate": 1.3049429657794676e-05,
"loss": 2.0678,
"step": 20140
},
{
"epoch": 2.25,
"learning_rate": 1.293312458063073e-05,
"loss": 2.0736,
"step": 20160
},
{
"epoch": 2.26,
"learning_rate": 1.2816819503466786e-05,
"loss": 2.068,
"step": 20180
},
{
"epoch": 2.26,
"learning_rate": 1.2700514426302839e-05,
"loss": 2.1375,
"step": 20200
},
{
"epoch": 2.26,
"learning_rate": 1.2584209349138894e-05,
"loss": 2.036,
"step": 20220
},
{
"epoch": 2.26,
"learning_rate": 1.2467904271974948e-05,
"loss": 2.0958,
"step": 20240
},
{
"epoch": 2.27,
"learning_rate": 1.2351599194811003e-05,
"loss": 2.0575,
"step": 20260
},
{
"epoch": 2.27,
"learning_rate": 1.2235294117647057e-05,
"loss": 2.022,
"step": 20280
},
{
"epoch": 2.27,
"learning_rate": 1.2118989040483112e-05,
"loss": 2.1452,
"step": 20300
},
{
"epoch": 2.27,
"learning_rate": 1.2002683963319168e-05,
"loss": 2.0374,
"step": 20320
},
{
"epoch": 2.27,
"learning_rate": 1.1886378886155222e-05,
"loss": 2.0504,
"step": 20340
},
{
"epoch": 2.28,
"learning_rate": 1.1770073808991277e-05,
"loss": 2.0327,
"step": 20360
},
{
"epoch": 2.28,
"learning_rate": 1.1653768731827331e-05,
"loss": 2.0743,
"step": 20380
},
{
"epoch": 2.28,
"learning_rate": 1.1537463654663386e-05,
"loss": 2.0258,
"step": 20400
},
{
"epoch": 2.28,
"learning_rate": 1.1421158577499439e-05,
"loss": 2.0431,
"step": 20420
},
{
"epoch": 2.29,
"learning_rate": 1.1304853500335493e-05,
"loss": 2.0772,
"step": 20440
},
{
"epoch": 2.29,
"learning_rate": 1.118854842317155e-05,
"loss": 2.046,
"step": 20460
},
{
"epoch": 2.29,
"learning_rate": 1.1072243346007604e-05,
"loss": 2.0498,
"step": 20480
},
{
"epoch": 2.29,
"learning_rate": 1.0955938268843658e-05,
"loss": 2.0543,
"step": 20500
},
{
"epoch": 2.29,
"learning_rate": 1.0839633191679713e-05,
"loss": 2.0573,
"step": 20520
},
{
"epoch": 2.3,
"learning_rate": 1.0723328114515767e-05,
"loss": 2.0651,
"step": 20540
},
{
"epoch": 2.3,
"learning_rate": 1.0607023037351822e-05,
"loss": 2.0172,
"step": 20560
},
{
"epoch": 2.3,
"learning_rate": 1.0490717960187878e-05,
"loss": 2.0421,
"step": 20580
},
{
"epoch": 2.3,
"learning_rate": 1.0374412883023932e-05,
"loss": 2.0252,
"step": 20600
},
{
"epoch": 2.31,
"learning_rate": 1.0258107805859985e-05,
"loss": 2.0479,
"step": 20620
},
{
"epoch": 2.31,
"learning_rate": 1.014180272869604e-05,
"loss": 2.0661,
"step": 20640
},
{
"epoch": 2.31,
"learning_rate": 1.0025497651532094e-05,
"loss": 2.0576,
"step": 20660
},
{
"epoch": 2.31,
"learning_rate": 9.909192574368149e-06,
"loss": 2.0088,
"step": 20680
},
{
"epoch": 2.31,
"learning_rate": 9.792887497204203e-06,
"loss": 2.0433,
"step": 20700
},
{
"epoch": 2.32,
"learning_rate": 9.67658242004026e-06,
"loss": 2.0522,
"step": 20720
},
{
"epoch": 2.32,
"learning_rate": 9.560277342876314e-06,
"loss": 2.0749,
"step": 20740
},
{
"epoch": 2.32,
"learning_rate": 9.443972265712368e-06,
"loss": 2.062,
"step": 20760
},
{
"epoch": 2.32,
"learning_rate": 9.327667188548423e-06,
"loss": 2.076,
"step": 20780
},
{
"epoch": 2.33,
"learning_rate": 9.211362111384477e-06,
"loss": 2.0607,
"step": 20800
},
{
"epoch": 2.33,
"learning_rate": 9.095057034220532e-06,
"loss": 2.0379,
"step": 20820
},
{
"epoch": 2.33,
"learning_rate": 8.978751957056585e-06,
"loss": 2.0791,
"step": 20840
},
{
"epoch": 2.33,
"learning_rate": 8.862446879892641e-06,
"loss": 2.0544,
"step": 20860
},
{
"epoch": 2.34,
"learning_rate": 8.746141802728695e-06,
"loss": 2.028,
"step": 20880
},
{
"epoch": 2.34,
"learning_rate": 8.62983672556475e-06,
"loss": 2.0247,
"step": 20900
},
{
"epoch": 2.34,
"learning_rate": 8.513531648400804e-06,
"loss": 2.0644,
"step": 20920
},
{
"epoch": 2.34,
"learning_rate": 8.397226571236859e-06,
"loss": 2.0914,
"step": 20940
},
{
"epoch": 2.34,
"learning_rate": 8.280921494072913e-06,
"loss": 2.0588,
"step": 20960
},
{
"epoch": 2.35,
"learning_rate": 8.164616416908968e-06,
"loss": 2.082,
"step": 20980
},
{
"epoch": 2.35,
"learning_rate": 8.048311339745022e-06,
"loss": 2.0454,
"step": 21000
},
{
"epoch": 2.35,
"learning_rate": 7.932006262581077e-06,
"loss": 2.0487,
"step": 21020
},
{
"epoch": 2.35,
"learning_rate": 7.815701185417131e-06,
"loss": 2.073,
"step": 21040
},
{
"epoch": 2.36,
"learning_rate": 7.699396108253186e-06,
"loss": 2.0735,
"step": 21060
},
{
"epoch": 2.36,
"learning_rate": 7.583091031089241e-06,
"loss": 2.1009,
"step": 21080
},
{
"epoch": 2.36,
"learning_rate": 7.466785953925296e-06,
"loss": 2.0592,
"step": 21100
},
{
"epoch": 2.36,
"learning_rate": 7.35048087676135e-06,
"loss": 2.0746,
"step": 21120
},
{
"epoch": 2.36,
"learning_rate": 7.234175799597405e-06,
"loss": 2.0362,
"step": 21140
},
{
"epoch": 2.37,
"learning_rate": 7.117870722433459e-06,
"loss": 2.0595,
"step": 21160
},
{
"epoch": 2.37,
"learning_rate": 7.001565645269515e-06,
"loss": 2.0417,
"step": 21180
},
{
"epoch": 2.37,
"learning_rate": 6.885260568105568e-06,
"loss": 2.0312,
"step": 21200
},
{
"epoch": 2.37,
"learning_rate": 6.768955490941623e-06,
"loss": 2.0407,
"step": 21220
},
{
"epoch": 2.38,
"learning_rate": 6.652650413777678e-06,
"loss": 2.0756,
"step": 21240
},
{
"epoch": 2.38,
"learning_rate": 6.536345336613733e-06,
"loss": 2.0649,
"step": 21260
},
{
"epoch": 2.38,
"learning_rate": 6.420040259449787e-06,
"loss": 2.0822,
"step": 21280
},
{
"epoch": 2.38,
"learning_rate": 6.303735182285841e-06,
"loss": 2.0615,
"step": 21300
},
{
"epoch": 2.38,
"learning_rate": 6.187430105121896e-06,
"loss": 2.0602,
"step": 21320
},
{
"epoch": 2.39,
"learning_rate": 6.071125027957951e-06,
"loss": 2.0534,
"step": 21340
},
{
"epoch": 2.39,
"learning_rate": 5.954819950794005e-06,
"loss": 2.0889,
"step": 21360
},
{
"epoch": 2.39,
"learning_rate": 5.8385148736300605e-06,
"loss": 2.0617,
"step": 21380
},
{
"epoch": 2.39,
"learning_rate": 5.722209796466114e-06,
"loss": 2.0512,
"step": 21400
},
{
"epoch": 2.4,
"learning_rate": 5.605904719302169e-06,
"loss": 2.0741,
"step": 21420
},
{
"epoch": 2.4,
"learning_rate": 5.489599642138223e-06,
"loss": 2.1048,
"step": 21440
},
{
"epoch": 2.4,
"learning_rate": 5.3732945649742785e-06,
"loss": 2.0448,
"step": 21460
},
{
"epoch": 2.4,
"learning_rate": 5.256989487810333e-06,
"loss": 2.0922,
"step": 21480
},
{
"epoch": 2.4,
"learning_rate": 5.152314918362782e-06,
"loss": 2.0596,
"step": 21500
},
{
"epoch": 2.41,
"learning_rate": 5.036009841198836e-06,
"loss": 2.0856,
"step": 21520
},
{
"epoch": 2.41,
"learning_rate": 4.9197047640348906e-06,
"loss": 2.0461,
"step": 21540
},
{
"epoch": 2.41,
"learning_rate": 4.803399686870946e-06,
"loss": 2.0339,
"step": 21560
},
{
"epoch": 2.41,
"learning_rate": 4.687094609707e-06,
"loss": 2.0895,
"step": 21580
},
{
"epoch": 2.42,
"learning_rate": 4.570789532543055e-06,
"loss": 2.0713,
"step": 21600
},
{
"epoch": 2.42,
"learning_rate": 4.454484455379109e-06,
"loss": 2.0354,
"step": 21620
},
{
"epoch": 2.42,
"learning_rate": 4.338179378215164e-06,
"loss": 2.0733,
"step": 21640
},
{
"epoch": 2.42,
"learning_rate": 4.221874301051218e-06,
"loss": 2.041,
"step": 21660
},
{
"epoch": 2.42,
"learning_rate": 4.105569223887274e-06,
"loss": 2.0864,
"step": 21680
},
{
"epoch": 2.43,
"learning_rate": 3.989264146723327e-06,
"loss": 2.0765,
"step": 21700
},
{
"epoch": 2.43,
"learning_rate": 3.872959069559383e-06,
"loss": 2.0272,
"step": 21720
},
{
"epoch": 2.43,
"learning_rate": 3.756653992395437e-06,
"loss": 2.1094,
"step": 21740
},
{
"epoch": 2.43,
"learning_rate": 3.6403489152314913e-06,
"loss": 2.0012,
"step": 21760
},
{
"epoch": 2.44,
"learning_rate": 3.5240438380675463e-06,
"loss": 2.0396,
"step": 21780
},
{
"epoch": 2.44,
"learning_rate": 3.4077387609036008e-06,
"loss": 2.0714,
"step": 21800
},
{
"epoch": 2.44,
"learning_rate": 3.2914336837396553e-06,
"loss": 2.0882,
"step": 21820
},
{
"epoch": 2.44,
"learning_rate": 3.1751286065757098e-06,
"loss": 2.0789,
"step": 21840
},
{
"epoch": 2.44,
"learning_rate": 3.0588235294117643e-06,
"loss": 2.0701,
"step": 21860
},
{
"epoch": 2.45,
"learning_rate": 2.9483337061060162e-06,
"loss": 2.0979,
"step": 21880
},
{
"epoch": 2.45,
"learning_rate": 2.8320286289420707e-06,
"loss": 2.0812,
"step": 21900
},
{
"epoch": 2.45,
"learning_rate": 2.7157235517781256e-06,
"loss": 2.1,
"step": 21920
},
{
"epoch": 2.45,
"learning_rate": 2.59941847461418e-06,
"loss": 2.0769,
"step": 21940
},
{
"epoch": 2.46,
"learning_rate": 2.4831133974502346e-06,
"loss": 2.0691,
"step": 21960
},
{
"epoch": 2.46,
"learning_rate": 2.366808320286289e-06,
"loss": 2.0583,
"step": 21980
},
{
"epoch": 2.46,
"learning_rate": 2.2505032431223436e-06,
"loss": 2.057,
"step": 22000
},
{
"epoch": 2.46,
"learning_rate": 2.1341981659583986e-06,
"loss": 2.1085,
"step": 22020
},
{
"epoch": 2.46,
"learning_rate": 2.017893088794453e-06,
"loss": 2.0809,
"step": 22040
},
{
"epoch": 2.47,
"learning_rate": 1.9015880116305076e-06,
"loss": 2.0627,
"step": 22060
},
{
"epoch": 2.47,
"learning_rate": 1.7852829344665623e-06,
"loss": 2.0509,
"step": 22080
},
{
"epoch": 2.47,
"learning_rate": 1.6689778573026168e-06,
"loss": 2.0466,
"step": 22100
},
{
"epoch": 2.47,
"learning_rate": 1.5526727801386713e-06,
"loss": 2.0756,
"step": 22120
},
{
"epoch": 2.48,
"learning_rate": 1.4363677029747258e-06,
"loss": 2.038,
"step": 22140
},
{
"epoch": 2.48,
"learning_rate": 1.3200626258107805e-06,
"loss": 2.0362,
"step": 22160
},
{
"epoch": 2.48,
"learning_rate": 1.203757548646835e-06,
"loss": 2.0785,
"step": 22180
},
{
"epoch": 2.48,
"learning_rate": 1.0874524714828897e-06,
"loss": 2.0653,
"step": 22200
},
{
"epoch": 2.48,
"learning_rate": 9.711473943189442e-07,
"loss": 2.1218,
"step": 22220
},
{
"epoch": 2.49,
"learning_rate": 8.548423171549988e-07,
"loss": 2.0656,
"step": 22240
},
{
"epoch": 2.49,
"learning_rate": 7.385372399910534e-07,
"loss": 2.0818,
"step": 22260
},
{
"epoch": 2.49,
"learning_rate": 6.222321628271079e-07,
"loss": 2.0546,
"step": 22280
},
{
"epoch": 2.49,
"learning_rate": 5.059270856631625e-07,
"loss": 2.0465,
"step": 22300
},
{
"epoch": 2.5,
"learning_rate": 3.8962200849921713e-07,
"loss": 2.0968,
"step": 22320
},
{
"epoch": 2.5,
"learning_rate": 2.7331693133527173e-07,
"loss": 2.038,
"step": 22340
},
{
"epoch": 2.5,
"step": 22355,
"total_flos": 2.924979686683443e+17,
"train_loss": 2.08698912321708,
"train_runtime": 8788.5423,
"train_samples_per_second": 15.261,
"train_steps_per_second": 2.544
}
],
"logging_steps": 20,
"max_steps": 22355,
"num_train_epochs": 3,
"save_steps": 5000,
"total_flos": 2.924979686683443e+17,
"trial_name": null,
"trial_params": null
}