gpt2-large-fine-tuned-context-256 / trainer_state.json
dwojcik's picture
Upload 12 files
703bdc7
raw
history blame
48.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.411764705882353,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.9999999999999997e-05,
"loss": 4.4008,
"step": 1
},
{
"epoch": 0.05,
"learning_rate": 5.9999999999999995e-05,
"loss": 4.1205,
"step": 2
},
{
"epoch": 0.07,
"learning_rate": 8.999999999999999e-05,
"loss": 3.856,
"step": 3
},
{
"epoch": 0.09,
"learning_rate": 0.00011999999999999999,
"loss": 4.2166,
"step": 4
},
{
"epoch": 0.12,
"learning_rate": 0.00015,
"loss": 3.853,
"step": 5
},
{
"epoch": 0.14,
"learning_rate": 0.00017999999999999998,
"loss": 3.8786,
"step": 6
},
{
"epoch": 0.16,
"learning_rate": 0.00020999999999999998,
"loss": 3.7177,
"step": 7
},
{
"epoch": 0.19,
"learning_rate": 0.00023999999999999998,
"loss": 4.0304,
"step": 8
},
{
"epoch": 0.21,
"learning_rate": 0.00027,
"loss": 3.8415,
"step": 9
},
{
"epoch": 0.24,
"learning_rate": 0.0003,
"loss": 3.8809,
"step": 10
},
{
"epoch": 0.26,
"learning_rate": 0.000299995596569254,
"loss": 3.8165,
"step": 11
},
{
"epoch": 0.28,
"learning_rate": 0.0002999823865355522,
"loss": 3.672,
"step": 12
},
{
"epoch": 0.31,
"learning_rate": 0.0002999603706744874,
"loss": 3.6258,
"step": 13
},
{
"epoch": 0.33,
"learning_rate": 0.00029992955027866394,
"loss": 3.7398,
"step": 14
},
{
"epoch": 0.35,
"learning_rate": 0.00029988992715762147,
"loss": 3.7463,
"step": 15
},
{
"epoch": 0.38,
"learning_rate": 0.000299841503637729,
"loss": 3.5098,
"step": 16
},
{
"epoch": 0.4,
"learning_rate": 0.0002997842825620479,
"loss": 3.6305,
"step": 17
},
{
"epoch": 0.42,
"learning_rate": 0.0002997182672901657,
"loss": 3.4217,
"step": 18
},
{
"epoch": 0.45,
"learning_rate": 0.00029964346169799786,
"loss": 3.2482,
"step": 19
},
{
"epoch": 0.47,
"learning_rate": 0.000299559870177561,
"loss": 3.0741,
"step": 20
},
{
"epoch": 0.49,
"learning_rate": 0.0002994674976367149,
"loss": 3.8112,
"step": 21
},
{
"epoch": 0.52,
"learning_rate": 0.0002993663494988739,
"loss": 3.3994,
"step": 22
},
{
"epoch": 0.54,
"learning_rate": 0.0002992564317026891,
"loss": 3.2294,
"step": 23
},
{
"epoch": 0.56,
"learning_rate": 0.00029913775070169893,
"loss": 3.0507,
"step": 24
},
{
"epoch": 0.59,
"learning_rate": 0.0002990103134639512,
"loss": 3.3722,
"step": 25
},
{
"epoch": 0.61,
"learning_rate": 0.0002988741274715932,
"loss": 3.2097,
"step": 26
},
{
"epoch": 0.64,
"learning_rate": 0.00029872920072043275,
"loss": 3.4514,
"step": 27
},
{
"epoch": 0.66,
"learning_rate": 0.00029857554171946863,
"loss": 3.7536,
"step": 28
},
{
"epoch": 0.68,
"learning_rate": 0.00029841315949039114,
"loss": 3.5086,
"step": 29
},
{
"epoch": 0.71,
"learning_rate": 0.0002982420635670523,
"loss": 3.6062,
"step": 30
},
{
"epoch": 0.73,
"learning_rate": 0.000298062263994906,
"loss": 3.5467,
"step": 31
},
{
"epoch": 0.75,
"learning_rate": 0.0002978737713304185,
"loss": 3.6129,
"step": 32
},
{
"epoch": 0.78,
"learning_rate": 0.0002976765966404484,
"loss": 3.6464,
"step": 33
},
{
"epoch": 0.8,
"learning_rate": 0.0002974707515015969,
"loss": 3.4577,
"step": 34
},
{
"epoch": 0.82,
"learning_rate": 0.0002972562479995282,
"loss": 3.3455,
"step": 35
},
{
"epoch": 0.85,
"learning_rate": 0.0002970330987282599,
"loss": 3.6202,
"step": 36
},
{
"epoch": 0.87,
"learning_rate": 0.0002968013167894234,
"loss": 3.1719,
"step": 37
},
{
"epoch": 0.89,
"learning_rate": 0.00029656091579149485,
"loss": 3.2336,
"step": 38
},
{
"epoch": 0.92,
"learning_rate": 0.0002963119098489964,
"loss": 3.7547,
"step": 39
},
{
"epoch": 0.94,
"learning_rate": 0.00029605431358166684,
"loss": 3.419,
"step": 40
},
{
"epoch": 0.96,
"learning_rate": 0.00029578814211360393,
"loss": 3.5237,
"step": 41
},
{
"epoch": 0.99,
"learning_rate": 0.00029551341107237597,
"loss": 2.983,
"step": 42
},
{
"epoch": 1.01,
"learning_rate": 0.00029523013658810444,
"loss": 2.863,
"step": 43
},
{
"epoch": 1.04,
"learning_rate": 0.00029493833529251707,
"loss": 2.4656,
"step": 44
},
{
"epoch": 1.06,
"learning_rate": 0.00029463802431797115,
"loss": 2.1783,
"step": 45
},
{
"epoch": 1.08,
"learning_rate": 0.0002943292212964476,
"loss": 2.4046,
"step": 46
},
{
"epoch": 1.11,
"learning_rate": 0.00029401194435851614,
"loss": 2.4341,
"step": 47
},
{
"epoch": 1.13,
"learning_rate": 0.00029368621213227044,
"loss": 2.2513,
"step": 48
},
{
"epoch": 1.15,
"learning_rate": 0.00029335204374223437,
"loss": 2.2957,
"step": 49
},
{
"epoch": 1.18,
"learning_rate": 0.00029300945880823956,
"loss": 2.6274,
"step": 50
},
{
"epoch": 1.2,
"learning_rate": 0.00029265847744427303,
"loss": 2.2075,
"step": 51
},
{
"epoch": 1.22,
"learning_rate": 0.00029229912025729646,
"loss": 2.2498,
"step": 52
},
{
"epoch": 1.25,
"learning_rate": 0.00029193140834603645,
"loss": 2.3298,
"step": 53
},
{
"epoch": 1.27,
"learning_rate": 0.0002915553632997454,
"loss": 1.9839,
"step": 54
},
{
"epoch": 1.29,
"learning_rate": 0.0002911710071969342,
"loss": 1.9872,
"step": 55
},
{
"epoch": 1.32,
"learning_rate": 0.000290778362604076,
"loss": 2.1115,
"step": 56
},
{
"epoch": 1.34,
"learning_rate": 0.0002903774525742811,
"loss": 2.3718,
"step": 57
},
{
"epoch": 1.36,
"learning_rate": 0.00028996830064594335,
"loss": 2.5018,
"step": 58
},
{
"epoch": 1.39,
"learning_rate": 0.0002895509308413587,
"loss": 2.5214,
"step": 59
},
{
"epoch": 1.41,
"learning_rate": 0.0002891253676653142,
"loss": 2.3296,
"step": 60
},
{
"epoch": 1.44,
"learning_rate": 0.0002886916361036494,
"loss": 2.575,
"step": 61
},
{
"epoch": 1.46,
"learning_rate": 0.0002882497616217896,
"loss": 2.5662,
"step": 62
},
{
"epoch": 1.48,
"learning_rate": 0.0002877997701632505,
"loss": 2.4488,
"step": 63
},
{
"epoch": 1.51,
"learning_rate": 0.0002873416881481151,
"loss": 2.6527,
"step": 64
},
{
"epoch": 1.53,
"learning_rate": 0.00028687554247148247,
"loss": 2.2993,
"step": 65
},
{
"epoch": 1.55,
"learning_rate": 0.0002864013605018887,
"loss": 2.2061,
"step": 66
},
{
"epoch": 1.58,
"learning_rate": 0.00028591917007969993,
"loss": 2.398,
"step": 67
},
{
"epoch": 1.6,
"learning_rate": 0.00028542899951547793,
"loss": 2.4864,
"step": 68
},
{
"epoch": 1.62,
"learning_rate": 0.0002849308775883178,
"loss": 2.0955,
"step": 69
},
{
"epoch": 1.65,
"learning_rate": 0.0002844248335441583,
"loss": 2.1742,
"step": 70
},
{
"epoch": 1.67,
"learning_rate": 0.00028391089709406484,
"loss": 2.4512,
"step": 71
},
{
"epoch": 1.69,
"learning_rate": 0.00028338909841248497,
"loss": 2.4987,
"step": 72
},
{
"epoch": 1.72,
"learning_rate": 0.0002828594681354768,
"loss": 2.3906,
"step": 73
},
{
"epoch": 1.74,
"learning_rate": 0.00028232203735891023,
"loss": 2.5329,
"step": 74
},
{
"epoch": 1.76,
"learning_rate": 0.00028177683763664133,
"loss": 2.4044,
"step": 75
},
{
"epoch": 1.79,
"learning_rate": 0.0002812239009786597,
"loss": 2.3984,
"step": 76
},
{
"epoch": 1.81,
"learning_rate": 0.00028066325984920916,
"loss": 2.4416,
"step": 77
},
{
"epoch": 1.84,
"learning_rate": 0.00028009494716488146,
"loss": 2.4914,
"step": 78
},
{
"epoch": 1.86,
"learning_rate": 0.00027951899629268385,
"loss": 2.2867,
"step": 79
},
{
"epoch": 1.88,
"learning_rate": 0.0002789354410480802,
"loss": 2.2848,
"step": 80
},
{
"epoch": 1.91,
"learning_rate": 0.0002783443156930051,
"loss": 2.2625,
"step": 81
},
{
"epoch": 1.93,
"learning_rate": 0.00027774565493385273,
"loss": 2.4286,
"step": 82
},
{
"epoch": 1.95,
"learning_rate": 0.0002771394939194392,
"loss": 2.4524,
"step": 83
},
{
"epoch": 1.98,
"learning_rate": 0.0002765258682389382,
"loss": 2.3957,
"step": 84
},
{
"epoch": 2.0,
"learning_rate": 0.0002759048139197925,
"loss": 2.3202,
"step": 85
},
{
"epoch": 2.02,
"learning_rate": 0.0002752763674255977,
"loss": 1.6208,
"step": 86
},
{
"epoch": 2.05,
"learning_rate": 0.000274640565653962,
"loss": 1.6103,
"step": 87
},
{
"epoch": 2.07,
"learning_rate": 0.00027399744593433986,
"loss": 1.2998,
"step": 88
},
{
"epoch": 2.09,
"learning_rate": 0.0002733470460258397,
"loss": 1.4186,
"step": 89
},
{
"epoch": 2.12,
"learning_rate": 0.00027268940411500763,
"loss": 1.221,
"step": 90
},
{
"epoch": 2.14,
"learning_rate": 0.00027202455881358514,
"loss": 1.3517,
"step": 91
},
{
"epoch": 2.16,
"learning_rate": 0.0002713525491562421,
"loss": 1.4388,
"step": 92
},
{
"epoch": 2.19,
"learning_rate": 0.000270673414598285,
"loss": 1.301,
"step": 93
},
{
"epoch": 2.21,
"learning_rate": 0.0002699871950133404,
"loss": 1.4386,
"step": 94
},
{
"epoch": 2.24,
"learning_rate": 0.000269293930691014,
"loss": 1.399,
"step": 95
},
{
"epoch": 2.26,
"learning_rate": 0.0002685936623345247,
"loss": 1.4381,
"step": 96
},
{
"epoch": 2.28,
"learning_rate": 0.0002678864310583154,
"loss": 1.2775,
"step": 97
},
{
"epoch": 2.31,
"learning_rate": 0.0002671722783856388,
"loss": 1.4458,
"step": 98
},
{
"epoch": 2.33,
"learning_rate": 0.00026645124624611927,
"loss": 1.3532,
"step": 99
},
{
"epoch": 2.35,
"learning_rate": 0.00026572337697329144,
"loss": 1.2288,
"step": 100
},
{
"epoch": 2.38,
"learning_rate": 0.0002649887133021144,
"loss": 1.4598,
"step": 101
},
{
"epoch": 2.4,
"learning_rate": 0.0002642472983664628,
"loss": 1.4812,
"step": 102
},
{
"epoch": 2.42,
"learning_rate": 0.00026349917569659426,
"loss": 1.3315,
"step": 103
},
{
"epoch": 2.45,
"learning_rate": 0.0002627443892165937,
"loss": 1.4532,
"step": 104
},
{
"epoch": 2.47,
"learning_rate": 0.00026198298324179437,
"loss": 1.3674,
"step": 105
},
{
"epoch": 2.49,
"learning_rate": 0.000261215002476176,
"loss": 1.2486,
"step": 106
},
{
"epoch": 2.52,
"learning_rate": 0.00026044049200974004,
"loss": 1.2998,
"step": 107
},
{
"epoch": 2.54,
"learning_rate": 0.00025965949731586257,
"loss": 1.4161,
"step": 108
},
{
"epoch": 2.56,
"learning_rate": 0.0002588720642486242,
"loss": 1.4535,
"step": 109
},
{
"epoch": 2.59,
"learning_rate": 0.000258078239040118,
"loss": 1.2735,
"step": 110
},
{
"epoch": 2.61,
"learning_rate": 0.0002572780682977351,
"loss": 1.2421,
"step": 111
},
{
"epoch": 2.64,
"learning_rate": 0.000256471599001428,
"loss": 1.4072,
"step": 112
},
{
"epoch": 2.66,
"learning_rate": 0.0002556588785009528,
"loss": 1.4291,
"step": 113
},
{
"epoch": 2.68,
"learning_rate": 0.0002548399545130886,
"loss": 1.3882,
"step": 114
},
{
"epoch": 2.71,
"learning_rate": 0.0002540148751188362,
"loss": 1.6521,
"step": 115
},
{
"epoch": 2.73,
"learning_rate": 0.00025318368876059546,
"loss": 1.3777,
"step": 116
},
{
"epoch": 2.75,
"learning_rate": 0.0002523464442393204,
"loss": 1.3828,
"step": 117
},
{
"epoch": 2.78,
"learning_rate": 0.0002515031907116547,
"loss": 1.5907,
"step": 118
},
{
"epoch": 2.8,
"learning_rate": 0.0002506539776870451,
"loss": 1.2091,
"step": 119
},
{
"epoch": 2.82,
"learning_rate": 0.00024979885502483476,
"loss": 1.3796,
"step": 120
},
{
"epoch": 2.85,
"learning_rate": 0.0002489378729313361,
"loss": 1.5818,
"step": 121
},
{
"epoch": 2.87,
"learning_rate": 0.00024807108195688273,
"loss": 1.4737,
"step": 122
},
{
"epoch": 2.89,
"learning_rate": 0.0002471985329928617,
"loss": 1.4355,
"step": 123
},
{
"epoch": 2.92,
"learning_rate": 0.00024632027726872535,
"loss": 1.2755,
"step": 124
},
{
"epoch": 2.94,
"learning_rate": 0.00024543636634898394,
"loss": 1.3296,
"step": 125
},
{
"epoch": 2.96,
"learning_rate": 0.00024454685213017767,
"loss": 1.3869,
"step": 126
},
{
"epoch": 2.99,
"learning_rate": 0.00024365178683783008,
"loss": 1.2251,
"step": 127
},
{
"epoch": 3.01,
"learning_rate": 0.00024275122302338143,
"loss": 1.2315,
"step": 128
},
{
"epoch": 3.04,
"learning_rate": 0.00024184521356110367,
"loss": 0.8248,
"step": 129
},
{
"epoch": 3.06,
"learning_rate": 0.00024093381164499568,
"loss": 0.6754,
"step": 130
},
{
"epoch": 3.08,
"learning_rate": 0.0002400170707856605,
"loss": 0.8092,
"step": 131
},
{
"epoch": 3.11,
"learning_rate": 0.00023909504480716317,
"loss": 0.8068,
"step": 132
},
{
"epoch": 3.13,
"learning_rate": 0.00023816778784387094,
"loss": 0.7371,
"step": 133
},
{
"epoch": 3.15,
"learning_rate": 0.00023723535433727485,
"loss": 0.8762,
"step": 134
},
{
"epoch": 3.18,
"learning_rate": 0.0002362977990327931,
"loss": 0.4843,
"step": 135
},
{
"epoch": 3.2,
"learning_rate": 0.00023535517697655708,
"loss": 0.662,
"step": 136
},
{
"epoch": 3.22,
"learning_rate": 0.0002344075435121794,
"loss": 0.7358,
"step": 137
},
{
"epoch": 3.25,
"learning_rate": 0.0002334549542775045,
"loss": 0.7616,
"step": 138
},
{
"epoch": 3.27,
"learning_rate": 0.00023249746520134201,
"loss": 0.6971,
"step": 139
},
{
"epoch": 3.29,
"learning_rate": 0.00023153513250018316,
"loss": 0.9456,
"step": 140
},
{
"epoch": 3.32,
"learning_rate": 0.00023056801267489995,
"loss": 0.7159,
"step": 141
},
{
"epoch": 3.34,
"learning_rate": 0.000229596162507428,
"loss": 0.7363,
"step": 142
},
{
"epoch": 3.36,
"learning_rate": 0.00022861963905743281,
"loss": 0.7258,
"step": 143
},
{
"epoch": 3.39,
"learning_rate": 0.00022763849965895942,
"loss": 0.6632,
"step": 144
},
{
"epoch": 3.41,
"learning_rate": 0.00022665280191706653,
"loss": 0.6326,
"step": 145
},
{
"epoch": 3.44,
"learning_rate": 0.00022566260370444395,
"loss": 0.6292,
"step": 146
},
{
"epoch": 3.46,
"learning_rate": 0.00022466796315801508,
"loss": 0.6591,
"step": 147
},
{
"epoch": 3.48,
"learning_rate": 0.00022366893867552346,
"loss": 0.7075,
"step": 148
},
{
"epoch": 3.51,
"learning_rate": 0.00022266558891210402,
"loss": 0.6943,
"step": 149
},
{
"epoch": 3.53,
"learning_rate": 0.0002216579727768394,
"loss": 0.6478,
"step": 150
},
{
"epoch": 3.55,
"learning_rate": 0.00022064614942930122,
"loss": 0.6094,
"step": 151
},
{
"epoch": 3.58,
"learning_rate": 0.00021963017827607666,
"loss": 0.6405,
"step": 152
},
{
"epoch": 3.6,
"learning_rate": 0.00021861011896728052,
"loss": 0.6633,
"step": 153
},
{
"epoch": 3.62,
"learning_rate": 0.00021758603139305314,
"loss": 0.6685,
"step": 154
},
{
"epoch": 3.65,
"learning_rate": 0.00021655797568004395,
"loss": 0.6076,
"step": 155
},
{
"epoch": 3.67,
"learning_rate": 0.00021552601218788146,
"loss": 0.5769,
"step": 156
},
{
"epoch": 3.69,
"learning_rate": 0.00021449020150562928,
"loss": 0.6439,
"step": 157
},
{
"epoch": 3.72,
"learning_rate": 0.00021345060444822879,
"loss": 0.6128,
"step": 158
},
{
"epoch": 3.74,
"learning_rate": 0.00021240728205292863,
"loss": 0.723,
"step": 159
},
{
"epoch": 3.76,
"learning_rate": 0.000211360295575701,
"loss": 0.5633,
"step": 160
},
{
"epoch": 3.79,
"learning_rate": 0.00021030970648764505,
"loss": 0.7373,
"step": 161
},
{
"epoch": 3.81,
"learning_rate": 0.0002092555764713781,
"loss": 0.5987,
"step": 162
},
{
"epoch": 3.84,
"learning_rate": 0.00020819796741741375,
"loss": 0.7485,
"step": 163
},
{
"epoch": 3.86,
"learning_rate": 0.00020713694142052838,
"loss": 0.8548,
"step": 164
},
{
"epoch": 3.88,
"learning_rate": 0.00020607256077611528,
"loss": 0.6598,
"step": 165
},
{
"epoch": 3.91,
"learning_rate": 0.0002050048879765272,
"loss": 0.676,
"step": 166
},
{
"epoch": 3.93,
"learning_rate": 0.00020393398570740716,
"loss": 0.6663,
"step": 167
},
{
"epoch": 3.95,
"learning_rate": 0.00020285991684400827,
"loss": 0.7429,
"step": 168
},
{
"epoch": 3.98,
"learning_rate": 0.00020178274444750187,
"loss": 0.6373,
"step": 169
},
{
"epoch": 4.0,
"learning_rate": 0.0002007025317612754,
"loss": 0.6915,
"step": 170
},
{
"epoch": 4.02,
"learning_rate": 0.00019961934220721883,
"loss": 0.322,
"step": 171
},
{
"epoch": 4.05,
"learning_rate": 0.00019853323938200134,
"loss": 0.333,
"step": 172
},
{
"epoch": 4.07,
"learning_rate": 0.00019744428705333728,
"loss": 0.3354,
"step": 173
},
{
"epoch": 4.09,
"learning_rate": 0.0001963525491562421,
"loss": 0.3078,
"step": 174
},
{
"epoch": 4.12,
"learning_rate": 0.00019525808978927887,
"loss": 0.3663,
"step": 175
},
{
"epoch": 4.14,
"learning_rate": 0.00019416097321079448,
"loss": 0.3445,
"step": 176
},
{
"epoch": 4.16,
"learning_rate": 0.00019306126383514737,
"loss": 0.3903,
"step": 177
},
{
"epoch": 4.19,
"learning_rate": 0.00019195902622892518,
"loss": 0.2701,
"step": 178
},
{
"epoch": 4.21,
"learning_rate": 0.0001908543251071541,
"loss": 0.3602,
"step": 179
},
{
"epoch": 4.24,
"learning_rate": 0.00018974722532949927,
"loss": 0.381,
"step": 180
},
{
"epoch": 4.26,
"learning_rate": 0.00018863779189645666,
"loss": 0.3546,
"step": 181
},
{
"epoch": 4.28,
"learning_rate": 0.00018752608994553678,
"loss": 0.3365,
"step": 182
},
{
"epoch": 4.31,
"learning_rate": 0.00018641218474744039,
"loss": 0.3302,
"step": 183
},
{
"epoch": 4.33,
"learning_rate": 0.0001852961417022261,
"loss": 0.2817,
"step": 184
},
{
"epoch": 4.35,
"learning_rate": 0.00018417802633547065,
"loss": 0.3544,
"step": 185
},
{
"epoch": 4.38,
"learning_rate": 0.00018305790429442182,
"loss": 0.3249,
"step": 186
},
{
"epoch": 4.4,
"learning_rate": 0.0001819358413441441,
"loss": 0.354,
"step": 187
},
{
"epoch": 4.42,
"learning_rate": 0.00018081190336365744,
"loss": 0.2666,
"step": 188
},
{
"epoch": 4.45,
"learning_rate": 0.00017968615634206928,
"loss": 0.2664,
"step": 189
},
{
"epoch": 4.47,
"learning_rate": 0.00017855866637470023,
"loss": 0.37,
"step": 190
},
{
"epoch": 4.49,
"learning_rate": 0.0001774294996592035,
"loss": 0.3381,
"step": 191
},
{
"epoch": 4.52,
"learning_rate": 0.00017629872249167816,
"loss": 0.3329,
"step": 192
},
{
"epoch": 4.54,
"learning_rate": 0.0001751664012627768,
"loss": 0.2704,
"step": 193
},
{
"epoch": 4.56,
"learning_rate": 0.00017403260245380762,
"loss": 0.3881,
"step": 194
},
{
"epoch": 4.59,
"learning_rate": 0.00017289739263283115,
"loss": 0.4599,
"step": 195
},
{
"epoch": 4.61,
"learning_rate": 0.00017176083845075172,
"loss": 0.2881,
"step": 196
},
{
"epoch": 4.64,
"learning_rate": 0.0001706230066374044,
"loss": 0.343,
"step": 197
},
{
"epoch": 4.66,
"learning_rate": 0.00016948396399763704,
"loss": 0.4128,
"step": 198
},
{
"epoch": 4.68,
"learning_rate": 0.0001683437774073881,
"loss": 0.2909,
"step": 199
},
{
"epoch": 4.71,
"learning_rate": 0.00016720251380976007,
"loss": 0.4494,
"step": 200
},
{
"epoch": 4.73,
"learning_rate": 0.0001660602402110891,
"loss": 0.2766,
"step": 201
},
{
"epoch": 4.75,
"learning_rate": 0.00016491702367701103,
"loss": 0.3148,
"step": 202
},
{
"epoch": 4.78,
"learning_rate": 0.0001637729313285237,
"loss": 0.295,
"step": 203
},
{
"epoch": 4.8,
"learning_rate": 0.00016262803033804604,
"loss": 0.3496,
"step": 204
},
{
"epoch": 4.82,
"learning_rate": 0.0001614823879254744,
"loss": 0.2802,
"step": 205
},
{
"epoch": 4.85,
"learning_rate": 0.0001603360713542356,
"loss": 0.349,
"step": 206
},
{
"epoch": 4.87,
"learning_rate": 0.0001591891479273383,
"loss": 0.5714,
"step": 207
},
{
"epoch": 4.89,
"learning_rate": 0.00015804168498342083,
"loss": 0.3354,
"step": 208
},
{
"epoch": 4.92,
"learning_rate": 0.00015689374989279797,
"loss": 0.3117,
"step": 209
},
{
"epoch": 4.94,
"learning_rate": 0.0001557454100535053,
"loss": 0.3284,
"step": 210
},
{
"epoch": 4.96,
"learning_rate": 0.0001545967328873423,
"loss": 0.4018,
"step": 211
},
{
"epoch": 4.99,
"learning_rate": 0.00015344778583591356,
"loss": 0.33,
"step": 212
},
{
"epoch": 5.01,
"learning_rate": 0.00015229863635666944,
"loss": 0.2153,
"step": 213
},
{
"epoch": 5.04,
"learning_rate": 0.00015114935191894524,
"loss": 0.1414,
"step": 214
},
{
"epoch": 5.06,
"learning_rate": 0.00015,
"loss": 0.1691,
"step": 215
},
{
"epoch": 5.08,
"learning_rate": 0.00014885064808105476,
"loss": 0.1569,
"step": 216
},
{
"epoch": 5.11,
"learning_rate": 0.00014770136364333054,
"loss": 0.1469,
"step": 217
},
{
"epoch": 5.13,
"learning_rate": 0.00014655221416408644,
"loss": 0.1712,
"step": 218
},
{
"epoch": 5.15,
"learning_rate": 0.00014540326711265768,
"loss": 0.1795,
"step": 219
},
{
"epoch": 5.18,
"learning_rate": 0.0001442545899464947,
"loss": 0.2716,
"step": 220
},
{
"epoch": 5.2,
"learning_rate": 0.00014310625010720203,
"loss": 0.112,
"step": 221
},
{
"epoch": 5.22,
"learning_rate": 0.00014195831501657917,
"loss": 0.2199,
"step": 222
},
{
"epoch": 5.25,
"learning_rate": 0.0001408108520726617,
"loss": 0.1946,
"step": 223
},
{
"epoch": 5.27,
"learning_rate": 0.0001396639286457644,
"loss": 0.1926,
"step": 224
},
{
"epoch": 5.29,
"learning_rate": 0.00013851761207452564,
"loss": 0.2176,
"step": 225
},
{
"epoch": 5.32,
"learning_rate": 0.00013737196966195393,
"loss": 0.1611,
"step": 226
},
{
"epoch": 5.34,
"learning_rate": 0.00013622706867147627,
"loss": 0.1369,
"step": 227
},
{
"epoch": 5.36,
"learning_rate": 0.00013508297632298892,
"loss": 0.1554,
"step": 228
},
{
"epoch": 5.39,
"learning_rate": 0.00013393975978891087,
"loss": 0.1581,
"step": 229
},
{
"epoch": 5.41,
"learning_rate": 0.00013279748619023993,
"loss": 0.1599,
"step": 230
},
{
"epoch": 5.44,
"learning_rate": 0.00013165622259261187,
"loss": 0.2309,
"step": 231
},
{
"epoch": 5.46,
"learning_rate": 0.0001305160360023629,
"loss": 0.1977,
"step": 232
},
{
"epoch": 5.48,
"learning_rate": 0.00012937699336259555,
"loss": 0.1367,
"step": 233
},
{
"epoch": 5.51,
"learning_rate": 0.00012823916154924825,
"loss": 0.1378,
"step": 234
},
{
"epoch": 5.53,
"learning_rate": 0.0001271026073671688,
"loss": 0.2091,
"step": 235
},
{
"epoch": 5.55,
"learning_rate": 0.0001259673975461923,
"loss": 0.1367,
"step": 236
},
{
"epoch": 5.58,
"learning_rate": 0.0001248335987372232,
"loss": 0.1847,
"step": 237
},
{
"epoch": 5.6,
"learning_rate": 0.0001237012775083218,
"loss": 0.1688,
"step": 238
},
{
"epoch": 5.62,
"learning_rate": 0.00012257050034079645,
"loss": 0.149,
"step": 239
},
{
"epoch": 5.65,
"learning_rate": 0.00012144133362529971,
"loss": 0.1458,
"step": 240
},
{
"epoch": 5.67,
"learning_rate": 0.00012031384365793073,
"loss": 0.1646,
"step": 241
},
{
"epoch": 5.69,
"learning_rate": 0.00011918809663634253,
"loss": 0.1478,
"step": 242
},
{
"epoch": 5.72,
"learning_rate": 0.00011806415865585587,
"loss": 0.1426,
"step": 243
},
{
"epoch": 5.74,
"learning_rate": 0.0001169420957055782,
"loss": 0.1442,
"step": 244
},
{
"epoch": 5.76,
"learning_rate": 0.00011582197366452939,
"loss": 0.1989,
"step": 245
},
{
"epoch": 5.79,
"learning_rate": 0.00011470385829777393,
"loss": 0.2061,
"step": 246
},
{
"epoch": 5.81,
"learning_rate": 0.00011358781525255963,
"loss": 0.1528,
"step": 247
},
{
"epoch": 5.84,
"learning_rate": 0.00011247391005446323,
"loss": 0.1925,
"step": 248
},
{
"epoch": 5.86,
"learning_rate": 0.00011136220810354336,
"loss": 0.252,
"step": 249
},
{
"epoch": 5.88,
"learning_rate": 0.00011025277467050076,
"loss": 0.1626,
"step": 250
},
{
"epoch": 5.91,
"learning_rate": 0.00010914567489284591,
"loss": 0.2026,
"step": 251
},
{
"epoch": 5.93,
"learning_rate": 0.00010804097377107482,
"loss": 0.1206,
"step": 252
},
{
"epoch": 5.95,
"learning_rate": 0.00010693873616485264,
"loss": 0.1395,
"step": 253
},
{
"epoch": 5.98,
"learning_rate": 0.00010583902678920553,
"loss": 0.1204,
"step": 254
},
{
"epoch": 6.0,
"learning_rate": 0.00010474191021072114,
"loss": 0.1886,
"step": 255
},
{
"epoch": 6.02,
"learning_rate": 0.0001036474508437579,
"loss": 0.0958,
"step": 256
},
{
"epoch": 6.05,
"learning_rate": 0.00010255571294666272,
"loss": 0.1227,
"step": 257
},
{
"epoch": 6.07,
"learning_rate": 0.00010146676061799863,
"loss": 0.1148,
"step": 258
},
{
"epoch": 6.09,
"learning_rate": 0.00010038065779278117,
"loss": 0.0816,
"step": 259
},
{
"epoch": 6.12,
"learning_rate": 9.92974682387246e-05,
"loss": 0.1013,
"step": 260
},
{
"epoch": 6.14,
"learning_rate": 9.82172555524981e-05,
"loss": 0.0973,
"step": 261
},
{
"epoch": 6.16,
"learning_rate": 9.714008315599173e-05,
"loss": 0.0757,
"step": 262
},
{
"epoch": 6.19,
"learning_rate": 9.606601429259282e-05,
"loss": 0.0977,
"step": 263
},
{
"epoch": 6.21,
"learning_rate": 9.499511202347281e-05,
"loss": 0.0735,
"step": 264
},
{
"epoch": 6.24,
"learning_rate": 9.392743922388468e-05,
"loss": 0.0987,
"step": 265
},
{
"epoch": 6.26,
"learning_rate": 9.286305857947158e-05,
"loss": 0.1316,
"step": 266
},
{
"epoch": 6.28,
"learning_rate": 9.180203258258622e-05,
"loss": 0.0973,
"step": 267
},
{
"epoch": 6.31,
"learning_rate": 9.074442352862188e-05,
"loss": 0.0753,
"step": 268
},
{
"epoch": 6.33,
"learning_rate": 8.969029351235493e-05,
"loss": 0.0806,
"step": 269
},
{
"epoch": 6.35,
"learning_rate": 8.8639704424299e-05,
"loss": 0.088,
"step": 270
},
{
"epoch": 6.38,
"learning_rate": 8.759271794707134e-05,
"loss": 0.0691,
"step": 271
},
{
"epoch": 6.4,
"learning_rate": 8.654939555177119e-05,
"loss": 0.0884,
"step": 272
},
{
"epoch": 6.42,
"learning_rate": 8.550979849437068e-05,
"loss": 0.0778,
"step": 273
},
{
"epoch": 6.45,
"learning_rate": 8.447398781211854e-05,
"loss": 0.0578,
"step": 274
},
{
"epoch": 6.47,
"learning_rate": 8.344202431995602e-05,
"loss": 0.0759,
"step": 275
},
{
"epoch": 6.49,
"learning_rate": 8.24139686069469e-05,
"loss": 0.1102,
"step": 276
},
{
"epoch": 6.52,
"learning_rate": 8.138988103271947e-05,
"loss": 0.0947,
"step": 277
},
{
"epoch": 6.54,
"learning_rate": 8.03698217239233e-05,
"loss": 0.1158,
"step": 278
},
{
"epoch": 6.56,
"learning_rate": 7.935385057069874e-05,
"loss": 0.0978,
"step": 279
},
{
"epoch": 6.59,
"learning_rate": 7.834202722316054e-05,
"loss": 0.0948,
"step": 280
},
{
"epoch": 6.61,
"learning_rate": 7.733441108789596e-05,
"loss": 0.073,
"step": 281
},
{
"epoch": 6.64,
"learning_rate": 7.63310613244765e-05,
"loss": 0.1076,
"step": 282
},
{
"epoch": 6.66,
"learning_rate": 7.53320368419849e-05,
"loss": 0.1503,
"step": 283
},
{
"epoch": 6.68,
"learning_rate": 7.433739629555601e-05,
"loss": 0.0844,
"step": 284
},
{
"epoch": 6.71,
"learning_rate": 7.334719808293341e-05,
"loss": 0.0836,
"step": 285
},
{
"epoch": 6.73,
"learning_rate": 7.236150034104052e-05,
"loss": 0.1257,
"step": 286
},
{
"epoch": 6.75,
"learning_rate": 7.138036094256716e-05,
"loss": 0.0872,
"step": 287
},
{
"epoch": 6.78,
"learning_rate": 7.040383749257198e-05,
"loss": 0.0813,
"step": 288
},
{
"epoch": 6.8,
"learning_rate": 6.943198732510002e-05,
"loss": 0.1105,
"step": 289
},
{
"epoch": 6.82,
"learning_rate": 6.846486749981684e-05,
"loss": 0.1228,
"step": 290
},
{
"epoch": 6.85,
"learning_rate": 6.750253479865795e-05,
"loss": 0.0875,
"step": 291
},
{
"epoch": 6.87,
"learning_rate": 6.654504572249551e-05,
"loss": 0.0858,
"step": 292
},
{
"epoch": 6.89,
"learning_rate": 6.559245648782064e-05,
"loss": 0.0841,
"step": 293
},
{
"epoch": 6.92,
"learning_rate": 6.464482302344295e-05,
"loss": 0.0964,
"step": 294
},
{
"epoch": 6.94,
"learning_rate": 6.370220096720691e-05,
"loss": 0.0731,
"step": 295
},
{
"epoch": 6.96,
"learning_rate": 6.276464566272519e-05,
"loss": 0.091,
"step": 296
},
{
"epoch": 6.99,
"learning_rate": 6.183221215612904e-05,
"loss": 0.0842,
"step": 297
},
{
"epoch": 7.01,
"learning_rate": 6.0904955192836867e-05,
"loss": 0.0818,
"step": 298
},
{
"epoch": 7.04,
"learning_rate": 5.998292921433952e-05,
"loss": 0.0631,
"step": 299
},
{
"epoch": 7.06,
"learning_rate": 5.9066188355004337e-05,
"loss": 0.0614,
"step": 300
},
{
"epoch": 7.08,
"learning_rate": 5.815478643889635e-05,
"loss": 0.0701,
"step": 301
},
{
"epoch": 7.11,
"learning_rate": 5.724877697661855e-05,
"loss": 0.0588,
"step": 302
},
{
"epoch": 7.13,
"learning_rate": 5.634821316216995e-05,
"loss": 0.0406,
"step": 303
},
{
"epoch": 7.15,
"learning_rate": 5.545314786982229e-05,
"loss": 0.0629,
"step": 304
},
{
"epoch": 7.18,
"learning_rate": 5.4563633651016056e-05,
"loss": 0.0495,
"step": 305
},
{
"epoch": 7.2,
"learning_rate": 5.367972273127461e-05,
"loss": 0.0789,
"step": 306
},
{
"epoch": 7.22,
"learning_rate": 5.280146700713833e-05,
"loss": 0.0552,
"step": 307
},
{
"epoch": 7.25,
"learning_rate": 5.1928918043117236e-05,
"loss": 0.0682,
"step": 308
},
{
"epoch": 7.27,
"learning_rate": 5.106212706866384e-05,
"loss": 0.0635,
"step": 309
},
{
"epoch": 7.29,
"learning_rate": 5.020114497516521e-05,
"loss": 0.0635,
"step": 310
},
{
"epoch": 7.32,
"learning_rate": 4.9346022312954915e-05,
"loss": 0.0823,
"step": 311
},
{
"epoch": 7.34,
"learning_rate": 4.8496809288345314e-05,
"loss": 0.0463,
"step": 312
},
{
"epoch": 7.36,
"learning_rate": 4.7653555760679555e-05,
"loss": 0.0828,
"step": 313
},
{
"epoch": 7.39,
"learning_rate": 4.6816311239404556e-05,
"loss": 0.0554,
"step": 314
},
{
"epoch": 7.41,
"learning_rate": 4.5985124881163754e-05,
"loss": 0.0655,
"step": 315
},
{
"epoch": 7.44,
"learning_rate": 4.5160045486911405e-05,
"loss": 0.0596,
"step": 316
},
{
"epoch": 7.46,
"learning_rate": 4.434112149904721e-05,
"loss": 0.048,
"step": 317
},
{
"epoch": 7.48,
"learning_rate": 4.352840099857195e-05,
"loss": 0.0675,
"step": 318
},
{
"epoch": 7.51,
"learning_rate": 4.272193170226492e-05,
"loss": 0.0507,
"step": 319
},
{
"epoch": 7.53,
"learning_rate": 4.1921760959881954e-05,
"loss": 0.0804,
"step": 320
},
{
"epoch": 7.55,
"learning_rate": 4.112793575137575e-05,
"loss": 0.0617,
"step": 321
},
{
"epoch": 7.58,
"learning_rate": 4.0340502684137436e-05,
"loss": 0.0573,
"step": 322
},
{
"epoch": 7.6,
"learning_rate": 3.9559507990259956e-05,
"loss": 0.0554,
"step": 323
},
{
"epoch": 7.62,
"learning_rate": 3.878499752382404e-05,
"loss": 0.0531,
"step": 324
},
{
"epoch": 7.65,
"learning_rate": 3.8017016758205594e-05,
"loss": 0.0409,
"step": 325
},
{
"epoch": 7.67,
"learning_rate": 3.7255610783406275e-05,
"loss": 0.0441,
"step": 326
},
{
"epoch": 7.69,
"learning_rate": 3.6500824303405704e-05,
"loss": 0.0663,
"step": 327
},
{
"epoch": 7.72,
"learning_rate": 3.575270163353717e-05,
"loss": 0.0656,
"step": 328
},
{
"epoch": 7.74,
"learning_rate": 3.501128669788561e-05,
"loss": 0.0688,
"step": 329
},
{
"epoch": 7.76,
"learning_rate": 3.427662302670855e-05,
"loss": 0.057,
"step": 330
},
{
"epoch": 7.79,
"learning_rate": 3.3548753753880734e-05,
"loss": 0.0624,
"step": 331
},
{
"epoch": 7.81,
"learning_rate": 3.282772161436119e-05,
"loss": 0.0657,
"step": 332
},
{
"epoch": 7.84,
"learning_rate": 3.211356894168459e-05,
"loss": 0.0617,
"step": 333
},
{
"epoch": 7.86,
"learning_rate": 3.14063376654753e-05,
"loss": 0.0823,
"step": 334
},
{
"epoch": 7.88,
"learning_rate": 3.070606930898602e-05,
"loss": 0.0407,
"step": 335
},
{
"epoch": 7.91,
"learning_rate": 3.0012804986659565e-05,
"loss": 0.0615,
"step": 336
},
{
"epoch": 7.93,
"learning_rate": 2.9326585401714974e-05,
"loss": 0.0636,
"step": 337
},
{
"epoch": 7.95,
"learning_rate": 2.8647450843757897e-05,
"loss": 0.076,
"step": 338
},
{
"epoch": 7.98,
"learning_rate": 2.7975441186414834e-05,
"loss": 0.0761,
"step": 339
},
{
"epoch": 8.0,
"learning_rate": 2.7310595884992355e-05,
"loss": 0.0528,
"step": 340
},
{
"epoch": 8.02,
"learning_rate": 2.665295397416029e-05,
"loss": 0.0412,
"step": 341
},
{
"epoch": 8.05,
"learning_rate": 2.6002554065660098e-05,
"loss": 0.0491,
"step": 342
},
{
"epoch": 8.07,
"learning_rate": 2.5359434346037915e-05,
"loss": 0.0361,
"step": 343
},
{
"epoch": 8.09,
"learning_rate": 2.4723632574402317e-05,
"loss": 0.0482,
"step": 344
},
{
"epoch": 8.12,
"learning_rate": 2.4095186080207502e-05,
"loss": 0.0622,
"step": 345
},
{
"epoch": 8.14,
"learning_rate": 2.347413176106178e-05,
"loss": 0.0431,
"step": 346
},
{
"epoch": 8.16,
"learning_rate": 2.2860506080560835e-05,
"loss": 0.0581,
"step": 347
},
{
"epoch": 8.19,
"learning_rate": 2.2254345066147243e-05,
"loss": 0.0477,
"step": 348
},
{
"epoch": 8.21,
"learning_rate": 2.165568430699493e-05,
"loss": 0.0791,
"step": 349
},
{
"epoch": 8.24,
"learning_rate": 2.106455895191985e-05,
"loss": 0.0478,
"step": 350
},
{
"epoch": 8.26,
"learning_rate": 2.0481003707316134e-05,
"loss": 0.0585,
"step": 351
},
{
"epoch": 8.28,
"learning_rate": 1.9905052835118533e-05,
"loss": 0.0575,
"step": 352
},
{
"epoch": 8.31,
"learning_rate": 1.933674015079083e-05,
"loss": 0.0403,
"step": 353
},
{
"epoch": 8.33,
"learning_rate": 1.8776099021340245e-05,
"loss": 0.0505,
"step": 354
},
{
"epoch": 8.35,
"learning_rate": 1.8223162363358667e-05,
"loss": 0.0468,
"step": 355
},
{
"epoch": 8.38,
"learning_rate": 1.767796264108977e-05,
"loss": 0.0353,
"step": 356
},
{
"epoch": 8.4,
"learning_rate": 1.71405318645232e-05,
"loss": 0.0687,
"step": 357
},
{
"epoch": 8.42,
"learning_rate": 1.6610901587514995e-05,
"loss": 0.0425,
"step": 358
},
{
"epoch": 8.45,
"learning_rate": 1.6089102905935107e-05,
"loss": 0.0593,
"step": 359
},
{
"epoch": 8.47,
"learning_rate": 1.5575166455841677e-05,
"loss": 0.055,
"step": 360
},
{
"epoch": 8.49,
"learning_rate": 1.5069122411682188e-05,
"loss": 0.0432,
"step": 361
},
{
"epoch": 8.52,
"learning_rate": 1.4571000484522055e-05,
"loss": 0.04,
"step": 362
},
{
"epoch": 8.54,
"learning_rate": 1.4080829920300047e-05,
"loss": 0.0392,
"step": 363
},
{
"epoch": 8.56,
"learning_rate": 1.359863949811127e-05,
"loss": 0.0387,
"step": 364
},
{
"epoch": 8.59,
"learning_rate": 1.3124457528517502e-05,
"loss": 0.0484,
"step": 365
},
{
"epoch": 8.61,
"learning_rate": 1.265831185188489e-05,
"loss": 0.0419,
"step": 366
},
{
"epoch": 8.64,
"learning_rate": 1.220022983674952e-05,
"loss": 0.0315,
"step": 367
},
{
"epoch": 8.66,
"learning_rate": 1.1750238378210425e-05,
"loss": 0.2463,
"step": 368
},
{
"epoch": 8.68,
"learning_rate": 1.1308363896350625e-05,
"loss": 0.0532,
"step": 369
},
{
"epoch": 8.71,
"learning_rate": 1.0874632334685806e-05,
"loss": 0.0404,
"step": 370
},
{
"epoch": 8.73,
"learning_rate": 1.0449069158641238e-05,
"loss": 0.0376,
"step": 371
},
{
"epoch": 8.75,
"learning_rate": 1.0031699354056616e-05,
"loss": 0.0484,
"step": 372
},
{
"epoch": 8.78,
"learning_rate": 9.622547425718924e-06,
"loss": 0.0421,
"step": 373
},
{
"epoch": 8.8,
"learning_rate": 9.22163739592398e-06,
"loss": 0.0523,
"step": 374
},
{
"epoch": 8.82,
"learning_rate": 8.82899280306577e-06,
"loss": 0.053,
"step": 375
},
{
"epoch": 8.85,
"learning_rate": 8.444636700254598e-06,
"loss": 0.054,
"step": 376
},
{
"epoch": 8.87,
"learning_rate": 8.068591653963535e-06,
"loss": 0.0476,
"step": 377
},
{
"epoch": 8.89,
"learning_rate": 7.700879742703486e-06,
"loss": 0.0482,
"step": 378
},
{
"epoch": 8.92,
"learning_rate": 7.34152255572697e-06,
"loss": 0.0542,
"step": 379
},
{
"epoch": 8.94,
"learning_rate": 6.990541191760418e-06,
"loss": 0.0458,
"step": 380
},
{
"epoch": 8.96,
"learning_rate": 6.647956257765585e-06,
"loss": 0.0402,
"step": 381
},
{
"epoch": 8.99,
"learning_rate": 6.3137878677295306e-06,
"loss": 0.0567,
"step": 382
},
{
"epoch": 9.01,
"learning_rate": 5.988055641483796e-06,
"loss": 0.0475,
"step": 383
},
{
"epoch": 9.04,
"learning_rate": 5.670778703552348e-06,
"loss": 0.0444,
"step": 384
},
{
"epoch": 9.06,
"learning_rate": 5.361975682028852e-06,
"loss": 0.0463,
"step": 385
},
{
"epoch": 9.08,
"learning_rate": 5.061664707482904e-06,
"loss": 0.0422,
"step": 386
},
{
"epoch": 9.11,
"learning_rate": 4.769863411895514e-06,
"loss": 0.0488,
"step": 387
},
{
"epoch": 9.13,
"learning_rate": 4.486588927624046e-06,
"loss": 0.0476,
"step": 388
},
{
"epoch": 9.15,
"learning_rate": 4.211857886396064e-06,
"loss": 0.057,
"step": 389
},
{
"epoch": 9.18,
"learning_rate": 3.945686418333155e-06,
"loss": 0.0316,
"step": 390
},
{
"epoch": 9.2,
"learning_rate": 3.6880901510036086e-06,
"loss": 0.0411,
"step": 391
},
{
"epoch": 9.22,
"learning_rate": 3.4390842085051164e-06,
"loss": 0.0452,
"step": 392
},
{
"epoch": 9.25,
"learning_rate": 3.1986832105766467e-06,
"loss": 0.0337,
"step": 393
},
{
"epoch": 9.27,
"learning_rate": 2.9669012717401187e-06,
"loss": 0.0462,
"step": 394
},
{
"epoch": 9.29,
"learning_rate": 2.7437520004717608e-06,
"loss": 0.0567,
"step": 395
},
{
"epoch": 9.32,
"learning_rate": 2.5292484984030693e-06,
"loss": 0.0427,
"step": 396
},
{
"epoch": 9.34,
"learning_rate": 2.32340335955159e-06,
"loss": 0.0401,
"step": 397
},
{
"epoch": 9.36,
"learning_rate": 2.126228669581492e-06,
"loss": 0.0414,
"step": 398
},
{
"epoch": 9.39,
"learning_rate": 1.937736005094004e-06,
"loss": 0.0408,
"step": 399
},
{
"epoch": 9.41,
"learning_rate": 1.7579364329477375e-06,
"loss": 0.0301,
"step": 400
}
],
"max_steps": 420,
"num_train_epochs": 10,
"total_flos": 6953968199270400.0,
"trial_name": null,
"trial_params": null
}