GeLM / EgoQA-GeLM-7B /trainer_state.json
qiruichen1206@gmail.com
Initial model upload
e1ac07a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 1630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.0816326530612243e-07,
"loss": 11.0156,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 8.163265306122449e-07,
"loss": 10.5312,
"step": 2
},
{
"epoch": 0.02,
"learning_rate": 1.2244897959183673e-06,
"loss": 10.9531,
"step": 3
},
{
"epoch": 0.02,
"learning_rate": 1.6326530612244897e-06,
"loss": 10.4062,
"step": 4
},
{
"epoch": 0.03,
"learning_rate": 2.0408163265306125e-06,
"loss": 10.5156,
"step": 5
},
{
"epoch": 0.04,
"learning_rate": 2.4489795918367347e-06,
"loss": 10.5156,
"step": 6
},
{
"epoch": 0.04,
"learning_rate": 2.8571428571428573e-06,
"loss": 9.8281,
"step": 7
},
{
"epoch": 0.05,
"learning_rate": 3.2653061224489794e-06,
"loss": 8.9531,
"step": 8
},
{
"epoch": 0.06,
"learning_rate": 3.6734693877551024e-06,
"loss": 9.0156,
"step": 9
},
{
"epoch": 0.06,
"learning_rate": 4.081632653061225e-06,
"loss": 8.9062,
"step": 10
},
{
"epoch": 0.07,
"learning_rate": 4.489795918367348e-06,
"loss": 8.0781,
"step": 11
},
{
"epoch": 0.07,
"learning_rate": 4.897959183673469e-06,
"loss": 7.625,
"step": 12
},
{
"epoch": 0.08,
"learning_rate": 5.306122448979593e-06,
"loss": 7.7188,
"step": 13
},
{
"epoch": 0.09,
"learning_rate": 5.7142857142857145e-06,
"loss": 7.0391,
"step": 14
},
{
"epoch": 0.09,
"learning_rate": 6.122448979591837e-06,
"loss": 6.6875,
"step": 15
},
{
"epoch": 0.1,
"learning_rate": 6.530612244897959e-06,
"loss": 6.4922,
"step": 16
},
{
"epoch": 0.1,
"learning_rate": 6.938775510204082e-06,
"loss": 6.1953,
"step": 17
},
{
"epoch": 0.11,
"learning_rate": 7.346938775510205e-06,
"loss": 6.2578,
"step": 18
},
{
"epoch": 0.12,
"learning_rate": 7.755102040816327e-06,
"loss": 5.8906,
"step": 19
},
{
"epoch": 0.12,
"learning_rate": 8.16326530612245e-06,
"loss": 5.7656,
"step": 20
},
{
"epoch": 0.13,
"learning_rate": 8.571428571428571e-06,
"loss": 5.4844,
"step": 21
},
{
"epoch": 0.13,
"learning_rate": 8.979591836734695e-06,
"loss": 5.4922,
"step": 22
},
{
"epoch": 0.14,
"learning_rate": 9.387755102040818e-06,
"loss": 5.3594,
"step": 23
},
{
"epoch": 0.15,
"learning_rate": 9.795918367346939e-06,
"loss": 5.0625,
"step": 24
},
{
"epoch": 0.15,
"learning_rate": 1.0204081632653063e-05,
"loss": 4.9219,
"step": 25
},
{
"epoch": 0.16,
"learning_rate": 1.0612244897959186e-05,
"loss": 4.9609,
"step": 26
},
{
"epoch": 0.17,
"learning_rate": 1.1020408163265306e-05,
"loss": 4.6172,
"step": 27
},
{
"epoch": 0.17,
"learning_rate": 1.1428571428571429e-05,
"loss": 4.7578,
"step": 28
},
{
"epoch": 0.18,
"learning_rate": 1.1836734693877552e-05,
"loss": 4.4219,
"step": 29
},
{
"epoch": 0.18,
"learning_rate": 1.2244897959183674e-05,
"loss": 4.4688,
"step": 30
},
{
"epoch": 0.19,
"learning_rate": 1.2653061224489798e-05,
"loss": 4.8281,
"step": 31
},
{
"epoch": 0.2,
"learning_rate": 1.3061224489795918e-05,
"loss": 4.6484,
"step": 32
},
{
"epoch": 0.2,
"learning_rate": 1.3469387755102042e-05,
"loss": 4.6172,
"step": 33
},
{
"epoch": 0.21,
"learning_rate": 1.3877551020408165e-05,
"loss": 4.5938,
"step": 34
},
{
"epoch": 0.21,
"learning_rate": 1.4285714285714287e-05,
"loss": 4.5156,
"step": 35
},
{
"epoch": 0.22,
"learning_rate": 1.469387755102041e-05,
"loss": 4.5938,
"step": 36
},
{
"epoch": 0.23,
"learning_rate": 1.510204081632653e-05,
"loss": 4.2109,
"step": 37
},
{
"epoch": 0.23,
"learning_rate": 1.5510204081632655e-05,
"loss": 4.1094,
"step": 38
},
{
"epoch": 0.24,
"learning_rate": 1.5918367346938776e-05,
"loss": 4.0742,
"step": 39
},
{
"epoch": 0.25,
"learning_rate": 1.63265306122449e-05,
"loss": 4.1406,
"step": 40
},
{
"epoch": 0.25,
"learning_rate": 1.673469387755102e-05,
"loss": 3.9453,
"step": 41
},
{
"epoch": 0.26,
"learning_rate": 1.7142857142857142e-05,
"loss": 3.7773,
"step": 42
},
{
"epoch": 0.26,
"learning_rate": 1.7551020408163266e-05,
"loss": 4.0781,
"step": 43
},
{
"epoch": 0.27,
"learning_rate": 1.795918367346939e-05,
"loss": 4.3906,
"step": 44
},
{
"epoch": 0.28,
"learning_rate": 1.836734693877551e-05,
"loss": 4.2031,
"step": 45
},
{
"epoch": 0.28,
"learning_rate": 1.8775510204081636e-05,
"loss": 4.0469,
"step": 46
},
{
"epoch": 0.29,
"learning_rate": 1.9183673469387756e-05,
"loss": 4.3359,
"step": 47
},
{
"epoch": 0.29,
"learning_rate": 1.9591836734693877e-05,
"loss": 3.9688,
"step": 48
},
{
"epoch": 0.3,
"learning_rate": 2e-05,
"loss": 4.1289,
"step": 49
},
{
"epoch": 0.31,
"learning_rate": 1.9999980257330883e-05,
"loss": 4.2734,
"step": 50
},
{
"epoch": 0.31,
"learning_rate": 1.9999921029401478e-05,
"loss": 4.2344,
"step": 51
},
{
"epoch": 0.32,
"learning_rate": 1.9999822316445652e-05,
"loss": 4.1797,
"step": 52
},
{
"epoch": 0.33,
"learning_rate": 1.9999684118853177e-05,
"loss": 4.0859,
"step": 53
},
{
"epoch": 0.33,
"learning_rate": 1.9999506437169723e-05,
"loss": 4.082,
"step": 54
},
{
"epoch": 0.34,
"learning_rate": 1.9999289272096886e-05,
"loss": 4.1875,
"step": 55
},
{
"epoch": 0.34,
"learning_rate": 1.9999032624492144e-05,
"loss": 3.9062,
"step": 56
},
{
"epoch": 0.35,
"learning_rate": 1.999873649536887e-05,
"loss": 4.0664,
"step": 57
},
{
"epoch": 0.36,
"learning_rate": 1.9998400885896355e-05,
"loss": 3.9922,
"step": 58
},
{
"epoch": 0.36,
"learning_rate": 1.9998025797399753e-05,
"loss": 4.0312,
"step": 59
},
{
"epoch": 0.37,
"learning_rate": 1.9997611231360117e-05,
"loss": 3.9297,
"step": 60
},
{
"epoch": 0.37,
"learning_rate": 1.9997157189414373e-05,
"loss": 4.0781,
"step": 61
},
{
"epoch": 0.38,
"learning_rate": 1.9996663673355326e-05,
"loss": 3.8125,
"step": 62
},
{
"epoch": 0.39,
"learning_rate": 1.9996130685131637e-05,
"loss": 3.9375,
"step": 63
},
{
"epoch": 0.39,
"learning_rate": 1.999555822684783e-05,
"loss": 4.1602,
"step": 64
},
{
"epoch": 0.4,
"learning_rate": 1.9994946300764276e-05,
"loss": 3.9336,
"step": 65
},
{
"epoch": 0.4,
"learning_rate": 1.999429490929718e-05,
"loss": 4.207,
"step": 66
},
{
"epoch": 0.41,
"learning_rate": 1.999360405501859e-05,
"loss": 3.7422,
"step": 67
},
{
"epoch": 0.42,
"learning_rate": 1.9992873740656372e-05,
"loss": 3.9414,
"step": 68
},
{
"epoch": 0.42,
"learning_rate": 1.9992103969094182e-05,
"loss": 3.8711,
"step": 69
},
{
"epoch": 0.43,
"learning_rate": 1.99912947433715e-05,
"loss": 4.25,
"step": 70
},
{
"epoch": 0.44,
"learning_rate": 1.999044606668358e-05,
"loss": 3.8203,
"step": 71
},
{
"epoch": 0.44,
"learning_rate": 1.998955794238145e-05,
"loss": 3.9688,
"step": 72
},
{
"epoch": 0.45,
"learning_rate": 1.9988630373971896e-05,
"loss": 3.9414,
"step": 73
},
{
"epoch": 0.45,
"learning_rate": 1.9987663365117456e-05,
"loss": 3.5312,
"step": 74
},
{
"epoch": 0.46,
"learning_rate": 1.99866569196364e-05,
"loss": 3.8789,
"step": 75
},
{
"epoch": 0.47,
"learning_rate": 1.9985611041502704e-05,
"loss": 3.9062,
"step": 76
},
{
"epoch": 0.47,
"learning_rate": 1.9984525734846056e-05,
"loss": 3.75,
"step": 77
},
{
"epoch": 0.48,
"learning_rate": 1.998340100395183e-05,
"loss": 3.8984,
"step": 78
},
{
"epoch": 0.48,
"learning_rate": 1.9982236853261067e-05,
"loss": 4.0781,
"step": 79
},
{
"epoch": 0.49,
"learning_rate": 1.9981033287370443e-05,
"loss": 3.8672,
"step": 80
},
{
"epoch": 0.5,
"learning_rate": 1.9979790311032288e-05,
"loss": 3.7461,
"step": 81
},
{
"epoch": 0.5,
"learning_rate": 1.9978507929154534e-05,
"loss": 3.6602,
"step": 82
},
{
"epoch": 0.51,
"learning_rate": 1.9977186146800707e-05,
"loss": 3.8555,
"step": 83
},
{
"epoch": 0.52,
"learning_rate": 1.9975824969189913e-05,
"loss": 3.8086,
"step": 84
},
{
"epoch": 0.52,
"learning_rate": 1.997442440169681e-05,
"loss": 3.9727,
"step": 85
},
{
"epoch": 0.53,
"learning_rate": 1.997298444985158e-05,
"loss": 3.6172,
"step": 86
},
{
"epoch": 0.53,
"learning_rate": 1.9971505119339923e-05,
"loss": 3.8359,
"step": 87
},
{
"epoch": 0.54,
"learning_rate": 1.9969986416003026e-05,
"loss": 3.8594,
"step": 88
},
{
"epoch": 0.55,
"learning_rate": 1.9968428345837542e-05,
"loss": 3.7227,
"step": 89
},
{
"epoch": 0.55,
"learning_rate": 1.996683091499556e-05,
"loss": 4.125,
"step": 90
},
{
"epoch": 0.56,
"learning_rate": 1.9965194129784597e-05,
"loss": 3.832,
"step": 91
},
{
"epoch": 0.56,
"learning_rate": 1.9963517996667548e-05,
"loss": 4.0,
"step": 92
},
{
"epoch": 0.57,
"learning_rate": 1.9961802522262685e-05,
"loss": 4.0703,
"step": 93
},
{
"epoch": 0.58,
"learning_rate": 1.996004771334361e-05,
"loss": 3.7461,
"step": 94
},
{
"epoch": 0.58,
"learning_rate": 1.9958253576839256e-05,
"loss": 3.9727,
"step": 95
},
{
"epoch": 0.59,
"learning_rate": 1.9956420119833826e-05,
"loss": 4.0664,
"step": 96
},
{
"epoch": 0.6,
"learning_rate": 1.9954547349566783e-05,
"loss": 4.2539,
"step": 97
},
{
"epoch": 0.6,
"learning_rate": 1.9952635273432835e-05,
"loss": 4.0156,
"step": 98
},
{
"epoch": 0.61,
"learning_rate": 1.9950683898981866e-05,
"loss": 4.1406,
"step": 99
},
{
"epoch": 0.61,
"learning_rate": 1.994869323391895e-05,
"loss": 4.1523,
"step": 100
},
{
"epoch": 0.62,
"learning_rate": 1.9946663286104303e-05,
"loss": 3.9023,
"step": 101
},
{
"epoch": 0.63,
"learning_rate": 1.994459406355323e-05,
"loss": 3.8086,
"step": 102
},
{
"epoch": 0.63,
"learning_rate": 1.994248557443613e-05,
"loss": 4.0391,
"step": 103
},
{
"epoch": 0.64,
"learning_rate": 1.9940337827078448e-05,
"loss": 3.9453,
"step": 104
},
{
"epoch": 0.64,
"learning_rate": 1.9938150829960634e-05,
"loss": 4.0039,
"step": 105
},
{
"epoch": 0.65,
"learning_rate": 1.993592459171812e-05,
"loss": 3.9883,
"step": 106
},
{
"epoch": 0.66,
"learning_rate": 1.9933659121141283e-05,
"loss": 3.6758,
"step": 107
},
{
"epoch": 0.66,
"learning_rate": 1.993135442717541e-05,
"loss": 3.793,
"step": 108
},
{
"epoch": 0.67,
"learning_rate": 1.9929010518920667e-05,
"loss": 3.7383,
"step": 109
},
{
"epoch": 0.67,
"learning_rate": 1.9926627405632048e-05,
"loss": 3.7227,
"step": 110
},
{
"epoch": 0.68,
"learning_rate": 1.992420509671936e-05,
"loss": 3.9023,
"step": 111
},
{
"epoch": 0.69,
"learning_rate": 1.992174360174717e-05,
"loss": 4.0078,
"step": 112
},
{
"epoch": 0.69,
"learning_rate": 1.991924293043478e-05,
"loss": 4.2109,
"step": 113
},
{
"epoch": 0.7,
"learning_rate": 1.991670309265617e-05,
"loss": 3.7461,
"step": 114
},
{
"epoch": 0.71,
"learning_rate": 1.9914124098439976e-05,
"loss": 4.0039,
"step": 115
},
{
"epoch": 0.71,
"learning_rate": 1.9911505957969443e-05,
"loss": 3.8867,
"step": 116
},
{
"epoch": 0.72,
"learning_rate": 1.990884868158239e-05,
"loss": 3.9883,
"step": 117
},
{
"epoch": 0.72,
"learning_rate": 1.9906152279771162e-05,
"loss": 3.8359,
"step": 118
},
{
"epoch": 0.73,
"learning_rate": 1.990341676318259e-05,
"loss": 3.6719,
"step": 119
},
{
"epoch": 0.74,
"learning_rate": 1.9900642142617958e-05,
"loss": 3.5898,
"step": 120
},
{
"epoch": 0.74,
"learning_rate": 1.9897828429032946e-05,
"loss": 3.9922,
"step": 121
},
{
"epoch": 0.75,
"learning_rate": 1.98949756335376e-05,
"loss": 3.8711,
"step": 122
},
{
"epoch": 0.75,
"learning_rate": 1.9892083767396274e-05,
"loss": 3.6797,
"step": 123
},
{
"epoch": 0.76,
"learning_rate": 1.9889152842027607e-05,
"loss": 4.0078,
"step": 124
},
{
"epoch": 0.77,
"learning_rate": 1.9886182869004447e-05,
"loss": 3.8164,
"step": 125
},
{
"epoch": 0.77,
"learning_rate": 1.9883173860053845e-05,
"loss": 3.6953,
"step": 126
},
{
"epoch": 0.78,
"learning_rate": 1.9880125827056967e-05,
"loss": 3.7344,
"step": 127
},
{
"epoch": 0.79,
"learning_rate": 1.9877038782049074e-05,
"loss": 3.6562,
"step": 128
},
{
"epoch": 0.79,
"learning_rate": 1.9873912737219468e-05,
"loss": 3.5625,
"step": 129
},
{
"epoch": 0.8,
"learning_rate": 1.987074770491145e-05,
"loss": 4.0859,
"step": 130
},
{
"epoch": 0.8,
"learning_rate": 1.9867543697622248e-05,
"loss": 3.7344,
"step": 131
},
{
"epoch": 0.81,
"learning_rate": 1.9864300728002997e-05,
"loss": 3.9453,
"step": 132
},
{
"epoch": 0.82,
"learning_rate": 1.986101880885867e-05,
"loss": 4.1211,
"step": 133
},
{
"epoch": 0.82,
"learning_rate": 1.985769795314804e-05,
"loss": 3.7344,
"step": 134
},
{
"epoch": 0.83,
"learning_rate": 1.9854338173983615e-05,
"loss": 3.6875,
"step": 135
},
{
"epoch": 0.83,
"learning_rate": 1.9850939484631598e-05,
"loss": 3.8125,
"step": 136
},
{
"epoch": 0.84,
"learning_rate": 1.9847501898511824e-05,
"loss": 3.707,
"step": 137
},
{
"epoch": 0.85,
"learning_rate": 1.9844025429197727e-05,
"loss": 4.0781,
"step": 138
},
{
"epoch": 0.85,
"learning_rate": 1.984051009041626e-05,
"loss": 3.8281,
"step": 139
},
{
"epoch": 0.86,
"learning_rate": 1.983695589604785e-05,
"loss": 4.0391,
"step": 140
},
{
"epoch": 0.87,
"learning_rate": 1.9833362860126364e-05,
"loss": 3.6719,
"step": 141
},
{
"epoch": 0.87,
"learning_rate": 1.982973099683902e-05,
"loss": 4.0898,
"step": 142
},
{
"epoch": 0.88,
"learning_rate": 1.9826060320526355e-05,
"loss": 3.8281,
"step": 143
},
{
"epoch": 0.88,
"learning_rate": 1.982235084568216e-05,
"loss": 3.9219,
"step": 144
},
{
"epoch": 0.89,
"learning_rate": 1.9818602586953414e-05,
"loss": 3.9961,
"step": 145
},
{
"epoch": 0.9,
"learning_rate": 1.9814815559140258e-05,
"loss": 3.8125,
"step": 146
},
{
"epoch": 0.9,
"learning_rate": 1.9810989777195884e-05,
"loss": 3.8164,
"step": 147
},
{
"epoch": 0.91,
"learning_rate": 1.9807125256226532e-05,
"loss": 4.1094,
"step": 148
},
{
"epoch": 0.91,
"learning_rate": 1.9803222011491385e-05,
"loss": 3.4805,
"step": 149
},
{
"epoch": 0.92,
"learning_rate": 1.979928005840255e-05,
"loss": 3.7305,
"step": 150
},
{
"epoch": 0.93,
"learning_rate": 1.9795299412524948e-05,
"loss": 3.793,
"step": 151
},
{
"epoch": 0.93,
"learning_rate": 1.9791280089576302e-05,
"loss": 4.0312,
"step": 152
},
{
"epoch": 0.94,
"learning_rate": 1.978722210542704e-05,
"loss": 3.6953,
"step": 153
},
{
"epoch": 0.94,
"learning_rate": 1.9783125476100254e-05,
"loss": 3.7891,
"step": 154
},
{
"epoch": 0.95,
"learning_rate": 1.977899021777162e-05,
"loss": 3.6523,
"step": 155
},
{
"epoch": 0.96,
"learning_rate": 1.977481634676935e-05,
"loss": 3.9414,
"step": 156
},
{
"epoch": 0.96,
"learning_rate": 1.9770603879574108e-05,
"loss": 3.9609,
"step": 157
},
{
"epoch": 0.97,
"learning_rate": 1.9766352832818972e-05,
"loss": 3.4336,
"step": 158
},
{
"epoch": 0.98,
"learning_rate": 1.9762063223289334e-05,
"loss": 3.6484,
"step": 159
},
{
"epoch": 0.98,
"learning_rate": 1.975773506792287e-05,
"loss": 3.8281,
"step": 160
},
{
"epoch": 0.99,
"learning_rate": 1.9753368383809445e-05,
"loss": 3.7578,
"step": 161
},
{
"epoch": 0.99,
"learning_rate": 1.974896318819106e-05,
"loss": 3.8555,
"step": 162
},
{
"epoch": 1.0,
"learning_rate": 1.974451949846177e-05,
"loss": 3.7617,
"step": 163
},
{
"epoch": 1.01,
"learning_rate": 1.974003733216765e-05,
"loss": 3.5039,
"step": 164
},
{
"epoch": 1.01,
"learning_rate": 1.9735516707006676e-05,
"loss": 3.7344,
"step": 165
},
{
"epoch": 1.02,
"learning_rate": 1.973095764082869e-05,
"loss": 3.6172,
"step": 166
},
{
"epoch": 1.02,
"learning_rate": 1.972636015163532e-05,
"loss": 3.7734,
"step": 167
},
{
"epoch": 1.03,
"learning_rate": 1.9721724257579907e-05,
"loss": 3.543,
"step": 168
},
{
"epoch": 1.04,
"learning_rate": 1.9717049976967437e-05,
"loss": 3.7031,
"step": 169
},
{
"epoch": 1.04,
"learning_rate": 1.971233732825446e-05,
"loss": 3.543,
"step": 170
},
{
"epoch": 1.05,
"learning_rate": 1.9707586330049037e-05,
"loss": 3.6836,
"step": 171
},
{
"epoch": 1.06,
"learning_rate": 1.9702797001110642e-05,
"loss": 3.2969,
"step": 172
},
{
"epoch": 1.06,
"learning_rate": 1.9697969360350098e-05,
"loss": 3.3789,
"step": 173
},
{
"epoch": 1.07,
"learning_rate": 1.969310342682951e-05,
"loss": 3.5625,
"step": 174
},
{
"epoch": 1.07,
"learning_rate": 1.9688199219762183e-05,
"loss": 3.9297,
"step": 175
},
{
"epoch": 1.08,
"learning_rate": 1.9683256758512544e-05,
"loss": 3.6094,
"step": 176
},
{
"epoch": 1.09,
"learning_rate": 1.967827606259607e-05,
"loss": 3.5547,
"step": 177
},
{
"epoch": 1.09,
"learning_rate": 1.96732571516792e-05,
"loss": 3.5742,
"step": 178
},
{
"epoch": 1.1,
"learning_rate": 1.9668200045579283e-05,
"loss": 3.3047,
"step": 179
},
{
"epoch": 1.1,
"learning_rate": 1.9663104764264468e-05,
"loss": 3.5117,
"step": 180
},
{
"epoch": 1.11,
"learning_rate": 1.9657971327853644e-05,
"loss": 3.4805,
"step": 181
},
{
"epoch": 1.12,
"learning_rate": 1.9652799756616364e-05,
"loss": 3.4453,
"step": 182
},
{
"epoch": 1.12,
"learning_rate": 1.964759007097275e-05,
"loss": 3.5195,
"step": 183
},
{
"epoch": 1.13,
"learning_rate": 1.964234229149342e-05,
"loss": 3.375,
"step": 184
},
{
"epoch": 1.13,
"learning_rate": 1.963705643889941e-05,
"loss": 3.4648,
"step": 185
},
{
"epoch": 1.14,
"learning_rate": 1.9631732534062088e-05,
"loss": 3.6719,
"step": 186
},
{
"epoch": 1.15,
"learning_rate": 1.962637059800307e-05,
"loss": 3.582,
"step": 187
},
{
"epoch": 1.15,
"learning_rate": 1.9620970651894146e-05,
"loss": 3.8086,
"step": 188
},
{
"epoch": 1.16,
"learning_rate": 1.9615532717057185e-05,
"loss": 3.5234,
"step": 189
},
{
"epoch": 1.17,
"learning_rate": 1.9610056814964053e-05,
"loss": 3.6016,
"step": 190
},
{
"epoch": 1.17,
"learning_rate": 1.9604542967236535e-05,
"loss": 3.6172,
"step": 191
},
{
"epoch": 1.18,
"learning_rate": 1.9598991195646252e-05,
"loss": 3.3477,
"step": 192
},
{
"epoch": 1.18,
"learning_rate": 1.959340152211455e-05,
"loss": 3.3125,
"step": 193
},
{
"epoch": 1.19,
"learning_rate": 1.9587773968712458e-05,
"loss": 3.7891,
"step": 194
},
{
"epoch": 1.2,
"learning_rate": 1.958210855766055e-05,
"loss": 3.8008,
"step": 195
},
{
"epoch": 1.2,
"learning_rate": 1.95764053113289e-05,
"loss": 3.5156,
"step": 196
},
{
"epoch": 1.21,
"learning_rate": 1.9570664252236966e-05,
"loss": 3.9531,
"step": 197
},
{
"epoch": 1.21,
"learning_rate": 1.956488540305351e-05,
"loss": 3.3164,
"step": 198
},
{
"epoch": 1.22,
"learning_rate": 1.9559068786596526e-05,
"loss": 3.6797,
"step": 199
},
{
"epoch": 1.23,
"learning_rate": 1.9553214425833108e-05,
"loss": 3.4844,
"step": 200
},
{
"epoch": 1.23,
"learning_rate": 1.9547322343879397e-05,
"loss": 3.6641,
"step": 201
},
{
"epoch": 1.24,
"learning_rate": 1.954139256400049e-05,
"loss": 3.5195,
"step": 202
},
{
"epoch": 1.25,
"learning_rate": 1.9535425109610317e-05,
"loss": 3.7773,
"step": 203
},
{
"epoch": 1.25,
"learning_rate": 1.9529420004271568e-05,
"loss": 3.3711,
"step": 204
},
{
"epoch": 1.26,
"learning_rate": 1.952337727169561e-05,
"loss": 3.8828,
"step": 205
},
{
"epoch": 1.26,
"learning_rate": 1.951729693574238e-05,
"loss": 3.5781,
"step": 206
},
{
"epoch": 1.27,
"learning_rate": 1.9511179020420284e-05,
"loss": 3.457,
"step": 207
},
{
"epoch": 1.28,
"learning_rate": 1.950502354988612e-05,
"loss": 3.5312,
"step": 208
},
{
"epoch": 1.28,
"learning_rate": 1.9498830548444972e-05,
"loss": 3.6367,
"step": 209
},
{
"epoch": 1.29,
"learning_rate": 1.9492600040550114e-05,
"loss": 3.5625,
"step": 210
},
{
"epoch": 1.29,
"learning_rate": 1.948633205080292e-05,
"loss": 3.5703,
"step": 211
},
{
"epoch": 1.3,
"learning_rate": 1.948002660395276e-05,
"loss": 3.7461,
"step": 212
},
{
"epoch": 1.31,
"learning_rate": 1.9473683724896898e-05,
"loss": 3.7148,
"step": 213
},
{
"epoch": 1.31,
"learning_rate": 1.9467303438680414e-05,
"loss": 3.5039,
"step": 214
},
{
"epoch": 1.32,
"learning_rate": 1.946088577049608e-05,
"loss": 3.5273,
"step": 215
},
{
"epoch": 1.33,
"learning_rate": 1.9454430745684276e-05,
"loss": 3.7188,
"step": 216
},
{
"epoch": 1.33,
"learning_rate": 1.944793838973289e-05,
"loss": 3.5586,
"step": 217
},
{
"epoch": 1.34,
"learning_rate": 1.94414087282772e-05,
"loss": 3.6602,
"step": 218
},
{
"epoch": 1.34,
"learning_rate": 1.9434841787099804e-05,
"loss": 3.3633,
"step": 219
},
{
"epoch": 1.35,
"learning_rate": 1.9428237592130487e-05,
"loss": 3.2969,
"step": 220
},
{
"epoch": 1.36,
"learning_rate": 1.9421596169446135e-05,
"loss": 3.7031,
"step": 221
},
{
"epoch": 1.36,
"learning_rate": 1.941491754527064e-05,
"loss": 3.4375,
"step": 222
},
{
"epoch": 1.37,
"learning_rate": 1.940820174597476e-05,
"loss": 3.6016,
"step": 223
},
{
"epoch": 1.37,
"learning_rate": 1.9401448798076064e-05,
"loss": 3.6406,
"step": 224
},
{
"epoch": 1.38,
"learning_rate": 1.9394658728238797e-05,
"loss": 3.5273,
"step": 225
},
{
"epoch": 1.39,
"learning_rate": 1.9387831563273775e-05,
"loss": 3.4336,
"step": 226
},
{
"epoch": 1.39,
"learning_rate": 1.938096733013829e-05,
"loss": 3.4141,
"step": 227
},
{
"epoch": 1.4,
"learning_rate": 1.9374066055936004e-05,
"loss": 3.6797,
"step": 228
},
{
"epoch": 1.4,
"learning_rate": 1.9367127767916828e-05,
"loss": 3.6953,
"step": 229
},
{
"epoch": 1.41,
"learning_rate": 1.9360152493476828e-05,
"loss": 3.6797,
"step": 230
},
{
"epoch": 1.42,
"learning_rate": 1.9353140260158108e-05,
"loss": 3.5938,
"step": 231
},
{
"epoch": 1.42,
"learning_rate": 1.9346091095648712e-05,
"loss": 3.9492,
"step": 232
},
{
"epoch": 1.43,
"learning_rate": 1.93390050277825e-05,
"loss": 3.6172,
"step": 233
},
{
"epoch": 1.44,
"learning_rate": 1.9331882084539056e-05,
"loss": 3.5977,
"step": 234
},
{
"epoch": 1.44,
"learning_rate": 1.932472229404356e-05,
"loss": 3.5703,
"step": 235
},
{
"epoch": 1.45,
"learning_rate": 1.9317525684566686e-05,
"loss": 3.4336,
"step": 236
},
{
"epoch": 1.45,
"learning_rate": 1.931029228452449e-05,
"loss": 3.5508,
"step": 237
},
{
"epoch": 1.46,
"learning_rate": 1.9303022122478303e-05,
"loss": 3.7188,
"step": 238
},
{
"epoch": 1.47,
"learning_rate": 1.9295715227134595e-05,
"loss": 3.4766,
"step": 239
},
{
"epoch": 1.47,
"learning_rate": 1.9288371627344894e-05,
"loss": 3.6484,
"step": 240
},
{
"epoch": 1.48,
"learning_rate": 1.9280991352105656e-05,
"loss": 3.5703,
"step": 241
},
{
"epoch": 1.48,
"learning_rate": 1.9273574430558143e-05,
"loss": 3.4336,
"step": 242
},
{
"epoch": 1.49,
"learning_rate": 1.9266120891988326e-05,
"loss": 3.5469,
"step": 243
},
{
"epoch": 1.5,
"learning_rate": 1.925863076582674e-05,
"loss": 3.2812,
"step": 244
},
{
"epoch": 1.5,
"learning_rate": 1.9251104081648423e-05,
"loss": 3.4102,
"step": 245
},
{
"epoch": 1.51,
"learning_rate": 1.9243540869172724e-05,
"loss": 3.332,
"step": 246
},
{
"epoch": 1.52,
"learning_rate": 1.9235941158263253e-05,
"loss": 3.5039,
"step": 247
},
{
"epoch": 1.52,
"learning_rate": 1.922830497892772e-05,
"loss": 3.4883,
"step": 248
},
{
"epoch": 1.53,
"learning_rate": 1.9220632361317843e-05,
"loss": 3.5664,
"step": 249
},
{
"epoch": 1.53,
"learning_rate": 1.9212923335729206e-05,
"loss": 3.5195,
"step": 250
},
{
"epoch": 1.54,
"learning_rate": 1.920517793260116e-05,
"loss": 3.4531,
"step": 251
},
{
"epoch": 1.55,
"learning_rate": 1.9197396182516694e-05,
"loss": 3.7734,
"step": 252
},
{
"epoch": 1.55,
"learning_rate": 1.918957811620231e-05,
"loss": 3.6953,
"step": 253
},
{
"epoch": 1.56,
"learning_rate": 1.9181723764527902e-05,
"loss": 3.6133,
"step": 254
},
{
"epoch": 1.56,
"learning_rate": 1.917383315850665e-05,
"loss": 3.5391,
"step": 255
},
{
"epoch": 1.57,
"learning_rate": 1.9165906329294875e-05,
"loss": 3.5898,
"step": 256
},
{
"epoch": 1.58,
"learning_rate": 1.9157943308191934e-05,
"loss": 3.7188,
"step": 257
},
{
"epoch": 1.58,
"learning_rate": 1.914994412664008e-05,
"loss": 3.8125,
"step": 258
},
{
"epoch": 1.59,
"learning_rate": 1.9141908816224356e-05,
"loss": 3.875,
"step": 259
},
{
"epoch": 1.6,
"learning_rate": 1.9133837408672456e-05,
"loss": 3.4102,
"step": 260
},
{
"epoch": 1.6,
"learning_rate": 1.9125729935854606e-05,
"loss": 3.2344,
"step": 261
},
{
"epoch": 1.61,
"learning_rate": 1.9117586429783433e-05,
"loss": 3.7656,
"step": 262
},
{
"epoch": 1.61,
"learning_rate": 1.910940692261385e-05,
"loss": 3.6992,
"step": 263
},
{
"epoch": 1.62,
"learning_rate": 1.9101191446642917e-05,
"loss": 3.4766,
"step": 264
},
{
"epoch": 1.63,
"learning_rate": 1.909294003430972e-05,
"loss": 3.1211,
"step": 265
},
{
"epoch": 1.63,
"learning_rate": 1.9084652718195237e-05,
"loss": 3.4102,
"step": 266
},
{
"epoch": 1.64,
"learning_rate": 1.907632953102222e-05,
"loss": 3.6602,
"step": 267
},
{
"epoch": 1.64,
"learning_rate": 1.906797050565505e-05,
"loss": 3.6836,
"step": 268
},
{
"epoch": 1.65,
"learning_rate": 1.9059575675099622e-05,
"loss": 3.582,
"step": 269
},
{
"epoch": 1.66,
"learning_rate": 1.9051145072503216e-05,
"loss": 3.6172,
"step": 270
},
{
"epoch": 1.66,
"learning_rate": 1.9042678731154337e-05,
"loss": 3.457,
"step": 271
},
{
"epoch": 1.67,
"learning_rate": 1.9034176684482638e-05,
"loss": 3.3398,
"step": 272
},
{
"epoch": 1.67,
"learning_rate": 1.9025638966058722e-05,
"loss": 3.4883,
"step": 273
},
{
"epoch": 1.68,
"learning_rate": 1.901706560959407e-05,
"loss": 3.6602,
"step": 274
},
{
"epoch": 1.69,
"learning_rate": 1.900845664894086e-05,
"loss": 3.6797,
"step": 275
},
{
"epoch": 1.69,
"learning_rate": 1.8999812118091877e-05,
"loss": 3.4766,
"step": 276
},
{
"epoch": 1.7,
"learning_rate": 1.8991132051180332e-05,
"loss": 3.3945,
"step": 277
},
{
"epoch": 1.71,
"learning_rate": 1.898241648247977e-05,
"loss": 3.2461,
"step": 278
},
{
"epoch": 1.71,
"learning_rate": 1.8973665446403902e-05,
"loss": 3.4023,
"step": 279
},
{
"epoch": 1.72,
"learning_rate": 1.8964878977506496e-05,
"loss": 3.4492,
"step": 280
},
{
"epoch": 1.72,
"learning_rate": 1.895605711048122e-05,
"loss": 3.5,
"step": 281
},
{
"epoch": 1.73,
"learning_rate": 1.8947199880161515e-05,
"loss": 3.4531,
"step": 282
},
{
"epoch": 1.74,
"learning_rate": 1.8938307321520453e-05,
"loss": 3.6523,
"step": 283
},
{
"epoch": 1.74,
"learning_rate": 1.89293794696706e-05,
"loss": 3.6445,
"step": 284
},
{
"epoch": 1.75,
"learning_rate": 1.8920416359863885e-05,
"loss": 3.3711,
"step": 285
},
{
"epoch": 1.75,
"learning_rate": 1.8911418027491453e-05,
"loss": 3.4414,
"step": 286
},
{
"epoch": 1.76,
"learning_rate": 1.8902384508083518e-05,
"loss": 3.2656,
"step": 287
},
{
"epoch": 1.77,
"learning_rate": 1.8893315837309235e-05,
"loss": 3.6289,
"step": 288
},
{
"epoch": 1.77,
"learning_rate": 1.8884212050976568e-05,
"loss": 3.4023,
"step": 289
},
{
"epoch": 1.78,
"learning_rate": 1.8875073185032116e-05,
"loss": 3.6914,
"step": 290
},
{
"epoch": 1.79,
"learning_rate": 1.8865899275561003e-05,
"loss": 3.3281,
"step": 291
},
{
"epoch": 1.79,
"learning_rate": 1.885669035878672e-05,
"loss": 3.7227,
"step": 292
},
{
"epoch": 1.8,
"learning_rate": 1.8847446471070985e-05,
"loss": 3.2891,
"step": 293
},
{
"epoch": 1.8,
"learning_rate": 1.8838167648913606e-05,
"loss": 3.4844,
"step": 294
},
{
"epoch": 1.81,
"learning_rate": 1.882885392895232e-05,
"loss": 3.7617,
"step": 295
},
{
"epoch": 1.82,
"learning_rate": 1.881950534796267e-05,
"loss": 3.3945,
"step": 296
},
{
"epoch": 1.82,
"learning_rate": 1.8810121942857848e-05,
"loss": 3.5547,
"step": 297
},
{
"epoch": 1.83,
"learning_rate": 1.8800703750688536e-05,
"loss": 3.6484,
"step": 298
},
{
"epoch": 1.83,
"learning_rate": 1.8791250808642792e-05,
"loss": 3.668,
"step": 299
},
{
"epoch": 1.84,
"learning_rate": 1.8781763154045873e-05,
"loss": 3.5664,
"step": 300
},
{
"epoch": 1.85,
"learning_rate": 1.877224082436011e-05,
"loss": 3.2695,
"step": 301
},
{
"epoch": 1.85,
"learning_rate": 1.8762683857184738e-05,
"loss": 3.5781,
"step": 302
},
{
"epoch": 1.86,
"learning_rate": 1.8753092290255765e-05,
"loss": 3.8359,
"step": 303
},
{
"epoch": 1.87,
"learning_rate": 1.8743466161445823e-05,
"loss": 3.3242,
"step": 304
},
{
"epoch": 1.87,
"learning_rate": 1.8733805508764e-05,
"loss": 3.3086,
"step": 305
},
{
"epoch": 1.88,
"learning_rate": 1.872411037035572e-05,
"loss": 3.4531,
"step": 306
},
{
"epoch": 1.88,
"learning_rate": 1.8714380784502553e-05,
"loss": 3.5586,
"step": 307
},
{
"epoch": 1.89,
"learning_rate": 1.870461678962211e-05,
"loss": 3.6797,
"step": 308
},
{
"epoch": 1.9,
"learning_rate": 1.869481842426784e-05,
"loss": 3.4609,
"step": 309
},
{
"epoch": 1.9,
"learning_rate": 1.8684985727128936e-05,
"loss": 3.6289,
"step": 310
},
{
"epoch": 1.91,
"learning_rate": 1.8675118737030123e-05,
"loss": 3.4844,
"step": 311
},
{
"epoch": 1.91,
"learning_rate": 1.866521749293155e-05,
"loss": 3.7461,
"step": 312
},
{
"epoch": 1.92,
"learning_rate": 1.8655282033928618e-05,
"loss": 3.2852,
"step": 313
},
{
"epoch": 1.93,
"learning_rate": 1.8645312399251818e-05,
"loss": 3.6875,
"step": 314
},
{
"epoch": 1.93,
"learning_rate": 1.8635308628266586e-05,
"loss": 3.2266,
"step": 315
},
{
"epoch": 1.94,
"learning_rate": 1.8625270760473164e-05,
"loss": 3.5977,
"step": 316
},
{
"epoch": 1.94,
"learning_rate": 1.8615198835506393e-05,
"loss": 3.6133,
"step": 317
},
{
"epoch": 1.95,
"learning_rate": 1.8605092893135626e-05,
"loss": 3.6172,
"step": 318
},
{
"epoch": 1.96,
"learning_rate": 1.8594952973264512e-05,
"loss": 3.4766,
"step": 319
},
{
"epoch": 1.96,
"learning_rate": 1.8584779115930866e-05,
"loss": 3.4766,
"step": 320
},
{
"epoch": 1.97,
"learning_rate": 1.857457136130651e-05,
"loss": 3.6875,
"step": 321
},
{
"epoch": 1.98,
"learning_rate": 1.856432974969711e-05,
"loss": 3.3359,
"step": 322
},
{
"epoch": 1.98,
"learning_rate": 1.855405432154201e-05,
"loss": 3.5,
"step": 323
},
{
"epoch": 1.99,
"learning_rate": 1.8543745117414094e-05,
"loss": 3.5547,
"step": 324
},
{
"epoch": 1.99,
"learning_rate": 1.8533402178019596e-05,
"loss": 3.1367,
"step": 325
},
{
"epoch": 2.0,
"learning_rate": 1.8523025544197964e-05,
"loss": 3.4141,
"step": 326
},
{
"epoch": 2.01,
"learning_rate": 1.8512615256921692e-05,
"loss": 3.0078,
"step": 327
},
{
"epoch": 2.01,
"learning_rate": 1.8502171357296144e-05,
"loss": 3.0586,
"step": 328
},
{
"epoch": 2.02,
"learning_rate": 1.8491693886559413e-05,
"loss": 3.1953,
"step": 329
},
{
"epoch": 2.02,
"learning_rate": 1.848118288608215e-05,
"loss": 3.0625,
"step": 330
},
{
"epoch": 2.03,
"learning_rate": 1.8470638397367397e-05,
"loss": 3.25,
"step": 331
},
{
"epoch": 2.04,
"learning_rate": 1.846006046205042e-05,
"loss": 3.2422,
"step": 332
},
{
"epoch": 2.04,
"learning_rate": 1.8449449121898552e-05,
"loss": 2.9258,
"step": 333
},
{
"epoch": 2.05,
"learning_rate": 1.8438804418811038e-05,
"loss": 2.9883,
"step": 334
},
{
"epoch": 2.06,
"learning_rate": 1.842812639481884e-05,
"loss": 3.3203,
"step": 335
},
{
"epoch": 2.06,
"learning_rate": 1.84174150920845e-05,
"loss": 3.0195,
"step": 336
},
{
"epoch": 2.07,
"learning_rate": 1.8406670552901958e-05,
"loss": 2.9375,
"step": 337
},
{
"epoch": 2.07,
"learning_rate": 1.839589281969639e-05,
"loss": 3.2578,
"step": 338
},
{
"epoch": 2.08,
"learning_rate": 1.8385081935024044e-05,
"loss": 3.0469,
"step": 339
},
{
"epoch": 2.09,
"learning_rate": 1.837423794157206e-05,
"loss": 3.1367,
"step": 340
},
{
"epoch": 2.09,
"learning_rate": 1.836336088215831e-05,
"loss": 3.0234,
"step": 341
},
{
"epoch": 2.1,
"learning_rate": 1.835245079973124e-05,
"loss": 2.8242,
"step": 342
},
{
"epoch": 2.1,
"learning_rate": 1.834150773736967e-05,
"loss": 2.9414,
"step": 343
},
{
"epoch": 2.11,
"learning_rate": 1.8330531738282656e-05,
"loss": 3.0742,
"step": 344
},
{
"epoch": 2.12,
"learning_rate": 1.8319522845809306e-05,
"loss": 3.0625,
"step": 345
},
{
"epoch": 2.12,
"learning_rate": 1.8308481103418597e-05,
"loss": 2.8828,
"step": 346
},
{
"epoch": 2.13,
"learning_rate": 1.8297406554709228e-05,
"loss": 3.1836,
"step": 347
},
{
"epoch": 2.13,
"learning_rate": 1.8286299243409424e-05,
"loss": 2.8086,
"step": 348
},
{
"epoch": 2.14,
"learning_rate": 1.8275159213376783e-05,
"loss": 2.9258,
"step": 349
},
{
"epoch": 2.15,
"learning_rate": 1.826398650859809e-05,
"loss": 3.0977,
"step": 350
},
{
"epoch": 2.15,
"learning_rate": 1.8252781173189148e-05,
"loss": 3.3086,
"step": 351
},
{
"epoch": 2.16,
"learning_rate": 1.82415432513946e-05,
"loss": 3.0117,
"step": 352
},
{
"epoch": 2.17,
"learning_rate": 1.823027278758776e-05,
"loss": 2.957,
"step": 353
},
{
"epoch": 2.17,
"learning_rate": 1.821896982627044e-05,
"loss": 3.2617,
"step": 354
},
{
"epoch": 2.18,
"learning_rate": 1.8207634412072765e-05,
"loss": 3.1172,
"step": 355
},
{
"epoch": 2.18,
"learning_rate": 1.8196266589753e-05,
"loss": 2.8867,
"step": 356
},
{
"epoch": 2.19,
"learning_rate": 1.818486640419737e-05,
"loss": 3.2539,
"step": 357
},
{
"epoch": 2.2,
"learning_rate": 1.81734339004199e-05,
"loss": 2.8633,
"step": 358
},
{
"epoch": 2.2,
"learning_rate": 1.816196912356222e-05,
"loss": 3.1016,
"step": 359
},
{
"epoch": 2.21,
"learning_rate": 1.8150472118893382e-05,
"loss": 3.0898,
"step": 360
},
{
"epoch": 2.21,
"learning_rate": 1.8138942931809702e-05,
"loss": 2.9453,
"step": 361
},
{
"epoch": 2.22,
"learning_rate": 1.8127381607834563e-05,
"loss": 3.2383,
"step": 362
},
{
"epoch": 2.23,
"learning_rate": 1.8115788192618247e-05,
"loss": 3.0703,
"step": 363
},
{
"epoch": 2.23,
"learning_rate": 1.8104162731937746e-05,
"loss": 3.0977,
"step": 364
},
{
"epoch": 2.24,
"learning_rate": 1.8092505271696582e-05,
"loss": 3.2344,
"step": 365
},
{
"epoch": 2.25,
"learning_rate": 1.808081585792463e-05,
"loss": 2.7617,
"step": 366
},
{
"epoch": 2.25,
"learning_rate": 1.8069094536777938e-05,
"loss": 3.0898,
"step": 367
},
{
"epoch": 2.26,
"learning_rate": 1.805734135453854e-05,
"loss": 3.0781,
"step": 368
},
{
"epoch": 2.26,
"learning_rate": 1.8045556357614273e-05,
"loss": 3.4922,
"step": 369
},
{
"epoch": 2.27,
"learning_rate": 1.8033739592538598e-05,
"loss": 3.1211,
"step": 370
},
{
"epoch": 2.28,
"learning_rate": 1.8021891105970405e-05,
"loss": 2.9453,
"step": 371
},
{
"epoch": 2.28,
"learning_rate": 1.8010010944693846e-05,
"loss": 3.1016,
"step": 372
},
{
"epoch": 2.29,
"learning_rate": 1.7998099155618147e-05,
"loss": 3.0117,
"step": 373
},
{
"epoch": 2.29,
"learning_rate": 1.7986155785777402e-05,
"loss": 3.1523,
"step": 374
},
{
"epoch": 2.3,
"learning_rate": 1.7974180882330413e-05,
"loss": 3.0352,
"step": 375
},
{
"epoch": 2.31,
"learning_rate": 1.7962174492560492e-05,
"loss": 2.8711,
"step": 376
},
{
"epoch": 2.31,
"learning_rate": 1.7950136663875274e-05,
"loss": 3.1953,
"step": 377
},
{
"epoch": 2.32,
"learning_rate": 1.7938067443806538e-05,
"loss": 3.2188,
"step": 378
},
{
"epoch": 2.33,
"learning_rate": 1.7925966880009998e-05,
"loss": 2.8203,
"step": 379
},
{
"epoch": 2.33,
"learning_rate": 1.791383502026515e-05,
"loss": 3.1172,
"step": 380
},
{
"epoch": 2.34,
"learning_rate": 1.790167191247504e-05,
"loss": 2.9414,
"step": 381
},
{
"epoch": 2.34,
"learning_rate": 1.7889477604666124e-05,
"loss": 2.8398,
"step": 382
},
{
"epoch": 2.35,
"learning_rate": 1.787725214498803e-05,
"loss": 3.1836,
"step": 383
},
{
"epoch": 2.36,
"learning_rate": 1.78649955817134e-05,
"loss": 3.0625,
"step": 384
},
{
"epoch": 2.36,
"learning_rate": 1.785270796323769e-05,
"loss": 2.8945,
"step": 385
},
{
"epoch": 2.37,
"learning_rate": 1.784038933807898e-05,
"loss": 2.9688,
"step": 386
},
{
"epoch": 2.37,
"learning_rate": 1.7828039754877778e-05,
"loss": 3.0352,
"step": 387
},
{
"epoch": 2.38,
"learning_rate": 1.7815659262396825e-05,
"loss": 3.0977,
"step": 388
},
{
"epoch": 2.39,
"learning_rate": 1.780324790952092e-05,
"loss": 3.1445,
"step": 389
},
{
"epoch": 2.39,
"learning_rate": 1.7790805745256703e-05,
"loss": 2.9766,
"step": 390
},
{
"epoch": 2.4,
"learning_rate": 1.7778332818732492e-05,
"loss": 3.0547,
"step": 391
},
{
"epoch": 2.4,
"learning_rate": 1.7765829179198048e-05,
"loss": 3.1758,
"step": 392
},
{
"epoch": 2.41,
"learning_rate": 1.7753294876024417e-05,
"loss": 3.0625,
"step": 393
},
{
"epoch": 2.42,
"learning_rate": 1.7740729958703725e-05,
"loss": 2.9297,
"step": 394
},
{
"epoch": 2.42,
"learning_rate": 1.7728134476848965e-05,
"loss": 3.0586,
"step": 395
},
{
"epoch": 2.43,
"learning_rate": 1.7715508480193832e-05,
"loss": 3.0039,
"step": 396
},
{
"epoch": 2.44,
"learning_rate": 1.7702852018592493e-05,
"loss": 2.8086,
"step": 397
},
{
"epoch": 2.44,
"learning_rate": 1.769016514201942e-05,
"loss": 2.9336,
"step": 398
},
{
"epoch": 2.45,
"learning_rate": 1.7677447900569166e-05,
"loss": 3.4219,
"step": 399
},
{
"epoch": 2.45,
"learning_rate": 1.7664700344456198e-05,
"loss": 3.0625,
"step": 400
},
{
"epoch": 2.46,
"learning_rate": 1.765192252401467e-05,
"loss": 3.2617,
"step": 401
},
{
"epoch": 2.47,
"learning_rate": 1.7639114489698238e-05,
"loss": 3.0977,
"step": 402
},
{
"epoch": 2.47,
"learning_rate": 1.762627629207986e-05,
"loss": 3.0703,
"step": 403
},
{
"epoch": 2.48,
"learning_rate": 1.7613407981851586e-05,
"loss": 3.0938,
"step": 404
},
{
"epoch": 2.48,
"learning_rate": 1.760050960982439e-05,
"loss": 3.3047,
"step": 405
},
{
"epoch": 2.49,
"learning_rate": 1.758758122692791e-05,
"loss": 2.8867,
"step": 406
},
{
"epoch": 2.5,
"learning_rate": 1.757462288421032e-05,
"loss": 3.2148,
"step": 407
},
{
"epoch": 2.5,
"learning_rate": 1.7561634632838062e-05,
"loss": 3.1172,
"step": 408
},
{
"epoch": 2.51,
"learning_rate": 1.7548616524095697e-05,
"loss": 2.9141,
"step": 409
},
{
"epoch": 2.52,
"learning_rate": 1.753556860938566e-05,
"loss": 3.0938,
"step": 410
},
{
"epoch": 2.52,
"learning_rate": 1.7522490940228086e-05,
"loss": 2.8672,
"step": 411
},
{
"epoch": 2.53,
"learning_rate": 1.7509383568260597e-05,
"loss": 3.1641,
"step": 412
},
{
"epoch": 2.53,
"learning_rate": 1.749624654523809e-05,
"loss": 2.9883,
"step": 413
},
{
"epoch": 2.54,
"learning_rate": 1.7483079923032543e-05,
"loss": 3.0898,
"step": 414
},
{
"epoch": 2.55,
"learning_rate": 1.7469883753632817e-05,
"loss": 3.0391,
"step": 415
},
{
"epoch": 2.55,
"learning_rate": 1.745665808914443e-05,
"loss": 3.1055,
"step": 416
},
{
"epoch": 2.56,
"learning_rate": 1.744340298178936e-05,
"loss": 3.0664,
"step": 417
},
{
"epoch": 2.56,
"learning_rate": 1.743011848390585e-05,
"loss": 2.8672,
"step": 418
},
{
"epoch": 2.57,
"learning_rate": 1.7416804647948194e-05,
"loss": 3.2891,
"step": 419
},
{
"epoch": 2.58,
"learning_rate": 1.740346152648652e-05,
"loss": 2.9805,
"step": 420
},
{
"epoch": 2.58,
"learning_rate": 1.7390089172206594e-05,
"loss": 2.7305,
"step": 421
},
{
"epoch": 2.59,
"learning_rate": 1.7376687637909607e-05,
"loss": 3.0547,
"step": 422
},
{
"epoch": 2.6,
"learning_rate": 1.7363256976511972e-05,
"loss": 2.7773,
"step": 423
},
{
"epoch": 2.6,
"learning_rate": 1.7349797241045115e-05,
"loss": 3.2188,
"step": 424
},
{
"epoch": 2.61,
"learning_rate": 1.733630848465525e-05,
"loss": 3.0156,
"step": 425
},
{
"epoch": 2.61,
"learning_rate": 1.732279076060319e-05,
"loss": 3.1328,
"step": 426
},
{
"epoch": 2.62,
"learning_rate": 1.730924412226413e-05,
"loss": 3.0664,
"step": 427
},
{
"epoch": 2.63,
"learning_rate": 1.729566862312742e-05,
"loss": 2.9102,
"step": 428
},
{
"epoch": 2.63,
"learning_rate": 1.7282064316796387e-05,
"loss": 3.0508,
"step": 429
},
{
"epoch": 2.64,
"learning_rate": 1.726843125698809e-05,
"loss": 2.8711,
"step": 430
},
{
"epoch": 2.64,
"learning_rate": 1.7254769497533128e-05,
"loss": 2.75,
"step": 431
},
{
"epoch": 2.65,
"learning_rate": 1.724107909237542e-05,
"loss": 2.8438,
"step": 432
},
{
"epoch": 2.66,
"learning_rate": 1.7227360095571992e-05,
"loss": 2.9883,
"step": 433
},
{
"epoch": 2.66,
"learning_rate": 1.721361256129277e-05,
"loss": 3.2461,
"step": 434
},
{
"epoch": 2.67,
"learning_rate": 1.719983654382036e-05,
"loss": 3.0781,
"step": 435
},
{
"epoch": 2.67,
"learning_rate": 1.7186032097549822e-05,
"loss": 3.1523,
"step": 436
},
{
"epoch": 2.68,
"learning_rate": 1.717219927698849e-05,
"loss": 2.832,
"step": 437
},
{
"epoch": 2.69,
"learning_rate": 1.7158338136755724e-05,
"loss": 3.2617,
"step": 438
},
{
"epoch": 2.69,
"learning_rate": 1.7144448731582698e-05,
"loss": 3.0781,
"step": 439
},
{
"epoch": 2.7,
"learning_rate": 1.7130531116312202e-05,
"loss": 3.1641,
"step": 440
},
{
"epoch": 2.71,
"learning_rate": 1.7116585345898413e-05,
"loss": 3.1484,
"step": 441
},
{
"epoch": 2.71,
"learning_rate": 1.7102611475406676e-05,
"loss": 3.2656,
"step": 442
},
{
"epoch": 2.72,
"learning_rate": 1.7088609560013284e-05,
"loss": 3.0938,
"step": 443
},
{
"epoch": 2.72,
"learning_rate": 1.7074579655005282e-05,
"loss": 2.9648,
"step": 444
},
{
"epoch": 2.73,
"learning_rate": 1.7060521815780225e-05,
"loss": 3.1328,
"step": 445
},
{
"epoch": 2.74,
"learning_rate": 1.704643609784596e-05,
"loss": 3.1211,
"step": 446
},
{
"epoch": 2.74,
"learning_rate": 1.7032322556820428e-05,
"loss": 3.1719,
"step": 447
},
{
"epoch": 2.75,
"learning_rate": 1.7018181248431416e-05,
"loss": 2.9883,
"step": 448
},
{
"epoch": 2.75,
"learning_rate": 1.700401222851636e-05,
"loss": 3.1172,
"step": 449
},
{
"epoch": 2.76,
"learning_rate": 1.698981555302212e-05,
"loss": 2.9531,
"step": 450
},
{
"epoch": 2.77,
"learning_rate": 1.6975591278004747e-05,
"loss": 2.9375,
"step": 451
},
{
"epoch": 2.77,
"learning_rate": 1.696133945962927e-05,
"loss": 3.1875,
"step": 452
},
{
"epoch": 2.78,
"learning_rate": 1.6947060154169473e-05,
"loss": 3.0742,
"step": 453
},
{
"epoch": 2.79,
"learning_rate": 1.6932753418007683e-05,
"loss": 3.0977,
"step": 454
},
{
"epoch": 2.79,
"learning_rate": 1.691841930763453e-05,
"loss": 2.9531,
"step": 455
},
{
"epoch": 2.8,
"learning_rate": 1.690405787964873e-05,
"loss": 2.9609,
"step": 456
},
{
"epoch": 2.8,
"learning_rate": 1.688966919075687e-05,
"loss": 2.7578,
"step": 457
},
{
"epoch": 2.81,
"learning_rate": 1.687525329777317e-05,
"loss": 2.9961,
"step": 458
},
{
"epoch": 2.82,
"learning_rate": 1.686081025761928e-05,
"loss": 3.3203,
"step": 459
},
{
"epoch": 2.82,
"learning_rate": 1.684634012732403e-05,
"loss": 2.9258,
"step": 460
},
{
"epoch": 2.83,
"learning_rate": 1.6831842964023212e-05,
"loss": 3.1445,
"step": 461
},
{
"epoch": 2.83,
"learning_rate": 1.6817318824959375e-05,
"loss": 3.2617,
"step": 462
},
{
"epoch": 2.84,
"learning_rate": 1.680276776748157e-05,
"loss": 2.9883,
"step": 463
},
{
"epoch": 2.85,
"learning_rate": 1.6788189849045135e-05,
"loss": 2.9219,
"step": 464
},
{
"epoch": 2.85,
"learning_rate": 1.6773585127211478e-05,
"loss": 2.8281,
"step": 465
},
{
"epoch": 2.86,
"learning_rate": 1.6758953659647838e-05,
"loss": 3.0312,
"step": 466
},
{
"epoch": 2.87,
"learning_rate": 1.6744295504127055e-05,
"loss": 3.2461,
"step": 467
},
{
"epoch": 2.87,
"learning_rate": 1.6729610718527357e-05,
"loss": 3.1562,
"step": 468
},
{
"epoch": 2.88,
"learning_rate": 1.6714899360832118e-05,
"loss": 2.9023,
"step": 469
},
{
"epoch": 2.88,
"learning_rate": 1.6700161489129624e-05,
"loss": 3.0898,
"step": 470
},
{
"epoch": 2.89,
"learning_rate": 1.668539716161287e-05,
"loss": 2.9414,
"step": 471
},
{
"epoch": 2.9,
"learning_rate": 1.667060643657929e-05,
"loss": 2.9844,
"step": 472
},
{
"epoch": 2.9,
"learning_rate": 1.6655789372430572e-05,
"loss": 3.0859,
"step": 473
},
{
"epoch": 2.91,
"learning_rate": 1.6640946027672395e-05,
"loss": 3.1758,
"step": 474
},
{
"epoch": 2.91,
"learning_rate": 1.66260764609142e-05,
"loss": 3.1719,
"step": 475
},
{
"epoch": 2.92,
"learning_rate": 1.6611180730868975e-05,
"loss": 3.0508,
"step": 476
},
{
"epoch": 2.93,
"learning_rate": 1.6596258896353027e-05,
"loss": 3.1406,
"step": 477
},
{
"epoch": 2.93,
"learning_rate": 1.658131101628571e-05,
"loss": 3.1836,
"step": 478
},
{
"epoch": 2.94,
"learning_rate": 1.656633714968924e-05,
"loss": 3.0352,
"step": 479
},
{
"epoch": 2.94,
"learning_rate": 1.6551337355688437e-05,
"loss": 2.8789,
"step": 480
},
{
"epoch": 2.95,
"learning_rate": 1.653631169351049e-05,
"loss": 3.1094,
"step": 481
},
{
"epoch": 2.96,
"learning_rate": 1.6521260222484738e-05,
"loss": 3.4102,
"step": 482
},
{
"epoch": 2.96,
"learning_rate": 1.650618300204242e-05,
"loss": 3.293,
"step": 483
},
{
"epoch": 2.97,
"learning_rate": 1.6491080091716457e-05,
"loss": 2.9922,
"step": 484
},
{
"epoch": 2.98,
"learning_rate": 1.64759515511412e-05,
"loss": 3.082,
"step": 485
},
{
"epoch": 2.98,
"learning_rate": 1.6460797440052195e-05,
"loss": 2.9297,
"step": 486
},
{
"epoch": 2.99,
"learning_rate": 1.6445617818285974e-05,
"loss": 2.8906,
"step": 487
},
{
"epoch": 2.99,
"learning_rate": 1.643041274577978e-05,
"loss": 3.0625,
"step": 488
},
{
"epoch": 3.0,
"learning_rate": 1.6415182282571356e-05,
"loss": 3.1562,
"step": 489
},
{
"epoch": 3.01,
"learning_rate": 1.6399926488798702e-05,
"loss": 2.6367,
"step": 490
},
{
"epoch": 3.01,
"learning_rate": 1.6384645424699835e-05,
"loss": 2.207,
"step": 491
},
{
"epoch": 3.02,
"learning_rate": 1.6369339150612557e-05,
"loss": 2.4844,
"step": 492
},
{
"epoch": 3.02,
"learning_rate": 1.6354007726974205e-05,
"loss": 2.4219,
"step": 493
},
{
"epoch": 3.03,
"learning_rate": 1.6338651214321426e-05,
"loss": 2.4531,
"step": 494
},
{
"epoch": 3.04,
"learning_rate": 1.632326967328993e-05,
"loss": 2.4961,
"step": 495
},
{
"epoch": 3.04,
"learning_rate": 1.630786316461425e-05,
"loss": 2.4219,
"step": 496
},
{
"epoch": 3.05,
"learning_rate": 1.6292431749127507e-05,
"loss": 2.5273,
"step": 497
},
{
"epoch": 3.06,
"learning_rate": 1.627697548776117e-05,
"loss": 2.4492,
"step": 498
},
{
"epoch": 3.06,
"learning_rate": 1.6261494441544805e-05,
"loss": 2.4922,
"step": 499
},
{
"epoch": 3.07,
"learning_rate": 1.624598867160585e-05,
"loss": 2.375,
"step": 500
},
{
"epoch": 3.07,
"learning_rate": 1.623045823916936e-05,
"loss": 2.6914,
"step": 501
},
{
"epoch": 3.08,
"learning_rate": 1.6214903205557774e-05,
"loss": 2.4141,
"step": 502
},
{
"epoch": 3.09,
"learning_rate": 1.619932363219067e-05,
"loss": 2.5742,
"step": 503
},
{
"epoch": 3.09,
"learning_rate": 1.6183719580584515e-05,
"loss": 2.332,
"step": 504
},
{
"epoch": 3.1,
"learning_rate": 1.6168091112352443e-05,
"loss": 2.4727,
"step": 505
},
{
"epoch": 3.1,
"learning_rate": 1.6152438289203982e-05,
"loss": 2.5352,
"step": 506
},
{
"epoch": 3.11,
"learning_rate": 1.6136761172944837e-05,
"loss": 2.4375,
"step": 507
},
{
"epoch": 3.12,
"learning_rate": 1.612105982547663e-05,
"loss": 2.543,
"step": 508
},
{
"epoch": 3.12,
"learning_rate": 1.6105334308796665e-05,
"loss": 2.3945,
"step": 509
},
{
"epoch": 3.13,
"learning_rate": 1.6089584684997674e-05,
"loss": 2.4531,
"step": 510
},
{
"epoch": 3.13,
"learning_rate": 1.607381101626758e-05,
"loss": 2.5781,
"step": 511
},
{
"epoch": 3.14,
"learning_rate": 1.6058013364889247e-05,
"loss": 2.2852,
"step": 512
},
{
"epoch": 3.15,
"learning_rate": 1.6042191793240242e-05,
"loss": 2.293,
"step": 513
},
{
"epoch": 3.15,
"learning_rate": 1.6026346363792565e-05,
"loss": 2.5156,
"step": 514
},
{
"epoch": 3.16,
"learning_rate": 1.6010477139112438e-05,
"loss": 2.3711,
"step": 515
},
{
"epoch": 3.17,
"learning_rate": 1.5994584181860028e-05,
"loss": 2.2891,
"step": 516
},
{
"epoch": 3.17,
"learning_rate": 1.5978667554789216e-05,
"loss": 2.3867,
"step": 517
},
{
"epoch": 3.18,
"learning_rate": 1.596272732074734e-05,
"loss": 2.457,
"step": 518
},
{
"epoch": 3.18,
"learning_rate": 1.5946763542674958e-05,
"loss": 2.293,
"step": 519
},
{
"epoch": 3.19,
"learning_rate": 1.5930776283605585e-05,
"loss": 2.4492,
"step": 520
},
{
"epoch": 3.2,
"learning_rate": 1.5914765606665454e-05,
"loss": 2.2383,
"step": 521
},
{
"epoch": 3.2,
"learning_rate": 1.5898731575073262e-05,
"loss": 2.3281,
"step": 522
},
{
"epoch": 3.21,
"learning_rate": 1.5882674252139928e-05,
"loss": 2.4688,
"step": 523
},
{
"epoch": 3.21,
"learning_rate": 1.5866593701268334e-05,
"loss": 2.3125,
"step": 524
},
{
"epoch": 3.22,
"learning_rate": 1.5850489985953076e-05,
"loss": 2.3672,
"step": 525
},
{
"epoch": 3.23,
"learning_rate": 1.5834363169780227e-05,
"loss": 2.4688,
"step": 526
},
{
"epoch": 3.23,
"learning_rate": 1.5818213316427056e-05,
"loss": 2.375,
"step": 527
},
{
"epoch": 3.24,
"learning_rate": 1.5802040489661817e-05,
"loss": 2.418,
"step": 528
},
{
"epoch": 3.25,
"learning_rate": 1.578584475334345e-05,
"loss": 2.3867,
"step": 529
},
{
"epoch": 3.25,
"learning_rate": 1.5769626171421376e-05,
"loss": 2.2852,
"step": 530
},
{
"epoch": 3.26,
"learning_rate": 1.5753384807935214e-05,
"loss": 2.5234,
"step": 531
},
{
"epoch": 3.26,
"learning_rate": 1.5737120727014535e-05,
"loss": 2.3828,
"step": 532
},
{
"epoch": 3.27,
"learning_rate": 1.572083399287861e-05,
"loss": 2.4023,
"step": 533
},
{
"epoch": 3.28,
"learning_rate": 1.570452466983617e-05,
"loss": 2.4961,
"step": 534
},
{
"epoch": 3.28,
"learning_rate": 1.5688192822285116e-05,
"loss": 2.5234,
"step": 535
},
{
"epoch": 3.29,
"learning_rate": 1.567183851471231e-05,
"loss": 2.418,
"step": 536
},
{
"epoch": 3.29,
"learning_rate": 1.565546181169328e-05,
"loss": 2.3555,
"step": 537
},
{
"epoch": 3.3,
"learning_rate": 1.5639062777892e-05,
"loss": 2.4883,
"step": 538
},
{
"epoch": 3.31,
"learning_rate": 1.5622641478060602e-05,
"loss": 2.5586,
"step": 539
},
{
"epoch": 3.31,
"learning_rate": 1.5606197977039154e-05,
"loss": 2.3359,
"step": 540
},
{
"epoch": 3.32,
"learning_rate": 1.5589732339755362e-05,
"loss": 2.3398,
"step": 541
},
{
"epoch": 3.33,
"learning_rate": 1.5573244631224364e-05,
"loss": 2.2969,
"step": 542
},
{
"epoch": 3.33,
"learning_rate": 1.5556734916548432e-05,
"loss": 2.375,
"step": 543
},
{
"epoch": 3.34,
"learning_rate": 1.5540203260916728e-05,
"loss": 2.3398,
"step": 544
},
{
"epoch": 3.34,
"learning_rate": 1.552364972960506e-05,
"loss": 2.3516,
"step": 545
},
{
"epoch": 3.35,
"learning_rate": 1.5507074387975603e-05,
"loss": 2.4805,
"step": 546
},
{
"epoch": 3.36,
"learning_rate": 1.5490477301476648e-05,
"loss": 2.4766,
"step": 547
},
{
"epoch": 3.36,
"learning_rate": 1.5473858535642365e-05,
"loss": 2.4062,
"step": 548
},
{
"epoch": 3.37,
"learning_rate": 1.5457218156092503e-05,
"loss": 2.4727,
"step": 549
},
{
"epoch": 3.37,
"learning_rate": 1.5440556228532168e-05,
"loss": 2.3672,
"step": 550
},
{
"epoch": 3.38,
"learning_rate": 1.5423872818751544e-05,
"loss": 2.5195,
"step": 551
},
{
"epoch": 3.39,
"learning_rate": 1.5407167992625636e-05,
"loss": 2.418,
"step": 552
},
{
"epoch": 3.39,
"learning_rate": 1.5390441816114022e-05,
"loss": 2.3828,
"step": 553
},
{
"epoch": 3.4,
"learning_rate": 1.5373694355260565e-05,
"loss": 2.4336,
"step": 554
},
{
"epoch": 3.4,
"learning_rate": 1.5356925676193192e-05,
"loss": 2.3086,
"step": 555
},
{
"epoch": 3.41,
"learning_rate": 1.534013584512359e-05,
"loss": 2.25,
"step": 556
},
{
"epoch": 3.42,
"learning_rate": 1.5323324928346984e-05,
"loss": 2.3242,
"step": 557
},
{
"epoch": 3.42,
"learning_rate": 1.5306492992241836e-05,
"loss": 2.4023,
"step": 558
},
{
"epoch": 3.43,
"learning_rate": 1.5289640103269626e-05,
"loss": 2.4531,
"step": 559
},
{
"epoch": 3.44,
"learning_rate": 1.527276632797455e-05,
"loss": 2.3945,
"step": 560
},
{
"epoch": 3.44,
"learning_rate": 1.5255871732983284e-05,
"loss": 2.4258,
"step": 561
},
{
"epoch": 3.45,
"learning_rate": 1.5238956385004703e-05,
"loss": 2.4766,
"step": 562
},
{
"epoch": 3.45,
"learning_rate": 1.5222020350829636e-05,
"loss": 2.4141,
"step": 563
},
{
"epoch": 3.46,
"learning_rate": 1.5205063697330582e-05,
"loss": 2.3359,
"step": 564
},
{
"epoch": 3.47,
"learning_rate": 1.5188086491461467e-05,
"loss": 2.3047,
"step": 565
},
{
"epoch": 3.47,
"learning_rate": 1.5171088800257354e-05,
"loss": 2.5508,
"step": 566
},
{
"epoch": 3.48,
"learning_rate": 1.5154070690834211e-05,
"loss": 2.0957,
"step": 567
},
{
"epoch": 3.48,
"learning_rate": 1.5137032230388613e-05,
"loss": 2.4102,
"step": 568
},
{
"epoch": 3.49,
"learning_rate": 1.5119973486197497e-05,
"loss": 2.5352,
"step": 569
},
{
"epoch": 3.5,
"learning_rate": 1.5102894525617892e-05,
"loss": 2.25,
"step": 570
},
{
"epoch": 3.5,
"learning_rate": 1.5085795416086655e-05,
"loss": 2.3047,
"step": 571
},
{
"epoch": 3.51,
"learning_rate": 1.5068676225120196e-05,
"loss": 2.3359,
"step": 572
},
{
"epoch": 3.52,
"learning_rate": 1.5051537020314218e-05,
"loss": 2.5508,
"step": 573
},
{
"epoch": 3.52,
"learning_rate": 1.5034377869343453e-05,
"loss": 2.6211,
"step": 574
},
{
"epoch": 3.53,
"learning_rate": 1.5017198839961388e-05,
"loss": 2.5625,
"step": 575
},
{
"epoch": 3.53,
"learning_rate": 1.5000000000000002e-05,
"loss": 2.293,
"step": 576
},
{
"epoch": 3.54,
"learning_rate": 1.4982781417369496e-05,
"loss": 2.5078,
"step": 577
},
{
"epoch": 3.55,
"learning_rate": 1.4965543160058028e-05,
"loss": 2.3594,
"step": 578
},
{
"epoch": 3.55,
"learning_rate": 1.4948285296131435e-05,
"loss": 2.4531,
"step": 579
},
{
"epoch": 3.56,
"learning_rate": 1.4931007893732981e-05,
"loss": 2.4961,
"step": 580
},
{
"epoch": 3.56,
"learning_rate": 1.4913711021083071e-05,
"loss": 2.3672,
"step": 581
},
{
"epoch": 3.57,
"learning_rate": 1.4896394746478995e-05,
"loss": 2.5469,
"step": 582
},
{
"epoch": 3.58,
"learning_rate": 1.4879059138294647e-05,
"loss": 2.5703,
"step": 583
},
{
"epoch": 3.58,
"learning_rate": 1.4861704264980264e-05,
"loss": 2.5859,
"step": 584
},
{
"epoch": 3.59,
"learning_rate": 1.4844330195062145e-05,
"loss": 2.4648,
"step": 585
},
{
"epoch": 3.6,
"learning_rate": 1.4826936997142399e-05,
"loss": 2.4883,
"step": 586
},
{
"epoch": 3.6,
"learning_rate": 1.4809524739898651e-05,
"loss": 2.2656,
"step": 587
},
{
"epoch": 3.61,
"learning_rate": 1.4792093492083792e-05,
"loss": 2.2734,
"step": 588
},
{
"epoch": 3.61,
"learning_rate": 1.4774643322525691e-05,
"loss": 2.5156,
"step": 589
},
{
"epoch": 3.62,
"learning_rate": 1.4757174300126935e-05,
"loss": 2.6797,
"step": 590
},
{
"epoch": 3.63,
"learning_rate": 1.473968649386455e-05,
"loss": 2.3398,
"step": 591
},
{
"epoch": 3.63,
"learning_rate": 1.4722179972789725e-05,
"loss": 2.2539,
"step": 592
},
{
"epoch": 3.64,
"learning_rate": 1.4704654806027558e-05,
"loss": 2.5781,
"step": 593
},
{
"epoch": 3.64,
"learning_rate": 1.4687111062776758e-05,
"loss": 2.5352,
"step": 594
},
{
"epoch": 3.65,
"learning_rate": 1.466954881230939e-05,
"loss": 2.5195,
"step": 595
},
{
"epoch": 3.66,
"learning_rate": 1.4651968123970592e-05,
"loss": 2.3945,
"step": 596
},
{
"epoch": 3.66,
"learning_rate": 1.4634369067178312e-05,
"loss": 2.4922,
"step": 597
},
{
"epoch": 3.67,
"learning_rate": 1.4616751711423016e-05,
"loss": 2.4922,
"step": 598
},
{
"epoch": 3.67,
"learning_rate": 1.4599116126267431e-05,
"loss": 2.4961,
"step": 599
},
{
"epoch": 3.68,
"learning_rate": 1.4581462381346261e-05,
"loss": 2.4922,
"step": 600
},
{
"epoch": 3.69,
"learning_rate": 1.4563790546365914e-05,
"loss": 2.5,
"step": 601
},
{
"epoch": 3.69,
"learning_rate": 1.454610069110423e-05,
"loss": 2.4219,
"step": 602
},
{
"epoch": 3.7,
"learning_rate": 1.45283928854102e-05,
"loss": 2.418,
"step": 603
},
{
"epoch": 3.71,
"learning_rate": 1.4510667199203697e-05,
"loss": 2.5488,
"step": 604
},
{
"epoch": 3.71,
"learning_rate": 1.4492923702475183e-05,
"loss": 2.5312,
"step": 605
},
{
"epoch": 3.72,
"learning_rate": 1.4475162465285463e-05,
"loss": 2.5273,
"step": 606
},
{
"epoch": 3.72,
"learning_rate": 1.4457383557765385e-05,
"loss": 2.4141,
"step": 607
},
{
"epoch": 3.73,
"learning_rate": 1.443958705011556e-05,
"loss": 2.4453,
"step": 608
},
{
"epoch": 3.74,
"learning_rate": 1.4421773012606104e-05,
"loss": 2.293,
"step": 609
},
{
"epoch": 3.74,
"learning_rate": 1.4403941515576344e-05,
"loss": 2.4258,
"step": 610
},
{
"epoch": 3.75,
"learning_rate": 1.4386092629434551e-05,
"loss": 2.4648,
"step": 611
},
{
"epoch": 3.75,
"learning_rate": 1.4368226424657661e-05,
"loss": 2.3438,
"step": 612
},
{
"epoch": 3.76,
"learning_rate": 1.4350342971790979e-05,
"loss": 2.2168,
"step": 613
},
{
"epoch": 3.77,
"learning_rate": 1.4332442341447926e-05,
"loss": 2.3828,
"step": 614
},
{
"epoch": 3.77,
"learning_rate": 1.4314524604309748e-05,
"loss": 2.5117,
"step": 615
},
{
"epoch": 3.78,
"learning_rate": 1.4296589831125234e-05,
"loss": 2.4961,
"step": 616
},
{
"epoch": 3.79,
"learning_rate": 1.4278638092710446e-05,
"loss": 2.5391,
"step": 617
},
{
"epoch": 3.79,
"learning_rate": 1.4260669459948429e-05,
"loss": 2.3828,
"step": 618
},
{
"epoch": 3.8,
"learning_rate": 1.4242684003788934e-05,
"loss": 2.4102,
"step": 619
},
{
"epoch": 3.8,
"learning_rate": 1.4224681795248149e-05,
"loss": 2.457,
"step": 620
},
{
"epoch": 3.81,
"learning_rate": 1.42066629054084e-05,
"loss": 2.5,
"step": 621
},
{
"epoch": 3.82,
"learning_rate": 1.418862740541788e-05,
"loss": 2.4102,
"step": 622
},
{
"epoch": 3.82,
"learning_rate": 1.4170575366490376e-05,
"loss": 2.1758,
"step": 623
},
{
"epoch": 3.83,
"learning_rate": 1.415250685990497e-05,
"loss": 2.6445,
"step": 624
},
{
"epoch": 3.83,
"learning_rate": 1.4134421957005775e-05,
"loss": 2.043,
"step": 625
},
{
"epoch": 3.84,
"learning_rate": 1.4116320729201642e-05,
"loss": 2.457,
"step": 626
},
{
"epoch": 3.85,
"learning_rate": 1.4098203247965876e-05,
"loss": 2.1992,
"step": 627
},
{
"epoch": 3.85,
"learning_rate": 1.4080069584835971e-05,
"loss": 2.2891,
"step": 628
},
{
"epoch": 3.86,
"learning_rate": 1.4061919811413305e-05,
"loss": 2.2227,
"step": 629
},
{
"epoch": 3.87,
"learning_rate": 1.4043753999362872e-05,
"loss": 2.2305,
"step": 630
},
{
"epoch": 3.87,
"learning_rate": 1.4025572220412998e-05,
"loss": 2.625,
"step": 631
},
{
"epoch": 3.88,
"learning_rate": 1.400737454635505e-05,
"loss": 2.4219,
"step": 632
},
{
"epoch": 3.88,
"learning_rate": 1.398916104904316e-05,
"loss": 2.6133,
"step": 633
},
{
"epoch": 3.89,
"learning_rate": 1.3970931800393943e-05,
"loss": 2.5625,
"step": 634
},
{
"epoch": 3.9,
"learning_rate": 1.3952686872386195e-05,
"loss": 2.4531,
"step": 635
},
{
"epoch": 3.9,
"learning_rate": 1.3934426337060638e-05,
"loss": 2.6016,
"step": 636
},
{
"epoch": 3.91,
"learning_rate": 1.391615026651961e-05,
"loss": 2.3789,
"step": 637
},
{
"epoch": 3.91,
"learning_rate": 1.3897858732926794e-05,
"loss": 2.3281,
"step": 638
},
{
"epoch": 3.92,
"learning_rate": 1.3879551808506932e-05,
"loss": 2.2031,
"step": 639
},
{
"epoch": 3.93,
"learning_rate": 1.3861229565545532e-05,
"loss": 2.5352,
"step": 640
},
{
"epoch": 3.93,
"learning_rate": 1.384289207638859e-05,
"loss": 2.3008,
"step": 641
},
{
"epoch": 3.94,
"learning_rate": 1.3824539413442304e-05,
"loss": 2.5352,
"step": 642
},
{
"epoch": 3.94,
"learning_rate": 1.3806171649172782e-05,
"loss": 2.4922,
"step": 643
},
{
"epoch": 3.95,
"learning_rate": 1.3787788856105762e-05,
"loss": 2.3945,
"step": 644
},
{
"epoch": 3.96,
"learning_rate": 1.3769391106826326e-05,
"loss": 2.6016,
"step": 645
},
{
"epoch": 3.96,
"learning_rate": 1.3750978473978611e-05,
"loss": 2.4375,
"step": 646
},
{
"epoch": 3.97,
"learning_rate": 1.3732551030265514e-05,
"loss": 2.5195,
"step": 647
},
{
"epoch": 3.98,
"learning_rate": 1.371410884844843e-05,
"loss": 2.5391,
"step": 648
},
{
"epoch": 3.98,
"learning_rate": 1.3695652001346928e-05,
"loss": 2.4102,
"step": 649
},
{
"epoch": 3.99,
"learning_rate": 1.3677180561838501e-05,
"loss": 2.4727,
"step": 650
},
{
"epoch": 3.99,
"learning_rate": 1.3658694602858247e-05,
"loss": 2.6055,
"step": 651
},
{
"epoch": 4.0,
"learning_rate": 1.36401941973986e-05,
"loss": 2.2852,
"step": 652
},
{
"epoch": 4.01,
"learning_rate": 1.362167941850904e-05,
"loss": 1.9121,
"step": 653
},
{
"epoch": 4.01,
"learning_rate": 1.3603150339295797e-05,
"loss": 2.0977,
"step": 654
},
{
"epoch": 4.02,
"learning_rate": 1.3584607032921566e-05,
"loss": 1.9668,
"step": 655
},
{
"epoch": 4.02,
"learning_rate": 1.3566049572605222e-05,
"loss": 1.8398,
"step": 656
},
{
"epoch": 4.03,
"learning_rate": 1.3547478031621517e-05,
"loss": 1.7559,
"step": 657
},
{
"epoch": 4.04,
"learning_rate": 1.3528892483300821e-05,
"loss": 2.0586,
"step": 658
},
{
"epoch": 4.04,
"learning_rate": 1.3510293001028792e-05,
"loss": 1.8984,
"step": 659
},
{
"epoch": 4.05,
"learning_rate": 1.3491679658246114e-05,
"loss": 1.6895,
"step": 660
},
{
"epoch": 4.06,
"learning_rate": 1.3473052528448203e-05,
"loss": 1.7812,
"step": 661
},
{
"epoch": 4.06,
"learning_rate": 1.3454411685184913e-05,
"loss": 1.7539,
"step": 662
},
{
"epoch": 4.07,
"learning_rate": 1.3435757202060242e-05,
"loss": 1.9492,
"step": 663
},
{
"epoch": 4.07,
"learning_rate": 1.3417089152732049e-05,
"loss": 1.7031,
"step": 664
},
{
"epoch": 4.08,
"learning_rate": 1.3398407610911752e-05,
"loss": 1.791,
"step": 665
},
{
"epoch": 4.09,
"learning_rate": 1.3379712650364061e-05,
"loss": 1.8066,
"step": 666
},
{
"epoch": 4.09,
"learning_rate": 1.3361004344906652e-05,
"loss": 1.6992,
"step": 667
},
{
"epoch": 4.1,
"learning_rate": 1.3342282768409904e-05,
"loss": 1.8965,
"step": 668
},
{
"epoch": 4.1,
"learning_rate": 1.3323547994796597e-05,
"loss": 1.7832,
"step": 669
},
{
"epoch": 4.11,
"learning_rate": 1.330480009804162e-05,
"loss": 1.8633,
"step": 670
},
{
"epoch": 4.12,
"learning_rate": 1.3286039152171667e-05,
"loss": 1.6055,
"step": 671
},
{
"epoch": 4.12,
"learning_rate": 1.3267265231264982e-05,
"loss": 1.8164,
"step": 672
},
{
"epoch": 4.13,
"learning_rate": 1.3248478409451017e-05,
"loss": 1.9805,
"step": 673
},
{
"epoch": 4.13,
"learning_rate": 1.3229678760910174e-05,
"loss": 1.666,
"step": 674
},
{
"epoch": 4.14,
"learning_rate": 1.3210866359873506e-05,
"loss": 1.8867,
"step": 675
},
{
"epoch": 4.15,
"learning_rate": 1.3192041280622409e-05,
"loss": 1.9473,
"step": 676
},
{
"epoch": 4.15,
"learning_rate": 1.3173203597488348e-05,
"loss": 1.9375,
"step": 677
},
{
"epoch": 4.16,
"learning_rate": 1.3154353384852559e-05,
"loss": 1.8145,
"step": 678
},
{
"epoch": 4.17,
"learning_rate": 1.3135490717145726e-05,
"loss": 1.7539,
"step": 679
},
{
"epoch": 4.17,
"learning_rate": 1.3116615668847749e-05,
"loss": 1.7734,
"step": 680
},
{
"epoch": 4.18,
"learning_rate": 1.3097728314487385e-05,
"loss": 1.7656,
"step": 681
},
{
"epoch": 4.18,
"learning_rate": 1.3078828728641994e-05,
"loss": 1.8672,
"step": 682
},
{
"epoch": 4.19,
"learning_rate": 1.305991698593723e-05,
"loss": 1.7656,
"step": 683
},
{
"epoch": 4.2,
"learning_rate": 1.3040993161046749e-05,
"loss": 1.8789,
"step": 684
},
{
"epoch": 4.2,
"learning_rate": 1.3022057328691915e-05,
"loss": 1.627,
"step": 685
},
{
"epoch": 4.21,
"learning_rate": 1.3003109563641499e-05,
"loss": 1.7695,
"step": 686
},
{
"epoch": 4.21,
"learning_rate": 1.298414994071139e-05,
"loss": 1.709,
"step": 687
},
{
"epoch": 4.22,
"learning_rate": 1.2965178534764311e-05,
"loss": 1.7383,
"step": 688
},
{
"epoch": 4.23,
"learning_rate": 1.294619542070949e-05,
"loss": 1.6523,
"step": 689
},
{
"epoch": 4.23,
"learning_rate": 1.2927200673502399e-05,
"loss": 1.8145,
"step": 690
},
{
"epoch": 4.24,
"learning_rate": 1.2908194368144437e-05,
"loss": 1.7949,
"step": 691
},
{
"epoch": 4.25,
"learning_rate": 1.288917657968265e-05,
"loss": 1.7422,
"step": 692
},
{
"epoch": 4.25,
"learning_rate": 1.287014738320941e-05,
"loss": 1.9102,
"step": 693
},
{
"epoch": 4.26,
"learning_rate": 1.285110685386215e-05,
"loss": 1.6523,
"step": 694
},
{
"epoch": 4.26,
"learning_rate": 1.283205506682304e-05,
"loss": 1.5938,
"step": 695
},
{
"epoch": 4.27,
"learning_rate": 1.2812992097318711e-05,
"loss": 1.6797,
"step": 696
},
{
"epoch": 4.28,
"learning_rate": 1.2793918020619937e-05,
"loss": 1.8164,
"step": 697
},
{
"epoch": 4.28,
"learning_rate": 1.2774832912041356e-05,
"loss": 1.6328,
"step": 698
},
{
"epoch": 4.29,
"learning_rate": 1.2755736846941167e-05,
"loss": 1.9219,
"step": 699
},
{
"epoch": 4.29,
"learning_rate": 1.2736629900720832e-05,
"loss": 1.8496,
"step": 700
},
{
"epoch": 4.3,
"learning_rate": 1.2717512148824764e-05,
"loss": 1.7031,
"step": 701
},
{
"epoch": 4.31,
"learning_rate": 1.2698383666740064e-05,
"loss": 1.7266,
"step": 702
},
{
"epoch": 4.31,
"learning_rate": 1.2679244529996182e-05,
"loss": 1.9102,
"step": 703
},
{
"epoch": 4.32,
"learning_rate": 1.2660094814164653e-05,
"loss": 1.6855,
"step": 704
},
{
"epoch": 4.33,
"learning_rate": 1.2640934594858773e-05,
"loss": 1.6641,
"step": 705
},
{
"epoch": 4.33,
"learning_rate": 1.262176394773332e-05,
"loss": 1.8672,
"step": 706
},
{
"epoch": 4.34,
"learning_rate": 1.2602582948484243e-05,
"loss": 1.7383,
"step": 707
},
{
"epoch": 4.34,
"learning_rate": 1.2583391672848361e-05,
"loss": 2.0586,
"step": 708
},
{
"epoch": 4.35,
"learning_rate": 1.256419019660308e-05,
"loss": 1.8281,
"step": 709
},
{
"epoch": 4.36,
"learning_rate": 1.2544978595566078e-05,
"loss": 1.7207,
"step": 710
},
{
"epoch": 4.36,
"learning_rate": 1.2525756945595006e-05,
"loss": 1.6328,
"step": 711
},
{
"epoch": 4.37,
"learning_rate": 1.2506525322587207e-05,
"loss": 1.8379,
"step": 712
},
{
"epoch": 4.37,
"learning_rate": 1.2487283802479389e-05,
"loss": 1.8828,
"step": 713
},
{
"epoch": 4.38,
"learning_rate": 1.246803246124735e-05,
"loss": 1.916,
"step": 714
},
{
"epoch": 4.39,
"learning_rate": 1.2448771374905655e-05,
"loss": 1.7852,
"step": 715
},
{
"epoch": 4.39,
"learning_rate": 1.2429500619507362e-05,
"loss": 2.0391,
"step": 716
},
{
"epoch": 4.4,
"learning_rate": 1.2410220271143693e-05,
"loss": 1.7422,
"step": 717
},
{
"epoch": 4.4,
"learning_rate": 1.2390930405943766e-05,
"loss": 1.8672,
"step": 718
},
{
"epoch": 4.41,
"learning_rate": 1.237163110007426e-05,
"loss": 1.8457,
"step": 719
},
{
"epoch": 4.42,
"learning_rate": 1.2352322429739134e-05,
"loss": 1.7402,
"step": 720
},
{
"epoch": 4.42,
"learning_rate": 1.233300447117933e-05,
"loss": 1.6465,
"step": 721
},
{
"epoch": 4.43,
"learning_rate": 1.2313677300672463e-05,
"loss": 1.6777,
"step": 722
},
{
"epoch": 4.44,
"learning_rate": 1.2294340994532511e-05,
"loss": 1.7656,
"step": 723
},
{
"epoch": 4.44,
"learning_rate": 1.2274995629109545e-05,
"loss": 1.8066,
"step": 724
},
{
"epoch": 4.45,
"learning_rate": 1.2255641280789385e-05,
"loss": 1.8809,
"step": 725
},
{
"epoch": 4.45,
"learning_rate": 1.2236278025993334e-05,
"loss": 1.8223,
"step": 726
},
{
"epoch": 4.46,
"learning_rate": 1.2216905941177854e-05,
"loss": 1.7656,
"step": 727
},
{
"epoch": 4.47,
"learning_rate": 1.2197525102834284e-05,
"loss": 1.8066,
"step": 728
},
{
"epoch": 4.47,
"learning_rate": 1.2178135587488515e-05,
"loss": 1.7207,
"step": 729
},
{
"epoch": 4.48,
"learning_rate": 1.215873747170071e-05,
"loss": 1.8535,
"step": 730
},
{
"epoch": 4.48,
"learning_rate": 1.2139330832064975e-05,
"loss": 1.7949,
"step": 731
},
{
"epoch": 4.49,
"learning_rate": 1.2119915745209092e-05,
"loss": 1.8926,
"step": 732
},
{
"epoch": 4.5,
"learning_rate": 1.2100492287794186e-05,
"loss": 1.6777,
"step": 733
},
{
"epoch": 4.5,
"learning_rate": 1.2081060536514432e-05,
"loss": 1.7773,
"step": 734
},
{
"epoch": 4.51,
"learning_rate": 1.206162056809676e-05,
"loss": 1.6699,
"step": 735
},
{
"epoch": 4.52,
"learning_rate": 1.2042172459300546e-05,
"loss": 1.709,
"step": 736
},
{
"epoch": 4.52,
"learning_rate": 1.2022716286917298e-05,
"loss": 1.8887,
"step": 737
},
{
"epoch": 4.53,
"learning_rate": 1.2003252127770378e-05,
"loss": 1.9219,
"step": 738
},
{
"epoch": 4.53,
"learning_rate": 1.198378005871467e-05,
"loss": 1.8535,
"step": 739
},
{
"epoch": 4.54,
"learning_rate": 1.1964300156636304e-05,
"loss": 1.7051,
"step": 740
},
{
"epoch": 4.55,
"learning_rate": 1.1944812498452329e-05,
"loss": 1.7578,
"step": 741
},
{
"epoch": 4.55,
"learning_rate": 1.192531716111042e-05,
"loss": 1.8203,
"step": 742
},
{
"epoch": 4.56,
"learning_rate": 1.1905814221588581e-05,
"loss": 1.6016,
"step": 743
},
{
"epoch": 4.56,
"learning_rate": 1.1886303756894828e-05,
"loss": 1.543,
"step": 744
},
{
"epoch": 4.57,
"learning_rate": 1.1866785844066884e-05,
"loss": 1.8145,
"step": 745
},
{
"epoch": 4.58,
"learning_rate": 1.1847260560171895e-05,
"loss": 1.6719,
"step": 746
},
{
"epoch": 4.58,
"learning_rate": 1.18277279823061e-05,
"loss": 1.6953,
"step": 747
},
{
"epoch": 4.59,
"learning_rate": 1.1808188187594549e-05,
"loss": 1.6406,
"step": 748
},
{
"epoch": 4.6,
"learning_rate": 1.1788641253190779e-05,
"loss": 1.7246,
"step": 749
},
{
"epoch": 4.6,
"learning_rate": 1.176908725627652e-05,
"loss": 1.6992,
"step": 750
},
{
"epoch": 4.61,
"learning_rate": 1.1749526274061394e-05,
"loss": 1.916,
"step": 751
},
{
"epoch": 4.61,
"learning_rate": 1.1729958383782598e-05,
"loss": 1.6543,
"step": 752
},
{
"epoch": 4.62,
"learning_rate": 1.1710383662704608e-05,
"loss": 1.707,
"step": 753
},
{
"epoch": 4.63,
"learning_rate": 1.1690802188118878e-05,
"loss": 1.6953,
"step": 754
},
{
"epoch": 4.63,
"learning_rate": 1.1671214037343515e-05,
"loss": 1.6875,
"step": 755
},
{
"epoch": 4.64,
"learning_rate": 1.1651619287723e-05,
"loss": 1.7969,
"step": 756
},
{
"epoch": 4.64,
"learning_rate": 1.1632018016627859e-05,
"loss": 1.7461,
"step": 757
},
{
"epoch": 4.65,
"learning_rate": 1.1612410301454384e-05,
"loss": 1.8887,
"step": 758
},
{
"epoch": 4.66,
"learning_rate": 1.1592796219624292e-05,
"loss": 1.9414,
"step": 759
},
{
"epoch": 4.66,
"learning_rate": 1.1573175848584455e-05,
"loss": 1.8711,
"step": 760
},
{
"epoch": 4.67,
"learning_rate": 1.1553549265806567e-05,
"loss": 1.7246,
"step": 761
},
{
"epoch": 4.67,
"learning_rate": 1.1533916548786856e-05,
"loss": 1.8496,
"step": 762
},
{
"epoch": 4.68,
"learning_rate": 1.1514277775045768e-05,
"loss": 1.918,
"step": 763
},
{
"epoch": 4.69,
"learning_rate": 1.1494633022127669e-05,
"loss": 1.8574,
"step": 764
},
{
"epoch": 4.69,
"learning_rate": 1.1474982367600524e-05,
"loss": 1.668,
"step": 765
},
{
"epoch": 4.7,
"learning_rate": 1.1455325889055616e-05,
"loss": 1.7031,
"step": 766
},
{
"epoch": 4.71,
"learning_rate": 1.1435663664107204e-05,
"loss": 1.7754,
"step": 767
},
{
"epoch": 4.71,
"learning_rate": 1.141599577039226e-05,
"loss": 1.7129,
"step": 768
},
{
"epoch": 4.72,
"learning_rate": 1.1396322285570119e-05,
"loss": 1.6582,
"step": 769
},
{
"epoch": 4.72,
"learning_rate": 1.1376643287322202e-05,
"loss": 1.8672,
"step": 770
},
{
"epoch": 4.73,
"learning_rate": 1.1356958853351705e-05,
"loss": 1.8867,
"step": 771
},
{
"epoch": 4.74,
"learning_rate": 1.1337269061383278e-05,
"loss": 1.8359,
"step": 772
},
{
"epoch": 4.74,
"learning_rate": 1.1317573989162727e-05,
"loss": 1.8535,
"step": 773
},
{
"epoch": 4.75,
"learning_rate": 1.129787371445672e-05,
"loss": 1.7793,
"step": 774
},
{
"epoch": 4.75,
"learning_rate": 1.1278168315052445e-05,
"loss": 1.834,
"step": 775
},
{
"epoch": 4.76,
"learning_rate": 1.1258457868757352e-05,
"loss": 1.8906,
"step": 776
},
{
"epoch": 4.77,
"learning_rate": 1.1238742453398794e-05,
"loss": 1.9512,
"step": 777
},
{
"epoch": 4.77,
"learning_rate": 1.1219022146823762e-05,
"loss": 1.8047,
"step": 778
},
{
"epoch": 4.78,
"learning_rate": 1.1199297026898547e-05,
"loss": 1.627,
"step": 779
},
{
"epoch": 4.79,
"learning_rate": 1.1179567171508463e-05,
"loss": 1.8242,
"step": 780
},
{
"epoch": 4.79,
"learning_rate": 1.1159832658557498e-05,
"loss": 1.7129,
"step": 781
},
{
"epoch": 4.8,
"learning_rate": 1.1140093565968055e-05,
"loss": 1.7012,
"step": 782
},
{
"epoch": 4.8,
"learning_rate": 1.1120349971680605e-05,
"loss": 1.8145,
"step": 783
},
{
"epoch": 4.81,
"learning_rate": 1.1100601953653393e-05,
"loss": 1.6426,
"step": 784
},
{
"epoch": 4.82,
"learning_rate": 1.1080849589862142e-05,
"loss": 1.8574,
"step": 785
},
{
"epoch": 4.82,
"learning_rate": 1.1061092958299727e-05,
"loss": 1.752,
"step": 786
},
{
"epoch": 4.83,
"learning_rate": 1.1041332136975874e-05,
"loss": 1.9531,
"step": 787
},
{
"epoch": 4.83,
"learning_rate": 1.1021567203916861e-05,
"loss": 1.7676,
"step": 788
},
{
"epoch": 4.84,
"learning_rate": 1.1001798237165185e-05,
"loss": 1.7656,
"step": 789
},
{
"epoch": 4.85,
"learning_rate": 1.0982025314779287e-05,
"loss": 1.9512,
"step": 790
},
{
"epoch": 4.85,
"learning_rate": 1.0962248514833218e-05,
"loss": 1.791,
"step": 791
},
{
"epoch": 4.86,
"learning_rate": 1.0942467915416342e-05,
"loss": 1.8398,
"step": 792
},
{
"epoch": 4.87,
"learning_rate": 1.092268359463302e-05,
"loss": 1.6797,
"step": 793
},
{
"epoch": 4.87,
"learning_rate": 1.090289563060232e-05,
"loss": 1.7871,
"step": 794
},
{
"epoch": 4.88,
"learning_rate": 1.088310410145768e-05,
"loss": 1.6738,
"step": 795
},
{
"epoch": 4.88,
"learning_rate": 1.086330908534663e-05,
"loss": 1.8711,
"step": 796
},
{
"epoch": 4.89,
"learning_rate": 1.0843510660430447e-05,
"loss": 1.752,
"step": 797
},
{
"epoch": 4.9,
"learning_rate": 1.0823708904883898e-05,
"loss": 1.9297,
"step": 798
},
{
"epoch": 4.9,
"learning_rate": 1.0803903896894877e-05,
"loss": 1.9141,
"step": 799
},
{
"epoch": 4.91,
"learning_rate": 1.0784095714664124e-05,
"loss": 1.7188,
"step": 800
},
{
"epoch": 4.91,
"learning_rate": 1.0764284436404924e-05,
"loss": 1.7441,
"step": 801
},
{
"epoch": 4.92,
"learning_rate": 1.0744470140342775e-05,
"loss": 1.7266,
"step": 802
},
{
"epoch": 4.93,
"learning_rate": 1.0724652904715091e-05,
"loss": 1.832,
"step": 803
},
{
"epoch": 4.93,
"learning_rate": 1.0704832807770909e-05,
"loss": 1.6152,
"step": 804
},
{
"epoch": 4.94,
"learning_rate": 1.0685009927770542e-05,
"loss": 1.8281,
"step": 805
},
{
"epoch": 4.94,
"learning_rate": 1.0665184342985306e-05,
"loss": 1.7812,
"step": 806
},
{
"epoch": 4.95,
"learning_rate": 1.064535613169719e-05,
"loss": 1.875,
"step": 807
},
{
"epoch": 4.96,
"learning_rate": 1.0625525372198564e-05,
"loss": 1.748,
"step": 808
},
{
"epoch": 4.96,
"learning_rate": 1.0605692142791846e-05,
"loss": 1.7148,
"step": 809
},
{
"epoch": 4.97,
"learning_rate": 1.0585856521789215e-05,
"loss": 1.7715,
"step": 810
},
{
"epoch": 4.98,
"learning_rate": 1.056601858751229e-05,
"loss": 1.7676,
"step": 811
},
{
"epoch": 4.98,
"learning_rate": 1.0546178418291833e-05,
"loss": 1.7852,
"step": 812
},
{
"epoch": 4.99,
"learning_rate": 1.0526336092467414e-05,
"loss": 1.9141,
"step": 813
},
{
"epoch": 4.99,
"learning_rate": 1.0506491688387128e-05,
"loss": 1.6602,
"step": 814
},
{
"epoch": 5.0,
"learning_rate": 1.0486645284407282e-05,
"loss": 1.75,
"step": 815
},
{
"epoch": 5.01,
"learning_rate": 1.0466796958892071e-05,
"loss": 1.5469,
"step": 816
},
{
"epoch": 5.01,
"learning_rate": 1.0446946790213275e-05,
"loss": 1.2852,
"step": 817
},
{
"epoch": 5.02,
"learning_rate": 1.0427094856749966e-05,
"loss": 1.3926,
"step": 818
},
{
"epoch": 5.02,
"learning_rate": 1.0407241236888164e-05,
"loss": 1.293,
"step": 819
},
{
"epoch": 5.03,
"learning_rate": 1.0387386009020569e-05,
"loss": 1.2559,
"step": 820
},
{
"epoch": 5.04,
"learning_rate": 1.0367529251546208e-05,
"loss": 1.3379,
"step": 821
},
{
"epoch": 5.04,
"learning_rate": 1.034767104287017e-05,
"loss": 1.3047,
"step": 822
},
{
"epoch": 5.05,
"learning_rate": 1.032781146140326e-05,
"loss": 1.3105,
"step": 823
},
{
"epoch": 5.06,
"learning_rate": 1.0307950585561705e-05,
"loss": 1.3203,
"step": 824
},
{
"epoch": 5.06,
"learning_rate": 1.0288088493766846e-05,
"loss": 1.2461,
"step": 825
},
{
"epoch": 5.07,
"learning_rate": 1.0268225264444829e-05,
"loss": 1.3281,
"step": 826
},
{
"epoch": 5.07,
"learning_rate": 1.0248360976026279e-05,
"loss": 1.1758,
"step": 827
},
{
"epoch": 5.08,
"learning_rate": 1.0228495706946015e-05,
"loss": 1.1465,
"step": 828
},
{
"epoch": 5.09,
"learning_rate": 1.0208629535642726e-05,
"loss": 1.1836,
"step": 829
},
{
"epoch": 5.09,
"learning_rate": 1.0188762540558657e-05,
"loss": 1.1504,
"step": 830
},
{
"epoch": 5.1,
"learning_rate": 1.0168894800139311e-05,
"loss": 1.1641,
"step": 831
},
{
"epoch": 5.1,
"learning_rate": 1.0149026392833137e-05,
"loss": 1.1504,
"step": 832
},
{
"epoch": 5.11,
"learning_rate": 1.0129157397091208e-05,
"loss": 1.2832,
"step": 833
},
{
"epoch": 5.12,
"learning_rate": 1.010928789136693e-05,
"loss": 1.25,
"step": 834
},
{
"epoch": 5.12,
"learning_rate": 1.0089417954115715e-05,
"loss": 1.2207,
"step": 835
},
{
"epoch": 5.13,
"learning_rate": 1.0069547663794682e-05,
"loss": 1.1855,
"step": 836
},
{
"epoch": 5.13,
"learning_rate": 1.0049677098862347e-05,
"loss": 1.1289,
"step": 837
},
{
"epoch": 5.14,
"learning_rate": 1.002980633777831e-05,
"loss": 1.1562,
"step": 838
},
{
"epoch": 5.15,
"learning_rate": 1.0009935459002935e-05,
"loss": 1.3242,
"step": 839
},
{
"epoch": 5.15,
"learning_rate": 9.990064540997066e-06,
"loss": 1.3105,
"step": 840
},
{
"epoch": 5.16,
"learning_rate": 9.970193662221694e-06,
"loss": 1.3145,
"step": 841
},
{
"epoch": 5.17,
"learning_rate": 9.950322901137655e-06,
"loss": 1.2441,
"step": 842
},
{
"epoch": 5.17,
"learning_rate": 9.93045233620532e-06,
"loss": 1.3262,
"step": 843
},
{
"epoch": 5.18,
"learning_rate": 9.910582045884292e-06,
"loss": 1.2656,
"step": 844
},
{
"epoch": 5.18,
"learning_rate": 9.890712108633076e-06,
"loss": 1.3633,
"step": 845
},
{
"epoch": 5.19,
"learning_rate": 9.870842602908794e-06,
"loss": 1.2734,
"step": 846
},
{
"epoch": 5.2,
"learning_rate": 9.850973607166865e-06,
"loss": 1.2656,
"step": 847
},
{
"epoch": 5.2,
"learning_rate": 9.83110519986069e-06,
"loss": 1.2949,
"step": 848
},
{
"epoch": 5.21,
"learning_rate": 9.811237459441346e-06,
"loss": 1.2227,
"step": 849
},
{
"epoch": 5.21,
"learning_rate": 9.791370464357279e-06,
"loss": 1.2793,
"step": 850
},
{
"epoch": 5.22,
"learning_rate": 9.771504293053985e-06,
"loss": 1.3633,
"step": 851
},
{
"epoch": 5.23,
"learning_rate": 9.751639023973724e-06,
"loss": 1.207,
"step": 852
},
{
"epoch": 5.23,
"learning_rate": 9.731774735555174e-06,
"loss": 1.252,
"step": 853
},
{
"epoch": 5.24,
"learning_rate": 9.711911506233157e-06,
"loss": 1.1992,
"step": 854
},
{
"epoch": 5.25,
"learning_rate": 9.692049414438298e-06,
"loss": 1.3516,
"step": 855
},
{
"epoch": 5.25,
"learning_rate": 9.672188538596746e-06,
"loss": 1.3574,
"step": 856
},
{
"epoch": 5.26,
"learning_rate": 9.652328957129831e-06,
"loss": 1.4062,
"step": 857
},
{
"epoch": 5.26,
"learning_rate": 9.632470748453794e-06,
"loss": 1.3223,
"step": 858
},
{
"epoch": 5.27,
"learning_rate": 9.612613990979436e-06,
"loss": 1.2207,
"step": 859
},
{
"epoch": 5.28,
"learning_rate": 9.59275876311184e-06,
"loss": 1.2441,
"step": 860
},
{
"epoch": 5.28,
"learning_rate": 9.572905143250039e-06,
"loss": 1.0586,
"step": 861
},
{
"epoch": 5.29,
"learning_rate": 9.553053209786725e-06,
"loss": 1.2148,
"step": 862
},
{
"epoch": 5.29,
"learning_rate": 9.53320304110793e-06,
"loss": 1.2402,
"step": 863
},
{
"epoch": 5.3,
"learning_rate": 9.513354715592721e-06,
"loss": 1.1338,
"step": 864
},
{
"epoch": 5.31,
"learning_rate": 9.493508311612874e-06,
"loss": 1.332,
"step": 865
},
{
"epoch": 5.31,
"learning_rate": 9.473663907532593e-06,
"loss": 1.2715,
"step": 866
},
{
"epoch": 5.32,
"learning_rate": 9.453821581708174e-06,
"loss": 1.2793,
"step": 867
},
{
"epoch": 5.33,
"learning_rate": 9.433981412487711e-06,
"loss": 1.2969,
"step": 868
},
{
"epoch": 5.33,
"learning_rate": 9.414143478210786e-06,
"loss": 1.1074,
"step": 869
},
{
"epoch": 5.34,
"learning_rate": 9.394307857208158e-06,
"loss": 1.1924,
"step": 870
},
{
"epoch": 5.34,
"learning_rate": 9.374474627801439e-06,
"loss": 1.2188,
"step": 871
},
{
"epoch": 5.35,
"learning_rate": 9.354643868302813e-06,
"loss": 1.2246,
"step": 872
},
{
"epoch": 5.36,
"learning_rate": 9.334815657014696e-06,
"loss": 1.2109,
"step": 873
},
{
"epoch": 5.36,
"learning_rate": 9.314990072229461e-06,
"loss": 1.2832,
"step": 874
},
{
"epoch": 5.37,
"learning_rate": 9.295167192229093e-06,
"loss": 1.2666,
"step": 875
},
{
"epoch": 5.37,
"learning_rate": 9.27534709528491e-06,
"loss": 1.3066,
"step": 876
},
{
"epoch": 5.38,
"learning_rate": 9.25552985965723e-06,
"loss": 1.5352,
"step": 877
},
{
"epoch": 5.39,
"learning_rate": 9.235715563595082e-06,
"loss": 1.2305,
"step": 878
},
{
"epoch": 5.39,
"learning_rate": 9.215904285335876e-06,
"loss": 1.1113,
"step": 879
},
{
"epoch": 5.4,
"learning_rate": 9.196096103105127e-06,
"loss": 1.2285,
"step": 880
},
{
"epoch": 5.4,
"learning_rate": 9.176291095116104e-06,
"loss": 1.2871,
"step": 881
},
{
"epoch": 5.41,
"learning_rate": 9.156489339569555e-06,
"loss": 1.2539,
"step": 882
},
{
"epoch": 5.42,
"learning_rate": 9.136690914653377e-06,
"loss": 1.2666,
"step": 883
},
{
"epoch": 5.42,
"learning_rate": 9.11689589854232e-06,
"loss": 1.2539,
"step": 884
},
{
"epoch": 5.43,
"learning_rate": 9.097104369397681e-06,
"loss": 1.1562,
"step": 885
},
{
"epoch": 5.44,
"learning_rate": 9.07731640536698e-06,
"loss": 1.2148,
"step": 886
},
{
"epoch": 5.44,
"learning_rate": 9.057532084583662e-06,
"loss": 1.3848,
"step": 887
},
{
"epoch": 5.45,
"learning_rate": 9.037751485166785e-06,
"loss": 1.2832,
"step": 888
},
{
"epoch": 5.45,
"learning_rate": 9.017974685220716e-06,
"loss": 1.2832,
"step": 889
},
{
"epoch": 5.46,
"learning_rate": 8.998201762834815e-06,
"loss": 1.3906,
"step": 890
},
{
"epoch": 5.47,
"learning_rate": 8.97843279608314e-06,
"loss": 1.2539,
"step": 891
},
{
"epoch": 5.47,
"learning_rate": 8.958667863024127e-06,
"loss": 1.168,
"step": 892
},
{
"epoch": 5.48,
"learning_rate": 8.938907041700275e-06,
"loss": 1.3086,
"step": 893
},
{
"epoch": 5.48,
"learning_rate": 8.919150410137862e-06,
"loss": 1.2656,
"step": 894
},
{
"epoch": 5.49,
"learning_rate": 8.899398046346608e-06,
"loss": 1.209,
"step": 895
},
{
"epoch": 5.5,
"learning_rate": 8.8796500283194e-06,
"loss": 1.2852,
"step": 896
},
{
"epoch": 5.5,
"learning_rate": 8.859906434031947e-06,
"loss": 1.1504,
"step": 897
},
{
"epoch": 5.51,
"learning_rate": 8.840167341442505e-06,
"loss": 1.0957,
"step": 898
},
{
"epoch": 5.52,
"learning_rate": 8.820432828491542e-06,
"loss": 1.2148,
"step": 899
},
{
"epoch": 5.52,
"learning_rate": 8.800702973101454e-06,
"loss": 1.2832,
"step": 900
},
{
"epoch": 5.53,
"learning_rate": 8.78097785317624e-06,
"loss": 1.252,
"step": 901
},
{
"epoch": 5.53,
"learning_rate": 8.761257546601209e-06,
"loss": 1.3633,
"step": 902
},
{
"epoch": 5.54,
"learning_rate": 8.741542131242652e-06,
"loss": 1.2246,
"step": 903
},
{
"epoch": 5.55,
"learning_rate": 8.721831684947557e-06,
"loss": 1.2148,
"step": 904
},
{
"epoch": 5.55,
"learning_rate": 8.702126285543286e-06,
"loss": 1.127,
"step": 905
},
{
"epoch": 5.56,
"learning_rate": 8.682426010837274e-06,
"loss": 1.25,
"step": 906
},
{
"epoch": 5.56,
"learning_rate": 8.662730938616724e-06,
"loss": 1.2031,
"step": 907
},
{
"epoch": 5.57,
"learning_rate": 8.643041146648299e-06,
"loss": 1.2246,
"step": 908
},
{
"epoch": 5.58,
"learning_rate": 8.6233567126778e-06,
"loss": 1.3438,
"step": 909
},
{
"epoch": 5.58,
"learning_rate": 8.603677714429888e-06,
"loss": 1.2852,
"step": 910
},
{
"epoch": 5.59,
"learning_rate": 8.584004229607747e-06,
"loss": 1.418,
"step": 911
},
{
"epoch": 5.6,
"learning_rate": 8.564336335892798e-06,
"loss": 1.3105,
"step": 912
},
{
"epoch": 5.6,
"learning_rate": 8.54467411094439e-06,
"loss": 1.2422,
"step": 913
},
{
"epoch": 5.61,
"learning_rate": 8.52501763239948e-06,
"loss": 1.2373,
"step": 914
},
{
"epoch": 5.61,
"learning_rate": 8.505366977872336e-06,
"loss": 1.2637,
"step": 915
},
{
"epoch": 5.62,
"learning_rate": 8.485722224954237e-06,
"loss": 1.3906,
"step": 916
},
{
"epoch": 5.63,
"learning_rate": 8.466083451213145e-06,
"loss": 1.1748,
"step": 917
},
{
"epoch": 5.63,
"learning_rate": 8.446450734193437e-06,
"loss": 1.2949,
"step": 918
},
{
"epoch": 5.64,
"learning_rate": 8.426824151415548e-06,
"loss": 1.125,
"step": 919
},
{
"epoch": 5.64,
"learning_rate": 8.407203780375711e-06,
"loss": 1.2539,
"step": 920
},
{
"epoch": 5.65,
"learning_rate": 8.38758969854562e-06,
"loss": 1.2305,
"step": 921
},
{
"epoch": 5.66,
"learning_rate": 8.367981983372143e-06,
"loss": 1.1523,
"step": 922
},
{
"epoch": 5.66,
"learning_rate": 8.348380712277002e-06,
"loss": 1.2285,
"step": 923
},
{
"epoch": 5.67,
"learning_rate": 8.32878596265649e-06,
"loss": 1.3281,
"step": 924
},
{
"epoch": 5.67,
"learning_rate": 8.309197811881128e-06,
"loss": 1.3379,
"step": 925
},
{
"epoch": 5.68,
"learning_rate": 8.289616337295396e-06,
"loss": 1.2891,
"step": 926
},
{
"epoch": 5.69,
"learning_rate": 8.270041616217407e-06,
"loss": 1.2441,
"step": 927
},
{
"epoch": 5.69,
"learning_rate": 8.250473725938608e-06,
"loss": 1.3652,
"step": 928
},
{
"epoch": 5.7,
"learning_rate": 8.23091274372348e-06,
"loss": 1.1523,
"step": 929
},
{
"epoch": 5.71,
"learning_rate": 8.211358746809225e-06,
"loss": 1.2637,
"step": 930
},
{
"epoch": 5.71,
"learning_rate": 8.191811812405453e-06,
"loss": 1.3184,
"step": 931
},
{
"epoch": 5.72,
"learning_rate": 8.172272017693903e-06,
"loss": 1.2676,
"step": 932
},
{
"epoch": 5.72,
"learning_rate": 8.15273943982811e-06,
"loss": 1.1836,
"step": 933
},
{
"epoch": 5.73,
"learning_rate": 8.133214155933118e-06,
"loss": 1.1533,
"step": 934
},
{
"epoch": 5.74,
"learning_rate": 8.113696243105175e-06,
"loss": 1.1562,
"step": 935
},
{
"epoch": 5.74,
"learning_rate": 8.09418577841142e-06,
"loss": 1.3008,
"step": 936
},
{
"epoch": 5.75,
"learning_rate": 8.074682838889581e-06,
"loss": 1.3379,
"step": 937
},
{
"epoch": 5.75,
"learning_rate": 8.055187501547674e-06,
"loss": 1.2012,
"step": 938
},
{
"epoch": 5.76,
"learning_rate": 8.035699843363696e-06,
"loss": 1.1484,
"step": 939
},
{
"epoch": 5.77,
"learning_rate": 8.01621994128533e-06,
"loss": 1.293,
"step": 940
},
{
"epoch": 5.77,
"learning_rate": 7.996747872229624e-06,
"loss": 1.3223,
"step": 941
},
{
"epoch": 5.78,
"learning_rate": 7.977283713082706e-06,
"loss": 1.3105,
"step": 942
},
{
"epoch": 5.79,
"learning_rate": 7.95782754069946e-06,
"loss": 1.207,
"step": 943
},
{
"epoch": 5.79,
"learning_rate": 7.938379431903243e-06,
"loss": 1.1992,
"step": 944
},
{
"epoch": 5.8,
"learning_rate": 7.91893946348557e-06,
"loss": 1.1582,
"step": 945
},
{
"epoch": 5.8,
"learning_rate": 7.899507712205818e-06,
"loss": 1.168,
"step": 946
},
{
"epoch": 5.81,
"learning_rate": 7.880084254790911e-06,
"loss": 1.3105,
"step": 947
},
{
"epoch": 5.82,
"learning_rate": 7.860669167935028e-06,
"loss": 1.2988,
"step": 948
},
{
"epoch": 5.82,
"learning_rate": 7.841262528299296e-06,
"loss": 1.1211,
"step": 949
},
{
"epoch": 5.83,
"learning_rate": 7.821864412511485e-06,
"loss": 1.2832,
"step": 950
},
{
"epoch": 5.83,
"learning_rate": 7.802474897165716e-06,
"loss": 1.0977,
"step": 951
},
{
"epoch": 5.84,
"learning_rate": 7.783094058822147e-06,
"loss": 1.0918,
"step": 952
},
{
"epoch": 5.85,
"learning_rate": 7.76372197400667e-06,
"loss": 1.2617,
"step": 953
},
{
"epoch": 5.85,
"learning_rate": 7.74435871921062e-06,
"loss": 1.2793,
"step": 954
},
{
"epoch": 5.86,
"learning_rate": 7.72500437089046e-06,
"loss": 1.2402,
"step": 955
},
{
"epoch": 5.87,
"learning_rate": 7.705659005467489e-06,
"loss": 1.2344,
"step": 956
},
{
"epoch": 5.87,
"learning_rate": 7.68632269932754e-06,
"loss": 1.2832,
"step": 957
},
{
"epoch": 5.88,
"learning_rate": 7.666995528820673e-06,
"loss": 1.2402,
"step": 958
},
{
"epoch": 5.88,
"learning_rate": 7.647677570260868e-06,
"loss": 1.3262,
"step": 959
},
{
"epoch": 5.89,
"learning_rate": 7.628368899925744e-06,
"loss": 1.2695,
"step": 960
},
{
"epoch": 5.9,
"learning_rate": 7.609069594056234e-06,
"loss": 1.2031,
"step": 961
},
{
"epoch": 5.9,
"learning_rate": 7.589779728856307e-06,
"loss": 1.1484,
"step": 962
},
{
"epoch": 5.91,
"learning_rate": 7.570499380492641e-06,
"loss": 1.3203,
"step": 963
},
{
"epoch": 5.91,
"learning_rate": 7.551228625094349e-06,
"loss": 1.2754,
"step": 964
},
{
"epoch": 5.92,
"learning_rate": 7.5319675387526555e-06,
"loss": 1.2559,
"step": 965
},
{
"epoch": 5.93,
"learning_rate": 7.512716197520614e-06,
"loss": 1.209,
"step": 966
},
{
"epoch": 5.93,
"learning_rate": 7.493474677412795e-06,
"loss": 1.1875,
"step": 967
},
{
"epoch": 5.94,
"learning_rate": 7.4742430544049945e-06,
"loss": 1.2168,
"step": 968
},
{
"epoch": 5.94,
"learning_rate": 7.4550214044339256e-06,
"loss": 1.209,
"step": 969
},
{
"epoch": 5.95,
"learning_rate": 7.435809803396923e-06,
"loss": 1.25,
"step": 970
},
{
"epoch": 5.96,
"learning_rate": 7.416608327151642e-06,
"loss": 1.1211,
"step": 971
},
{
"epoch": 5.96,
"learning_rate": 7.397417051515758e-06,
"loss": 1.1113,
"step": 972
},
{
"epoch": 5.97,
"learning_rate": 7.37823605226668e-06,
"loss": 1.2422,
"step": 973
},
{
"epoch": 5.98,
"learning_rate": 7.359065405141228e-06,
"loss": 1.2363,
"step": 974
},
{
"epoch": 5.98,
"learning_rate": 7.33990518583535e-06,
"loss": 1.1338,
"step": 975
},
{
"epoch": 5.99,
"learning_rate": 7.320755470003822e-06,
"loss": 1.0918,
"step": 976
},
{
"epoch": 5.99,
"learning_rate": 7.301616333259942e-06,
"loss": 1.3027,
"step": 977
},
{
"epoch": 6.0,
"learning_rate": 7.282487851175237e-06,
"loss": 1.0625,
"step": 978
},
{
"epoch": 6.01,
"learning_rate": 7.263370099279173e-06,
"loss": 0.792,
"step": 979
},
{
"epoch": 6.01,
"learning_rate": 7.244263153058835e-06,
"loss": 0.9102,
"step": 980
},
{
"epoch": 6.02,
"learning_rate": 7.225167087958647e-06,
"loss": 0.832,
"step": 981
},
{
"epoch": 6.02,
"learning_rate": 7.2060819793800665e-06,
"loss": 0.8662,
"step": 982
},
{
"epoch": 6.03,
"learning_rate": 7.187007902681289e-06,
"loss": 0.8164,
"step": 983
},
{
"epoch": 6.04,
"learning_rate": 7.16794493317696e-06,
"loss": 0.8496,
"step": 984
},
{
"epoch": 6.04,
"learning_rate": 7.148893146137852e-06,
"loss": 0.9854,
"step": 985
},
{
"epoch": 6.05,
"learning_rate": 7.129852616790594e-06,
"loss": 0.8486,
"step": 986
},
{
"epoch": 6.06,
"learning_rate": 7.110823420317356e-06,
"loss": 0.8359,
"step": 987
},
{
"epoch": 6.06,
"learning_rate": 7.091805631855566e-06,
"loss": 0.7695,
"step": 988
},
{
"epoch": 6.07,
"learning_rate": 7.072799326497603e-06,
"loss": 0.8828,
"step": 989
},
{
"epoch": 6.07,
"learning_rate": 7.053804579290513e-06,
"loss": 0.9307,
"step": 990
},
{
"epoch": 6.08,
"learning_rate": 7.034821465235693e-06,
"loss": 0.7568,
"step": 991
},
{
"epoch": 6.09,
"learning_rate": 7.0158500592886115e-06,
"loss": 0.8779,
"step": 992
},
{
"epoch": 6.09,
"learning_rate": 6.996890436358505e-06,
"loss": 0.9648,
"step": 993
},
{
"epoch": 6.1,
"learning_rate": 6.977942671308087e-06,
"loss": 0.7734,
"step": 994
},
{
"epoch": 6.1,
"learning_rate": 6.95900683895325e-06,
"loss": 0.8066,
"step": 995
},
{
"epoch": 6.11,
"learning_rate": 6.9400830140627705e-06,
"loss": 0.9189,
"step": 996
},
{
"epoch": 6.12,
"learning_rate": 6.921171271358007e-06,
"loss": 0.8271,
"step": 997
},
{
"epoch": 6.12,
"learning_rate": 6.902271685512616e-06,
"loss": 0.9258,
"step": 998
},
{
"epoch": 6.13,
"learning_rate": 6.883384331152254e-06,
"loss": 0.9004,
"step": 999
},
{
"epoch": 6.13,
"learning_rate": 6.864509282854272e-06,
"loss": 0.8652,
"step": 1000
},
{
"epoch": 6.14,
"learning_rate": 6.845646615147445e-06,
"loss": 0.8779,
"step": 1001
},
{
"epoch": 6.15,
"learning_rate": 6.826796402511653e-06,
"loss": 0.8105,
"step": 1002
},
{
"epoch": 6.15,
"learning_rate": 6.8079587193775935e-06,
"loss": 0.9023,
"step": 1003
},
{
"epoch": 6.16,
"learning_rate": 6.789133640126498e-06,
"loss": 0.8877,
"step": 1004
},
{
"epoch": 6.17,
"learning_rate": 6.770321239089825e-06,
"loss": 0.9209,
"step": 1005
},
{
"epoch": 6.17,
"learning_rate": 6.751521590548986e-06,
"loss": 0.8389,
"step": 1006
},
{
"epoch": 6.18,
"learning_rate": 6.732734768735021e-06,
"loss": 0.8125,
"step": 1007
},
{
"epoch": 6.18,
"learning_rate": 6.713960847828335e-06,
"loss": 0.8408,
"step": 1008
},
{
"epoch": 6.19,
"learning_rate": 6.695199901958386e-06,
"loss": 0.9258,
"step": 1009
},
{
"epoch": 6.2,
"learning_rate": 6.6764520052034054e-06,
"loss": 0.8213,
"step": 1010
},
{
"epoch": 6.2,
"learning_rate": 6.657717231590095e-06,
"loss": 0.8838,
"step": 1011
},
{
"epoch": 6.21,
"learning_rate": 6.638995655093351e-06,
"loss": 0.667,
"step": 1012
},
{
"epoch": 6.21,
"learning_rate": 6.620287349635942e-06,
"loss": 0.9072,
"step": 1013
},
{
"epoch": 6.22,
"learning_rate": 6.601592389088251e-06,
"loss": 0.8184,
"step": 1014
},
{
"epoch": 6.23,
"learning_rate": 6.582910847267957e-06,
"loss": 0.9688,
"step": 1015
},
{
"epoch": 6.23,
"learning_rate": 6.564242797939759e-06,
"loss": 0.7861,
"step": 1016
},
{
"epoch": 6.24,
"learning_rate": 6.545588314815088e-06,
"loss": 0.9268,
"step": 1017
},
{
"epoch": 6.25,
"learning_rate": 6.526947471551799e-06,
"loss": 0.7949,
"step": 1018
},
{
"epoch": 6.25,
"learning_rate": 6.508320341753889e-06,
"loss": 0.8994,
"step": 1019
},
{
"epoch": 6.26,
"learning_rate": 6.489706998971212e-06,
"loss": 0.8193,
"step": 1020
},
{
"epoch": 6.26,
"learning_rate": 6.471107516699183e-06,
"loss": 0.877,
"step": 1021
},
{
"epoch": 6.27,
"learning_rate": 6.452521968378482e-06,
"loss": 0.8525,
"step": 1022
},
{
"epoch": 6.28,
"learning_rate": 6.4339504273947805e-06,
"loss": 0.8115,
"step": 1023
},
{
"epoch": 6.28,
"learning_rate": 6.415392967078438e-06,
"loss": 0.8262,
"step": 1024
},
{
"epoch": 6.29,
"learning_rate": 6.396849660704205e-06,
"loss": 0.9258,
"step": 1025
},
{
"epoch": 6.29,
"learning_rate": 6.378320581490962e-06,
"loss": 0.873,
"step": 1026
},
{
"epoch": 6.3,
"learning_rate": 6.3598058026013995e-06,
"loss": 0.9082,
"step": 1027
},
{
"epoch": 6.31,
"learning_rate": 6.3413053971417575e-06,
"loss": 0.9756,
"step": 1028
},
{
"epoch": 6.31,
"learning_rate": 6.322819438161502e-06,
"loss": 0.7363,
"step": 1029
},
{
"epoch": 6.32,
"learning_rate": 6.304347998653074e-06,
"loss": 0.835,
"step": 1030
},
{
"epoch": 6.33,
"learning_rate": 6.285891151551573e-06,
"loss": 0.8457,
"step": 1031
},
{
"epoch": 6.33,
"learning_rate": 6.267448969734486e-06,
"loss": 0.833,
"step": 1032
},
{
"epoch": 6.34,
"learning_rate": 6.24902152602139e-06,
"loss": 0.7949,
"step": 1033
},
{
"epoch": 6.34,
"learning_rate": 6.2306088931736766e-06,
"loss": 0.9092,
"step": 1034
},
{
"epoch": 6.35,
"learning_rate": 6.21221114389424e-06,
"loss": 0.8643,
"step": 1035
},
{
"epoch": 6.36,
"learning_rate": 6.193828350827222e-06,
"loss": 0.8809,
"step": 1036
},
{
"epoch": 6.36,
"learning_rate": 6.175460586557701e-06,
"loss": 0.8662,
"step": 1037
},
{
"epoch": 6.37,
"learning_rate": 6.157107923611412e-06,
"loss": 0.8682,
"step": 1038
},
{
"epoch": 6.37,
"learning_rate": 6.1387704344544684e-06,
"loss": 0.8701,
"step": 1039
},
{
"epoch": 6.38,
"learning_rate": 6.120448191493071e-06,
"loss": 0.791,
"step": 1040
},
{
"epoch": 6.39,
"learning_rate": 6.102141267073207e-06,
"loss": 0.8857,
"step": 1041
},
{
"epoch": 6.39,
"learning_rate": 6.083849733480394e-06,
"loss": 0.8623,
"step": 1042
},
{
"epoch": 6.4,
"learning_rate": 6.065573662939367e-06,
"loss": 0.8105,
"step": 1043
},
{
"epoch": 6.4,
"learning_rate": 6.047313127613808e-06,
"loss": 0.9443,
"step": 1044
},
{
"epoch": 6.41,
"learning_rate": 6.0290681996060605e-06,
"loss": 0.7783,
"step": 1045
},
{
"epoch": 6.42,
"learning_rate": 6.010838950956841e-06,
"loss": 0.8701,
"step": 1046
},
{
"epoch": 6.42,
"learning_rate": 5.992625453644953e-06,
"loss": 0.8672,
"step": 1047
},
{
"epoch": 6.43,
"learning_rate": 5.974427779587004e-06,
"loss": 0.8262,
"step": 1048
},
{
"epoch": 6.44,
"learning_rate": 5.9562460006371295e-06,
"loss": 0.8818,
"step": 1049
},
{
"epoch": 6.44,
"learning_rate": 5.938080188586699e-06,
"loss": 0.7998,
"step": 1050
},
{
"epoch": 6.45,
"learning_rate": 5.919930415164033e-06,
"loss": 0.7217,
"step": 1051
},
{
"epoch": 6.45,
"learning_rate": 5.901796752034128e-06,
"loss": 0.8486,
"step": 1052
},
{
"epoch": 6.46,
"learning_rate": 5.883679270798363e-06,
"loss": 0.7949,
"step": 1053
},
{
"epoch": 6.47,
"learning_rate": 5.865578042994227e-06,
"loss": 0.9209,
"step": 1054
},
{
"epoch": 6.47,
"learning_rate": 5.84749314009503e-06,
"loss": 0.8779,
"step": 1055
},
{
"epoch": 6.48,
"learning_rate": 5.829424633509627e-06,
"loss": 0.9678,
"step": 1056
},
{
"epoch": 6.48,
"learning_rate": 5.8113725945821245e-06,
"loss": 0.7764,
"step": 1057
},
{
"epoch": 6.49,
"learning_rate": 5.7933370945916036e-06,
"loss": 0.8252,
"step": 1058
},
{
"epoch": 6.5,
"learning_rate": 5.775318204751854e-06,
"loss": 0.8438,
"step": 1059
},
{
"epoch": 6.5,
"learning_rate": 5.757315996211066e-06,
"loss": 0.7744,
"step": 1060
},
{
"epoch": 6.51,
"learning_rate": 5.7393305400515755e-06,
"loss": 0.8027,
"step": 1061
},
{
"epoch": 6.52,
"learning_rate": 5.721361907289556e-06,
"loss": 0.834,
"step": 1062
},
{
"epoch": 6.52,
"learning_rate": 5.703410168874768e-06,
"loss": 0.8496,
"step": 1063
},
{
"epoch": 6.53,
"learning_rate": 5.685475395690259e-06,
"loss": 1.0342,
"step": 1064
},
{
"epoch": 6.53,
"learning_rate": 5.667557658552078e-06,
"loss": 0.8789,
"step": 1065
},
{
"epoch": 6.54,
"learning_rate": 5.649657028209024e-06,
"loss": 0.7568,
"step": 1066
},
{
"epoch": 6.55,
"learning_rate": 5.631773575342343e-06,
"loss": 0.791,
"step": 1067
},
{
"epoch": 6.55,
"learning_rate": 5.61390737056545e-06,
"loss": 0.9238,
"step": 1068
},
{
"epoch": 6.56,
"learning_rate": 5.5960584844236565e-06,
"loss": 0.7002,
"step": 1069
},
{
"epoch": 6.56,
"learning_rate": 5.5782269873939e-06,
"loss": 0.8096,
"step": 1070
},
{
"epoch": 6.57,
"learning_rate": 5.560412949884442e-06,
"loss": 0.8545,
"step": 1071
},
{
"epoch": 6.58,
"learning_rate": 5.542616442234618e-06,
"loss": 0.8203,
"step": 1072
},
{
"epoch": 6.58,
"learning_rate": 5.52483753471454e-06,
"loss": 0.8271,
"step": 1073
},
{
"epoch": 6.59,
"learning_rate": 5.507076297524818e-06,
"loss": 0.8428,
"step": 1074
},
{
"epoch": 6.6,
"learning_rate": 5.48933280079631e-06,
"loss": 0.8076,
"step": 1075
},
{
"epoch": 6.6,
"learning_rate": 5.471607114589806e-06,
"loss": 0.8057,
"step": 1076
},
{
"epoch": 6.61,
"learning_rate": 5.453899308895774e-06,
"loss": 0.7715,
"step": 1077
},
{
"epoch": 6.61,
"learning_rate": 5.436209453634087e-06,
"loss": 0.7207,
"step": 1078
},
{
"epoch": 6.62,
"learning_rate": 5.418537618653743e-06,
"loss": 0.7812,
"step": 1079
},
{
"epoch": 6.63,
"learning_rate": 5.400883873732574e-06,
"loss": 0.8213,
"step": 1080
},
{
"epoch": 6.63,
"learning_rate": 5.3832482885769855e-06,
"loss": 0.7451,
"step": 1081
},
{
"epoch": 6.64,
"learning_rate": 5.365630932821688e-06,
"loss": 0.835,
"step": 1082
},
{
"epoch": 6.64,
"learning_rate": 5.3480318760294084e-06,
"loss": 0.8604,
"step": 1083
},
{
"epoch": 6.65,
"learning_rate": 5.330451187690614e-06,
"loss": 0.9072,
"step": 1084
},
{
"epoch": 6.66,
"learning_rate": 5.3128889372232436e-06,
"loss": 0.8721,
"step": 1085
},
{
"epoch": 6.66,
"learning_rate": 5.295345193972445e-06,
"loss": 0.8779,
"step": 1086
},
{
"epoch": 6.67,
"learning_rate": 5.277820027210279e-06,
"loss": 0.8916,
"step": 1087
},
{
"epoch": 6.67,
"learning_rate": 5.260313506135452e-06,
"loss": 0.8721,
"step": 1088
},
{
"epoch": 6.68,
"learning_rate": 5.242825699873068e-06,
"loss": 0.8613,
"step": 1089
},
{
"epoch": 6.69,
"learning_rate": 5.225356677474309e-06,
"loss": 0.8379,
"step": 1090
},
{
"epoch": 6.69,
"learning_rate": 5.2079065079162115e-06,
"loss": 0.708,
"step": 1091
},
{
"epoch": 6.7,
"learning_rate": 5.190475260101353e-06,
"loss": 0.873,
"step": 1092
},
{
"epoch": 6.71,
"learning_rate": 5.1730630028576055e-06,
"loss": 0.7119,
"step": 1093
},
{
"epoch": 6.71,
"learning_rate": 5.155669804937855e-06,
"loss": 0.8848,
"step": 1094
},
{
"epoch": 6.72,
"learning_rate": 5.138295735019741e-06,
"loss": 0.8633,
"step": 1095
},
{
"epoch": 6.72,
"learning_rate": 5.120940861705357e-06,
"loss": 0.8203,
"step": 1096
},
{
"epoch": 6.73,
"learning_rate": 5.103605253521007e-06,
"loss": 0.8398,
"step": 1097
},
{
"epoch": 6.74,
"learning_rate": 5.086288978916931e-06,
"loss": 0.9297,
"step": 1098
},
{
"epoch": 6.74,
"learning_rate": 5.068992106267021e-06,
"loss": 0.71,
"step": 1099
},
{
"epoch": 6.75,
"learning_rate": 5.051714703868569e-06,
"loss": 0.7275,
"step": 1100
},
{
"epoch": 6.75,
"learning_rate": 5.034456839941979e-06,
"loss": 0.8164,
"step": 1101
},
{
"epoch": 6.76,
"learning_rate": 5.017218582630507e-06,
"loss": 0.7363,
"step": 1102
},
{
"epoch": 6.77,
"learning_rate": 5.000000000000003e-06,
"loss": 0.9561,
"step": 1103
},
{
"epoch": 6.77,
"learning_rate": 4.982801160038614e-06,
"loss": 0.834,
"step": 1104
},
{
"epoch": 6.78,
"learning_rate": 4.965622130656551e-06,
"loss": 0.8418,
"step": 1105
},
{
"epoch": 6.79,
"learning_rate": 4.948462979685783e-06,
"loss": 0.8418,
"step": 1106
},
{
"epoch": 6.79,
"learning_rate": 4.931323774879807e-06,
"loss": 0.8584,
"step": 1107
},
{
"epoch": 6.8,
"learning_rate": 4.914204583913349e-06,
"loss": 0.8105,
"step": 1108
},
{
"epoch": 6.8,
"learning_rate": 4.897105474382109e-06,
"loss": 0.9131,
"step": 1109
},
{
"epoch": 6.81,
"learning_rate": 4.880026513802504e-06,
"loss": 0.791,
"step": 1110
},
{
"epoch": 6.82,
"learning_rate": 4.862967769611389e-06,
"loss": 0.8828,
"step": 1111
},
{
"epoch": 6.82,
"learning_rate": 4.845929309165793e-06,
"loss": 0.8291,
"step": 1112
},
{
"epoch": 6.83,
"learning_rate": 4.828911199742646e-06,
"loss": 0.8252,
"step": 1113
},
{
"epoch": 6.83,
"learning_rate": 4.8119135085385375e-06,
"loss": 0.7529,
"step": 1114
},
{
"epoch": 6.84,
"learning_rate": 4.794936302669417e-06,
"loss": 0.8613,
"step": 1115
},
{
"epoch": 6.85,
"learning_rate": 4.777979649170367e-06,
"loss": 0.7803,
"step": 1116
},
{
"epoch": 6.85,
"learning_rate": 4.7610436149953e-06,
"loss": 0.9141,
"step": 1117
},
{
"epoch": 6.86,
"learning_rate": 4.744128267016719e-06,
"loss": 0.8291,
"step": 1118
},
{
"epoch": 6.87,
"learning_rate": 4.727233672025453e-06,
"loss": 0.7451,
"step": 1119
},
{
"epoch": 6.87,
"learning_rate": 4.710359896730379e-06,
"loss": 0.8457,
"step": 1120
},
{
"epoch": 6.88,
"learning_rate": 4.693507007758165e-06,
"loss": 0.7646,
"step": 1121
},
{
"epoch": 6.88,
"learning_rate": 4.676675071653019e-06,
"loss": 0.8506,
"step": 1122
},
{
"epoch": 6.89,
"learning_rate": 4.659864154876411e-06,
"loss": 0.7246,
"step": 1123
},
{
"epoch": 6.9,
"learning_rate": 4.643074323806813e-06,
"loss": 0.8555,
"step": 1124
},
{
"epoch": 6.9,
"learning_rate": 4.626305644739435e-06,
"loss": 0.8125,
"step": 1125
},
{
"epoch": 6.91,
"learning_rate": 4.609558183885979e-06,
"loss": 0.8418,
"step": 1126
},
{
"epoch": 6.91,
"learning_rate": 4.592832007374364e-06,
"loss": 0.8271,
"step": 1127
},
{
"epoch": 6.92,
"learning_rate": 4.576127181248459e-06,
"loss": 0.7979,
"step": 1128
},
{
"epoch": 6.93,
"learning_rate": 4.559443771467833e-06,
"loss": 0.8438,
"step": 1129
},
{
"epoch": 6.93,
"learning_rate": 4.542781843907499e-06,
"loss": 0.7432,
"step": 1130
},
{
"epoch": 6.94,
"learning_rate": 4.5261414643576396e-06,
"loss": 0.7852,
"step": 1131
},
{
"epoch": 6.94,
"learning_rate": 4.509522698523352e-06,
"loss": 0.8125,
"step": 1132
},
{
"epoch": 6.95,
"learning_rate": 4.492925612024402e-06,
"loss": 0.7588,
"step": 1133
},
{
"epoch": 6.96,
"learning_rate": 4.476350270394942e-06,
"loss": 0.751,
"step": 1134
},
{
"epoch": 6.96,
"learning_rate": 4.4597967390832745e-06,
"loss": 0.9287,
"step": 1135
},
{
"epoch": 6.97,
"learning_rate": 4.4432650834515735e-06,
"loss": 0.7432,
"step": 1136
},
{
"epoch": 6.98,
"learning_rate": 4.426755368775637e-06,
"loss": 0.7783,
"step": 1137
},
{
"epoch": 6.98,
"learning_rate": 4.4102676602446375e-06,
"loss": 0.8613,
"step": 1138
},
{
"epoch": 6.99,
"learning_rate": 4.3938020229608506e-06,
"loss": 0.8584,
"step": 1139
},
{
"epoch": 6.99,
"learning_rate": 4.377358521939401e-06,
"loss": 0.8105,
"step": 1140
},
{
"epoch": 7.0,
"learning_rate": 4.360937222108002e-06,
"loss": 0.7871,
"step": 1141
},
{
"epoch": 7.01,
"learning_rate": 4.344538188306723e-06,
"loss": 0.5469,
"step": 1142
},
{
"epoch": 7.01,
"learning_rate": 4.328161485287693e-06,
"loss": 0.6025,
"step": 1143
},
{
"epoch": 7.02,
"learning_rate": 4.3118071777148865e-06,
"loss": 0.5752,
"step": 1144
},
{
"epoch": 7.02,
"learning_rate": 4.295475330163832e-06,
"loss": 0.6367,
"step": 1145
},
{
"epoch": 7.03,
"learning_rate": 4.279166007121389e-06,
"loss": 0.5527,
"step": 1146
},
{
"epoch": 7.04,
"learning_rate": 4.262879272985468e-06,
"loss": 0.5439,
"step": 1147
},
{
"epoch": 7.04,
"learning_rate": 4.246615192064787e-06,
"loss": 0.5586,
"step": 1148
},
{
"epoch": 7.05,
"learning_rate": 4.230373828578626e-06,
"loss": 0.6318,
"step": 1149
},
{
"epoch": 7.06,
"learning_rate": 4.21415524665655e-06,
"loss": 0.6299,
"step": 1150
},
{
"epoch": 7.06,
"learning_rate": 4.197959510338187e-06,
"loss": 0.583,
"step": 1151
},
{
"epoch": 7.07,
"learning_rate": 4.181786683572946e-06,
"loss": 0.626,
"step": 1152
},
{
"epoch": 7.07,
"learning_rate": 4.165636830219776e-06,
"loss": 0.5845,
"step": 1153
},
{
"epoch": 7.08,
"learning_rate": 4.149510014046922e-06,
"loss": 0.5723,
"step": 1154
},
{
"epoch": 7.09,
"learning_rate": 4.1334062987316695e-06,
"loss": 0.5391,
"step": 1155
},
{
"epoch": 7.09,
"learning_rate": 4.117325747860077e-06,
"loss": 0.5967,
"step": 1156
},
{
"epoch": 7.1,
"learning_rate": 4.101268424926741e-06,
"loss": 0.6357,
"step": 1157
},
{
"epoch": 7.1,
"learning_rate": 4.085234393334551e-06,
"loss": 0.5654,
"step": 1158
},
{
"epoch": 7.11,
"learning_rate": 4.069223716394419e-06,
"loss": 0.5889,
"step": 1159
},
{
"epoch": 7.12,
"learning_rate": 4.053236457325043e-06,
"loss": 0.5615,
"step": 1160
},
{
"epoch": 7.12,
"learning_rate": 4.0372726792526614e-06,
"loss": 0.5459,
"step": 1161
},
{
"epoch": 7.13,
"learning_rate": 4.021332445210785e-06,
"loss": 0.6182,
"step": 1162
},
{
"epoch": 7.13,
"learning_rate": 4.005415818139975e-06,
"loss": 0.6357,
"step": 1163
},
{
"epoch": 7.14,
"learning_rate": 3.989522860887567e-06,
"loss": 0.5,
"step": 1164
},
{
"epoch": 7.15,
"learning_rate": 3.973653636207437e-06,
"loss": 0.5625,
"step": 1165
},
{
"epoch": 7.15,
"learning_rate": 3.95780820675976e-06,
"loss": 0.6074,
"step": 1166
},
{
"epoch": 7.16,
"learning_rate": 3.941986635110754e-06,
"loss": 0.6416,
"step": 1167
},
{
"epoch": 7.17,
"learning_rate": 3.9261889837324245e-06,
"loss": 0.5239,
"step": 1168
},
{
"epoch": 7.17,
"learning_rate": 3.910415315002328e-06,
"loss": 0.5127,
"step": 1169
},
{
"epoch": 7.18,
"learning_rate": 3.89466569120334e-06,
"loss": 0.5771,
"step": 1170
},
{
"epoch": 7.18,
"learning_rate": 3.878940174523371e-06,
"loss": 0.6367,
"step": 1171
},
{
"epoch": 7.19,
"learning_rate": 3.8632388270551665e-06,
"loss": 0.6191,
"step": 1172
},
{
"epoch": 7.2,
"learning_rate": 3.847561710796019e-06,
"loss": 0.5928,
"step": 1173
},
{
"epoch": 7.2,
"learning_rate": 3.8319088876475595e-06,
"loss": 0.5742,
"step": 1174
},
{
"epoch": 7.21,
"learning_rate": 3.816280419415487e-06,
"loss": 0.6201,
"step": 1175
},
{
"epoch": 7.21,
"learning_rate": 3.8006763678093326e-06,
"loss": 0.6885,
"step": 1176
},
{
"epoch": 7.22,
"learning_rate": 3.785096794442229e-06,
"loss": 0.5742,
"step": 1177
},
{
"epoch": 7.23,
"learning_rate": 3.7695417608306415e-06,
"loss": 0.5352,
"step": 1178
},
{
"epoch": 7.23,
"learning_rate": 3.7540113283941536e-06,
"loss": 0.6123,
"step": 1179
},
{
"epoch": 7.24,
"learning_rate": 3.7385055584552e-06,
"loss": 0.5605,
"step": 1180
},
{
"epoch": 7.25,
"learning_rate": 3.723024512238833e-06,
"loss": 0.541,
"step": 1181
},
{
"epoch": 7.25,
"learning_rate": 3.707568250872493e-06,
"loss": 0.6328,
"step": 1182
},
{
"epoch": 7.26,
"learning_rate": 3.6921368353857524e-06,
"loss": 0.5498,
"step": 1183
},
{
"epoch": 7.26,
"learning_rate": 3.676730326710074e-06,
"loss": 0.5938,
"step": 1184
},
{
"epoch": 7.27,
"learning_rate": 3.6613487856785744e-06,
"loss": 0.5742,
"step": 1185
},
{
"epoch": 7.28,
"learning_rate": 3.645992273025797e-06,
"loss": 0.5493,
"step": 1186
},
{
"epoch": 7.28,
"learning_rate": 3.630660849387444e-06,
"loss": 0.5947,
"step": 1187
},
{
"epoch": 7.29,
"learning_rate": 3.6153545753001663e-06,
"loss": 0.5522,
"step": 1188
},
{
"epoch": 7.29,
"learning_rate": 3.6000735112012984e-06,
"loss": 0.5967,
"step": 1189
},
{
"epoch": 7.3,
"learning_rate": 3.584817717428647e-06,
"loss": 0.6006,
"step": 1190
},
{
"epoch": 7.31,
"learning_rate": 3.569587254220225e-06,
"loss": 0.5664,
"step": 1191
},
{
"epoch": 7.31,
"learning_rate": 3.5543821817140313e-06,
"loss": 0.5898,
"step": 1192
},
{
"epoch": 7.32,
"learning_rate": 3.5392025599478053e-06,
"loss": 0.4985,
"step": 1193
},
{
"epoch": 7.33,
"learning_rate": 3.5240484488588012e-06,
"loss": 0.5273,
"step": 1194
},
{
"epoch": 7.33,
"learning_rate": 3.5089199082835436e-06,
"loss": 0.627,
"step": 1195
},
{
"epoch": 7.34,
"learning_rate": 3.493816997957582e-06,
"loss": 0.5479,
"step": 1196
},
{
"epoch": 7.34,
"learning_rate": 3.478739777515264e-06,
"loss": 0.5625,
"step": 1197
},
{
"epoch": 7.35,
"learning_rate": 3.463688306489511e-06,
"loss": 0.5649,
"step": 1198
},
{
"epoch": 7.36,
"learning_rate": 3.448662644311567e-06,
"loss": 0.6064,
"step": 1199
},
{
"epoch": 7.36,
"learning_rate": 3.433662850310763e-06,
"loss": 0.6211,
"step": 1200
},
{
"epoch": 7.37,
"learning_rate": 3.418688983714291e-06,
"loss": 0.5337,
"step": 1201
},
{
"epoch": 7.37,
"learning_rate": 3.403741103646977e-06,
"loss": 0.6035,
"step": 1202
},
{
"epoch": 7.38,
"learning_rate": 3.3888192691310262e-06,
"loss": 0.5508,
"step": 1203
},
{
"epoch": 7.39,
"learning_rate": 3.373923539085805e-06,
"loss": 0.5215,
"step": 1204
},
{
"epoch": 7.39,
"learning_rate": 3.3590539723276083e-06,
"loss": 0.5239,
"step": 1205
},
{
"epoch": 7.4,
"learning_rate": 3.3442106275694295e-06,
"loss": 0.5444,
"step": 1206
},
{
"epoch": 7.4,
"learning_rate": 3.329393563420713e-06,
"loss": 0.6401,
"step": 1207
},
{
"epoch": 7.41,
"learning_rate": 3.3146028383871363e-06,
"loss": 0.5825,
"step": 1208
},
{
"epoch": 7.42,
"learning_rate": 3.2998385108703766e-06,
"loss": 0.5347,
"step": 1209
},
{
"epoch": 7.42,
"learning_rate": 3.285100639167883e-06,
"loss": 0.5645,
"step": 1210
},
{
"epoch": 7.43,
"learning_rate": 3.2703892814726436e-06,
"loss": 0.5459,
"step": 1211
},
{
"epoch": 7.44,
"learning_rate": 3.2557044958729466e-06,
"loss": 0.582,
"step": 1212
},
{
"epoch": 7.44,
"learning_rate": 3.2410463403521653e-06,
"loss": 0.6035,
"step": 1213
},
{
"epoch": 7.45,
"learning_rate": 3.2264148727885257e-06,
"loss": 0.6094,
"step": 1214
},
{
"epoch": 7.45,
"learning_rate": 3.211810150954867e-06,
"loss": 0.5801,
"step": 1215
},
{
"epoch": 7.46,
"learning_rate": 3.1972322325184347e-06,
"loss": 0.6016,
"step": 1216
},
{
"epoch": 7.47,
"learning_rate": 3.182681175040625e-06,
"loss": 0.5352,
"step": 1217
},
{
"epoch": 7.47,
"learning_rate": 3.1681570359767875e-06,
"loss": 0.5757,
"step": 1218
},
{
"epoch": 7.48,
"learning_rate": 3.1536598726759747e-06,
"loss": 0.5894,
"step": 1219
},
{
"epoch": 7.48,
"learning_rate": 3.1391897423807204e-06,
"loss": 0.4736,
"step": 1220
},
{
"epoch": 7.49,
"learning_rate": 3.1247467022268284e-06,
"loss": 0.4985,
"step": 1221
},
{
"epoch": 7.5,
"learning_rate": 3.110330809243134e-06,
"loss": 0.5459,
"step": 1222
},
{
"epoch": 7.5,
"learning_rate": 3.095942120351276e-06,
"loss": 0.4756,
"step": 1223
},
{
"epoch": 7.51,
"learning_rate": 3.081580692365478e-06,
"loss": 0.5908,
"step": 1224
},
{
"epoch": 7.52,
"learning_rate": 3.0672465819923215e-06,
"loss": 0.583,
"step": 1225
},
{
"epoch": 7.52,
"learning_rate": 3.052939845830528e-06,
"loss": 0.5034,
"step": 1226
},
{
"epoch": 7.53,
"learning_rate": 3.0386605403707347e-06,
"loss": 0.4697,
"step": 1227
},
{
"epoch": 7.53,
"learning_rate": 3.0244087219952565e-06,
"loss": 0.5146,
"step": 1228
},
{
"epoch": 7.54,
"learning_rate": 3.0101844469778797e-06,
"loss": 0.5674,
"step": 1229
},
{
"epoch": 7.55,
"learning_rate": 2.9959877714836406e-06,
"loss": 0.542,
"step": 1230
},
{
"epoch": 7.55,
"learning_rate": 2.981818751568586e-06,
"loss": 0.5669,
"step": 1231
},
{
"epoch": 7.56,
"learning_rate": 2.9676774431795752e-06,
"loss": 0.5244,
"step": 1232
},
{
"epoch": 7.56,
"learning_rate": 2.95356390215404e-06,
"loss": 0.5679,
"step": 1233
},
{
"epoch": 7.57,
"learning_rate": 2.939478184219777e-06,
"loss": 0.4868,
"step": 1234
},
{
"epoch": 7.58,
"learning_rate": 2.9254203449947196e-06,
"loss": 0.5498,
"step": 1235
},
{
"epoch": 7.58,
"learning_rate": 2.9113904399867188e-06,
"loss": 0.6143,
"step": 1236
},
{
"epoch": 7.59,
"learning_rate": 2.8973885245933287e-06,
"loss": 0.6279,
"step": 1237
},
{
"epoch": 7.6,
"learning_rate": 2.8834146541015874e-06,
"loss": 0.5552,
"step": 1238
},
{
"epoch": 7.6,
"learning_rate": 2.869468883687798e-06,
"loss": 0.5186,
"step": 1239
},
{
"epoch": 7.61,
"learning_rate": 2.855551268417305e-06,
"loss": 0.5244,
"step": 1240
},
{
"epoch": 7.61,
"learning_rate": 2.8416618632442785e-06,
"loss": 0.5884,
"step": 1241
},
{
"epoch": 7.62,
"learning_rate": 2.827800723011508e-06,
"loss": 0.6289,
"step": 1242
},
{
"epoch": 7.63,
"learning_rate": 2.813967902450179e-06,
"loss": 0.5732,
"step": 1243
},
{
"epoch": 7.63,
"learning_rate": 2.8001634561796463e-06,
"loss": 0.5527,
"step": 1244
},
{
"epoch": 7.64,
"learning_rate": 2.786387438707231e-06,
"loss": 0.5835,
"step": 1245
},
{
"epoch": 7.64,
"learning_rate": 2.7726399044280107e-06,
"loss": 0.5557,
"step": 1246
},
{
"epoch": 7.65,
"learning_rate": 2.758920907624585e-06,
"loss": 0.5322,
"step": 1247
},
{
"epoch": 7.66,
"learning_rate": 2.7452305024668747e-06,
"loss": 0.54,
"step": 1248
},
{
"epoch": 7.66,
"learning_rate": 2.7315687430119097e-06,
"loss": 0.6719,
"step": 1249
},
{
"epoch": 7.67,
"learning_rate": 2.7179356832036142e-06,
"loss": 0.6846,
"step": 1250
},
{
"epoch": 7.67,
"learning_rate": 2.704331376872581e-06,
"loss": 0.5723,
"step": 1251
},
{
"epoch": 7.68,
"learning_rate": 2.6907558777358756e-06,
"loss": 0.5562,
"step": 1252
},
{
"epoch": 7.69,
"learning_rate": 2.677209239396811e-06,
"loss": 0.5967,
"step": 1253
},
{
"epoch": 7.69,
"learning_rate": 2.6636915153447494e-06,
"loss": 0.4829,
"step": 1254
},
{
"epoch": 7.7,
"learning_rate": 2.650202758954886e-06,
"loss": 0.6201,
"step": 1255
},
{
"epoch": 7.71,
"learning_rate": 2.6367430234880286e-06,
"loss": 0.4766,
"step": 1256
},
{
"epoch": 7.71,
"learning_rate": 2.6233123620903946e-06,
"loss": 0.583,
"step": 1257
},
{
"epoch": 7.72,
"learning_rate": 2.6099108277934105e-06,
"loss": 0.5054,
"step": 1258
},
{
"epoch": 7.72,
"learning_rate": 2.5965384735134825e-06,
"loss": 0.5459,
"step": 1259
},
{
"epoch": 7.73,
"learning_rate": 2.583195352051808e-06,
"loss": 0.5312,
"step": 1260
},
{
"epoch": 7.74,
"learning_rate": 2.5698815160941494e-06,
"loss": 0.584,
"step": 1261
},
{
"epoch": 7.74,
"learning_rate": 2.5565970182106425e-06,
"loss": 0.5928,
"step": 1262
},
{
"epoch": 7.75,
"learning_rate": 2.5433419108555758e-06,
"loss": 0.5205,
"step": 1263
},
{
"epoch": 7.75,
"learning_rate": 2.5301162463671845e-06,
"loss": 0.5303,
"step": 1264
},
{
"epoch": 7.76,
"learning_rate": 2.516920076967455e-06,
"loss": 0.5615,
"step": 1265
},
{
"epoch": 7.77,
"learning_rate": 2.5037534547619125e-06,
"loss": 0.6182,
"step": 1266
},
{
"epoch": 7.77,
"learning_rate": 2.4906164317394067e-06,
"loss": 0.5088,
"step": 1267
},
{
"epoch": 7.78,
"learning_rate": 2.4775090597719163e-06,
"loss": 0.5264,
"step": 1268
},
{
"epoch": 7.79,
"learning_rate": 2.4644313906143414e-06,
"loss": 0.5195,
"step": 1269
},
{
"epoch": 7.79,
"learning_rate": 2.451383475904304e-06,
"loss": 0.5332,
"step": 1270
},
{
"epoch": 7.8,
"learning_rate": 2.438365367161939e-06,
"loss": 0.5718,
"step": 1271
},
{
"epoch": 7.8,
"learning_rate": 2.4253771157896856e-06,
"loss": 0.5269,
"step": 1272
},
{
"epoch": 7.81,
"learning_rate": 2.4124187730720916e-06,
"loss": 0.563,
"step": 1273
},
{
"epoch": 7.82,
"learning_rate": 2.3994903901756163e-06,
"loss": 0.5156,
"step": 1274
},
{
"epoch": 7.82,
"learning_rate": 2.3865920181484127e-06,
"loss": 0.478,
"step": 1275
},
{
"epoch": 7.83,
"learning_rate": 2.3737237079201437e-06,
"loss": 0.5879,
"step": 1276
},
{
"epoch": 7.83,
"learning_rate": 2.3608855103017613e-06,
"loss": 0.5972,
"step": 1277
},
{
"epoch": 7.84,
"learning_rate": 2.3480774759853307e-06,
"loss": 0.5254,
"step": 1278
},
{
"epoch": 7.85,
"learning_rate": 2.3352996555438036e-06,
"loss": 0.5645,
"step": 1279
},
{
"epoch": 7.85,
"learning_rate": 2.3225520994308382e-06,
"loss": 0.5957,
"step": 1280
},
{
"epoch": 7.86,
"learning_rate": 2.309834857980583e-06,
"loss": 0.5371,
"step": 1281
},
{
"epoch": 7.87,
"learning_rate": 2.297147981407509e-06,
"loss": 0.5508,
"step": 1282
},
{
"epoch": 7.87,
"learning_rate": 2.2844915198061714e-06,
"loss": 0.4985,
"step": 1283
},
{
"epoch": 7.88,
"learning_rate": 2.2718655231510368e-06,
"loss": 0.5928,
"step": 1284
},
{
"epoch": 7.88,
"learning_rate": 2.2592700412962775e-06,
"loss": 0.5928,
"step": 1285
},
{
"epoch": 7.89,
"learning_rate": 2.246705123975582e-06,
"loss": 0.6377,
"step": 1286
},
{
"epoch": 7.9,
"learning_rate": 2.234170820801954e-06,
"loss": 0.5674,
"step": 1287
},
{
"epoch": 7.9,
"learning_rate": 2.2216671812675118e-06,
"loss": 0.4785,
"step": 1288
},
{
"epoch": 7.91,
"learning_rate": 2.209194254743295e-06,
"loss": 0.5767,
"step": 1289
},
{
"epoch": 7.91,
"learning_rate": 2.196752090479083e-06,
"loss": 0.5601,
"step": 1290
},
{
"epoch": 7.92,
"learning_rate": 2.184340737603178e-06,
"loss": 0.4595,
"step": 1291
},
{
"epoch": 7.93,
"learning_rate": 2.1719602451222245e-06,
"loss": 0.5625,
"step": 1292
},
{
"epoch": 7.93,
"learning_rate": 2.159610661921018e-06,
"loss": 0.5679,
"step": 1293
},
{
"epoch": 7.94,
"learning_rate": 2.1472920367623094e-06,
"loss": 0.6499,
"step": 1294
},
{
"epoch": 7.94,
"learning_rate": 2.1350044182866025e-06,
"loss": 0.4966,
"step": 1295
},
{
"epoch": 7.95,
"learning_rate": 2.1227478550119763e-06,
"loss": 0.5933,
"step": 1296
},
{
"epoch": 7.96,
"learning_rate": 2.1105223953338805e-06,
"loss": 0.4814,
"step": 1297
},
{
"epoch": 7.96,
"learning_rate": 2.09832808752496e-06,
"loss": 0.5088,
"step": 1298
},
{
"epoch": 7.97,
"learning_rate": 2.086164979734856e-06,
"loss": 0.5586,
"step": 1299
},
{
"epoch": 7.98,
"learning_rate": 2.0740331199900053e-06,
"loss": 0.5396,
"step": 1300
},
{
"epoch": 7.98,
"learning_rate": 2.0619325561934658e-06,
"loss": 0.6182,
"step": 1301
},
{
"epoch": 7.99,
"learning_rate": 2.0498633361247278e-06,
"loss": 0.5537,
"step": 1302
},
{
"epoch": 7.99,
"learning_rate": 2.0378255074395094e-06,
"loss": 0.5107,
"step": 1303
},
{
"epoch": 8.0,
"learning_rate": 2.0258191176695896e-06,
"loss": 0.5176,
"step": 1304
},
{
"epoch": 8.01,
"learning_rate": 2.0138442142226e-06,
"loss": 0.4658,
"step": 1305
},
{
"epoch": 8.01,
"learning_rate": 2.001900844381857e-06,
"loss": 0.3608,
"step": 1306
},
{
"epoch": 8.02,
"learning_rate": 1.9899890553061565e-06,
"loss": 0.4785,
"step": 1307
},
{
"epoch": 8.02,
"learning_rate": 1.978108894029598e-06,
"loss": 0.4692,
"step": 1308
},
{
"epoch": 8.03,
"learning_rate": 1.9662604074614044e-06,
"loss": 0.4463,
"step": 1309
},
{
"epoch": 8.04,
"learning_rate": 1.954443642385727e-06,
"loss": 0.4473,
"step": 1310
},
{
"epoch": 8.04,
"learning_rate": 1.9426586454614617e-06,
"loss": 0.3853,
"step": 1311
},
{
"epoch": 8.05,
"learning_rate": 1.9309054632220645e-06,
"loss": 0.4043,
"step": 1312
},
{
"epoch": 8.06,
"learning_rate": 1.919184142075372e-06,
"loss": 0.3589,
"step": 1313
},
{
"epoch": 8.06,
"learning_rate": 1.9074947283034206e-06,
"loss": 0.3608,
"step": 1314
},
{
"epoch": 8.07,
"learning_rate": 1.895837268062256e-06,
"loss": 0.499,
"step": 1315
},
{
"epoch": 8.07,
"learning_rate": 1.884211807381755e-06,
"loss": 0.4058,
"step": 1316
},
{
"epoch": 8.08,
"learning_rate": 1.8726183921654373e-06,
"loss": 0.5142,
"step": 1317
},
{
"epoch": 8.09,
"learning_rate": 1.8610570681903018e-06,
"loss": 0.3506,
"step": 1318
},
{
"epoch": 8.09,
"learning_rate": 1.8495278811066197e-06,
"loss": 0.4849,
"step": 1319
},
{
"epoch": 8.1,
"learning_rate": 1.8380308764377841e-06,
"loss": 0.3979,
"step": 1320
},
{
"epoch": 8.1,
"learning_rate": 1.8265660995801004e-06,
"loss": 0.375,
"step": 1321
},
{
"epoch": 8.11,
"learning_rate": 1.8151335958026317e-06,
"loss": 0.4575,
"step": 1322
},
{
"epoch": 8.12,
"learning_rate": 1.803733410247006e-06,
"loss": 0.3691,
"step": 1323
},
{
"epoch": 8.12,
"learning_rate": 1.7923655879272395e-06,
"loss": 0.4448,
"step": 1324
},
{
"epoch": 8.13,
"learning_rate": 1.7810301737295588e-06,
"loss": 0.4111,
"step": 1325
},
{
"epoch": 8.13,
"learning_rate": 1.76972721241224e-06,
"loss": 0.3872,
"step": 1326
},
{
"epoch": 8.14,
"learning_rate": 1.7584567486054039e-06,
"loss": 0.4336,
"step": 1327
},
{
"epoch": 8.15,
"learning_rate": 1.7472188268108569e-06,
"loss": 0.3569,
"step": 1328
},
{
"epoch": 8.15,
"learning_rate": 1.7360134914019122e-06,
"loss": 0.4526,
"step": 1329
},
{
"epoch": 8.16,
"learning_rate": 1.7248407866232175e-06,
"loss": 0.4351,
"step": 1330
},
{
"epoch": 8.17,
"learning_rate": 1.7137007565905772e-06,
"loss": 0.3394,
"step": 1331
},
{
"epoch": 8.17,
"learning_rate": 1.7025934452907755e-06,
"loss": 0.439,
"step": 1332
},
{
"epoch": 8.18,
"learning_rate": 1.6915188965814034e-06,
"loss": 0.437,
"step": 1333
},
{
"epoch": 8.18,
"learning_rate": 1.6804771541906972e-06,
"loss": 0.3999,
"step": 1334
},
{
"epoch": 8.19,
"learning_rate": 1.6694682617173452e-06,
"loss": 0.3999,
"step": 1335
},
{
"epoch": 8.2,
"learning_rate": 1.6584922626303325e-06,
"loss": 0.4165,
"step": 1336
},
{
"epoch": 8.2,
"learning_rate": 1.6475492002687632e-06,
"loss": 0.4141,
"step": 1337
},
{
"epoch": 8.21,
"learning_rate": 1.6366391178416918e-06,
"loss": 0.397,
"step": 1338
},
{
"epoch": 8.21,
"learning_rate": 1.6257620584279454e-06,
"loss": 0.3926,
"step": 1339
},
{
"epoch": 8.22,
"learning_rate": 1.6149180649759622e-06,
"loss": 0.3926,
"step": 1340
},
{
"epoch": 8.23,
"learning_rate": 1.60410718030361e-06,
"loss": 0.436,
"step": 1341
},
{
"epoch": 8.23,
"learning_rate": 1.5933294470980443e-06,
"loss": 0.4141,
"step": 1342
},
{
"epoch": 8.24,
"learning_rate": 1.5825849079155032e-06,
"loss": 0.4165,
"step": 1343
},
{
"epoch": 8.25,
"learning_rate": 1.5718736051811634e-06,
"loss": 0.4912,
"step": 1344
},
{
"epoch": 8.25,
"learning_rate": 1.5611955811889645e-06,
"loss": 0.397,
"step": 1345
},
{
"epoch": 8.26,
"learning_rate": 1.5505508781014489e-06,
"loss": 0.4297,
"step": 1346
},
{
"epoch": 8.26,
"learning_rate": 1.539939537949583e-06,
"loss": 0.4883,
"step": 1347
},
{
"epoch": 8.27,
"learning_rate": 1.5293616026326053e-06,
"loss": 0.3496,
"step": 1348
},
{
"epoch": 8.28,
"learning_rate": 1.5188171139178486e-06,
"loss": 0.4014,
"step": 1349
},
{
"epoch": 8.28,
"learning_rate": 1.5083061134405874e-06,
"loss": 0.3706,
"step": 1350
},
{
"epoch": 8.29,
"learning_rate": 1.4978286427038602e-06,
"loss": 0.4463,
"step": 1351
},
{
"epoch": 8.29,
"learning_rate": 1.4873847430783118e-06,
"loss": 0.4316,
"step": 1352
},
{
"epoch": 8.3,
"learning_rate": 1.476974455802036e-06,
"loss": 0.4258,
"step": 1353
},
{
"epoch": 8.31,
"learning_rate": 1.4665978219804056e-06,
"loss": 0.3833,
"step": 1354
},
{
"epoch": 8.31,
"learning_rate": 1.4562548825859092e-06,
"loss": 0.3687,
"step": 1355
},
{
"epoch": 8.32,
"learning_rate": 1.4459456784579917e-06,
"loss": 0.4141,
"step": 1356
},
{
"epoch": 8.33,
"learning_rate": 1.435670250302892e-06,
"loss": 0.4692,
"step": 1357
},
{
"epoch": 8.33,
"learning_rate": 1.425428638693489e-06,
"loss": 0.3999,
"step": 1358
},
{
"epoch": 8.34,
"learning_rate": 1.415220884069135e-06,
"loss": 0.4443,
"step": 1359
},
{
"epoch": 8.34,
"learning_rate": 1.405047026735491e-06,
"loss": 0.3403,
"step": 1360
},
{
"epoch": 8.35,
"learning_rate": 1.394907106864375e-06,
"loss": 0.4438,
"step": 1361
},
{
"epoch": 8.36,
"learning_rate": 1.3848011644936077e-06,
"loss": 0.3643,
"step": 1362
},
{
"epoch": 8.36,
"learning_rate": 1.3747292395268407e-06,
"loss": 0.4121,
"step": 1363
},
{
"epoch": 8.37,
"learning_rate": 1.3646913717334142e-06,
"loss": 0.394,
"step": 1364
},
{
"epoch": 8.37,
"learning_rate": 1.3546876007481847e-06,
"loss": 0.4102,
"step": 1365
},
{
"epoch": 8.38,
"learning_rate": 1.344717966071385e-06,
"loss": 0.3857,
"step": 1366
},
{
"epoch": 8.39,
"learning_rate": 1.3347825070684518e-06,
"loss": 0.3726,
"step": 1367
},
{
"epoch": 8.39,
"learning_rate": 1.3248812629698815e-06,
"loss": 0.4077,
"step": 1368
},
{
"epoch": 8.4,
"learning_rate": 1.3150142728710669e-06,
"loss": 0.4009,
"step": 1369
},
{
"epoch": 8.4,
"learning_rate": 1.3051815757321607e-06,
"loss": 0.3789,
"step": 1370
},
{
"epoch": 8.41,
"learning_rate": 1.295383210377895e-06,
"loss": 0.3452,
"step": 1371
},
{
"epoch": 8.42,
"learning_rate": 1.2856192154974488e-06,
"loss": 0.4043,
"step": 1372
},
{
"epoch": 8.42,
"learning_rate": 1.2758896296442834e-06,
"loss": 0.4385,
"step": 1373
},
{
"epoch": 8.43,
"learning_rate": 1.266194491235998e-06,
"loss": 0.4263,
"step": 1374
},
{
"epoch": 8.44,
"learning_rate": 1.2565338385541792e-06,
"loss": 0.416,
"step": 1375
},
{
"epoch": 8.44,
"learning_rate": 1.2469077097442372e-06,
"loss": 0.4087,
"step": 1376
},
{
"epoch": 8.45,
"learning_rate": 1.2373161428152647e-06,
"loss": 0.4033,
"step": 1377
},
{
"epoch": 8.45,
"learning_rate": 1.2277591756398933e-06,
"loss": 0.3394,
"step": 1378
},
{
"epoch": 8.46,
"learning_rate": 1.2182368459541294e-06,
"loss": 0.4214,
"step": 1379
},
{
"epoch": 8.47,
"learning_rate": 1.2087491913572103e-06,
"loss": 0.4229,
"step": 1380
},
{
"epoch": 8.47,
"learning_rate": 1.1992962493114645e-06,
"loss": 0.3779,
"step": 1381
},
{
"epoch": 8.48,
"learning_rate": 1.1898780571421554e-06,
"loss": 0.4639,
"step": 1382
},
{
"epoch": 8.48,
"learning_rate": 1.1804946520373307e-06,
"loss": 0.4116,
"step": 1383
},
{
"epoch": 8.49,
"learning_rate": 1.171146071047683e-06,
"loss": 0.3823,
"step": 1384
},
{
"epoch": 8.5,
"learning_rate": 1.161832351086396e-06,
"loss": 0.4209,
"step": 1385
},
{
"epoch": 8.5,
"learning_rate": 1.1525535289290168e-06,
"loss": 0.3936,
"step": 1386
},
{
"epoch": 8.51,
"learning_rate": 1.1433096412132838e-06,
"loss": 0.3999,
"step": 1387
},
{
"epoch": 8.52,
"learning_rate": 1.1341007244390023e-06,
"loss": 0.437,
"step": 1388
},
{
"epoch": 8.52,
"learning_rate": 1.124926814967887e-06,
"loss": 0.3521,
"step": 1389
},
{
"epoch": 8.53,
"learning_rate": 1.1157879490234346e-06,
"loss": 0.4141,
"step": 1390
},
{
"epoch": 8.53,
"learning_rate": 1.1066841626907633e-06,
"loss": 0.418,
"step": 1391
},
{
"epoch": 8.54,
"learning_rate": 1.097615491916485e-06,
"loss": 0.4189,
"step": 1392
},
{
"epoch": 8.55,
"learning_rate": 1.088581972508549e-06,
"loss": 0.4517,
"step": 1393
},
{
"epoch": 8.55,
"learning_rate": 1.0795836401361148e-06,
"loss": 0.4067,
"step": 1394
},
{
"epoch": 8.56,
"learning_rate": 1.0706205303294025e-06,
"loss": 0.375,
"step": 1395
},
{
"epoch": 8.56,
"learning_rate": 1.0616926784795511e-06,
"loss": 0.3359,
"step": 1396
},
{
"epoch": 8.57,
"learning_rate": 1.0528001198384862e-06,
"loss": 0.4092,
"step": 1397
},
{
"epoch": 8.58,
"learning_rate": 1.043942889518782e-06,
"loss": 0.3726,
"step": 1398
},
{
"epoch": 8.58,
"learning_rate": 1.035121022493506e-06,
"loss": 0.4136,
"step": 1399
},
{
"epoch": 8.59,
"learning_rate": 1.026334553596101e-06,
"loss": 0.3877,
"step": 1400
},
{
"epoch": 8.6,
"learning_rate": 1.0175835175202341e-06,
"loss": 0.4268,
"step": 1401
},
{
"epoch": 8.6,
"learning_rate": 1.0088679488196695e-06,
"loss": 0.4053,
"step": 1402
},
{
"epoch": 8.61,
"learning_rate": 1.0001878819081268e-06,
"loss": 0.3955,
"step": 1403
},
{
"epoch": 8.61,
"learning_rate": 9.91543351059141e-07,
"loss": 0.3677,
"step": 1404
},
{
"epoch": 8.62,
"learning_rate": 9.829343904059342e-07,
"loss": 0.3691,
"step": 1405
},
{
"epoch": 8.63,
"learning_rate": 9.743610339412801e-07,
"loss": 0.4097,
"step": 1406
},
{
"epoch": 8.63,
"learning_rate": 9.658233155173657e-07,
"loss": 0.4043,
"step": 1407
},
{
"epoch": 8.64,
"learning_rate": 9.573212688456635e-07,
"loss": 0.4346,
"step": 1408
},
{
"epoch": 8.64,
"learning_rate": 9.488549274967873e-07,
"loss": 0.3755,
"step": 1409
},
{
"epoch": 8.65,
"learning_rate": 9.404243249003786e-07,
"loss": 0.373,
"step": 1410
},
{
"epoch": 8.66,
"learning_rate": 9.320294943449537e-07,
"loss": 0.4517,
"step": 1411
},
{
"epoch": 8.66,
"learning_rate": 9.236704689777842e-07,
"loss": 0.4087,
"step": 1412
},
{
"epoch": 8.67,
"learning_rate": 9.153472818047627e-07,
"loss": 0.4146,
"step": 1413
},
{
"epoch": 8.67,
"learning_rate": 9.070599656902801e-07,
"loss": 0.3848,
"step": 1414
},
{
"epoch": 8.68,
"learning_rate": 8.988085533570833e-07,
"loss": 0.3652,
"step": 1415
},
{
"epoch": 8.69,
"learning_rate": 8.905930773861527e-07,
"loss": 0.3765,
"step": 1416
},
{
"epoch": 8.69,
"learning_rate": 8.824135702165693e-07,
"loss": 0.395,
"step": 1417
},
{
"epoch": 8.7,
"learning_rate": 8.74270064145396e-07,
"loss": 0.3818,
"step": 1418
},
{
"epoch": 8.71,
"learning_rate": 8.661625913275463e-07,
"loss": 0.375,
"step": 1419
},
{
"epoch": 8.71,
"learning_rate": 8.580911837756467e-07,
"loss": 0.3896,
"step": 1420
},
{
"epoch": 8.72,
"learning_rate": 8.500558733599206e-07,
"loss": 0.3535,
"step": 1421
},
{
"epoch": 8.72,
"learning_rate": 8.420566918080686e-07,
"loss": 0.4189,
"step": 1422
},
{
"epoch": 8.73,
"learning_rate": 8.340936707051273e-07,
"loss": 0.4199,
"step": 1423
},
{
"epoch": 8.74,
"learning_rate": 8.261668414933521e-07,
"loss": 0.4771,
"step": 1424
},
{
"epoch": 8.74,
"learning_rate": 8.182762354720985e-07,
"loss": 0.3779,
"step": 1425
},
{
"epoch": 8.75,
"learning_rate": 8.10421883797694e-07,
"loss": 0.3979,
"step": 1426
},
{
"epoch": 8.75,
"learning_rate": 8.026038174833085e-07,
"loss": 0.4072,
"step": 1427
},
{
"epoch": 8.76,
"learning_rate": 7.948220673988427e-07,
"loss": 0.4141,
"step": 1428
},
{
"epoch": 8.77,
"learning_rate": 7.87076664270795e-07,
"loss": 0.3457,
"step": 1429
},
{
"epoch": 8.77,
"learning_rate": 7.793676386821602e-07,
"loss": 0.395,
"step": 1430
},
{
"epoch": 8.78,
"learning_rate": 7.716950210722818e-07,
"loss": 0.4409,
"step": 1431
},
{
"epoch": 8.79,
"learning_rate": 7.6405884173675e-07,
"loss": 0.4697,
"step": 1432
},
{
"epoch": 8.79,
"learning_rate": 7.564591308272773e-07,
"loss": 0.3926,
"step": 1433
},
{
"epoch": 8.8,
"learning_rate": 7.488959183515809e-07,
"loss": 0.3809,
"step": 1434
},
{
"epoch": 8.8,
"learning_rate": 7.413692341732582e-07,
"loss": 0.3564,
"step": 1435
},
{
"epoch": 8.81,
"learning_rate": 7.338791080116792e-07,
"loss": 0.3618,
"step": 1436
},
{
"epoch": 8.82,
"learning_rate": 7.264255694418576e-07,
"loss": 0.4092,
"step": 1437
},
{
"epoch": 8.82,
"learning_rate": 7.190086478943459e-07,
"loss": 0.4375,
"step": 1438
},
{
"epoch": 8.83,
"learning_rate": 7.116283726551077e-07,
"loss": 0.3667,
"step": 1439
},
{
"epoch": 8.83,
"learning_rate": 7.042847728654078e-07,
"loss": 0.3511,
"step": 1440
},
{
"epoch": 8.84,
"learning_rate": 6.969778775217007e-07,
"loss": 0.3926,
"step": 1441
},
{
"epoch": 8.85,
"learning_rate": 6.897077154755094e-07,
"loss": 0.4565,
"step": 1442
},
{
"epoch": 8.85,
"learning_rate": 6.824743154333157e-07,
"loss": 0.3608,
"step": 1443
},
{
"epoch": 8.86,
"learning_rate": 6.752777059564431e-07,
"loss": 0.4204,
"step": 1444
},
{
"epoch": 8.87,
"learning_rate": 6.681179154609463e-07,
"loss": 0.4058,
"step": 1445
},
{
"epoch": 8.87,
"learning_rate": 6.609949722175013e-07,
"loss": 0.3936,
"step": 1446
},
{
"epoch": 8.88,
"learning_rate": 6.539089043512914e-07,
"loss": 0.4004,
"step": 1447
},
{
"epoch": 8.88,
"learning_rate": 6.468597398418952e-07,
"loss": 0.3545,
"step": 1448
},
{
"epoch": 8.89,
"learning_rate": 6.398475065231746e-07,
"loss": 0.3264,
"step": 1449
},
{
"epoch": 8.9,
"learning_rate": 6.328722320831737e-07,
"loss": 0.3521,
"step": 1450
},
{
"epoch": 8.9,
"learning_rate": 6.259339440639966e-07,
"loss": 0.3779,
"step": 1451
},
{
"epoch": 8.91,
"learning_rate": 6.1903266986171e-07,
"loss": 0.397,
"step": 1452
},
{
"epoch": 8.91,
"learning_rate": 6.121684367262271e-07,
"loss": 0.4111,
"step": 1453
},
{
"epoch": 8.92,
"learning_rate": 6.053412717612061e-07,
"loss": 0.373,
"step": 1454
},
{
"epoch": 8.93,
"learning_rate": 5.985512019239392e-07,
"loss": 0.4199,
"step": 1455
},
{
"epoch": 8.93,
"learning_rate": 5.917982540252442e-07,
"loss": 0.3833,
"step": 1456
},
{
"epoch": 8.94,
"learning_rate": 5.850824547293655e-07,
"loss": 0.3838,
"step": 1457
},
{
"epoch": 8.94,
"learning_rate": 5.784038305538653e-07,
"loss": 0.4448,
"step": 1458
},
{
"epoch": 8.95,
"learning_rate": 5.71762407869515e-07,
"loss": 0.4224,
"step": 1459
},
{
"epoch": 8.96,
"learning_rate": 5.651582129001987e-07,
"loss": 0.3784,
"step": 1460
},
{
"epoch": 8.96,
"learning_rate": 5.585912717228015e-07,
"loss": 0.3955,
"step": 1461
},
{
"epoch": 8.97,
"learning_rate": 5.520616102671128e-07,
"loss": 0.4287,
"step": 1462
},
{
"epoch": 8.98,
"learning_rate": 5.455692543157243e-07,
"loss": 0.4048,
"step": 1463
},
{
"epoch": 8.98,
"learning_rate": 5.391142295039209e-07,
"loss": 0.4062,
"step": 1464
},
{
"epoch": 8.99,
"learning_rate": 5.326965613195867e-07,
"loss": 0.4785,
"step": 1465
},
{
"epoch": 8.99,
"learning_rate": 5.263162751031025e-07,
"loss": 0.4512,
"step": 1466
},
{
"epoch": 9.0,
"learning_rate": 5.199733960472431e-07,
"loss": 0.416,
"step": 1467
},
{
"epoch": 9.01,
"learning_rate": 5.136679491970809e-07,
"loss": 0.3584,
"step": 1468
},
{
"epoch": 9.01,
"learning_rate": 5.073999594498869e-07,
"loss": 0.3274,
"step": 1469
},
{
"epoch": 9.02,
"learning_rate": 5.011694515550303e-07,
"loss": 0.3901,
"step": 1470
},
{
"epoch": 9.02,
"learning_rate": 4.949764501138832e-07,
"loss": 0.3359,
"step": 1471
},
{
"epoch": 9.03,
"learning_rate": 4.888209795797205e-07,
"loss": 0.3325,
"step": 1472
},
{
"epoch": 9.04,
"learning_rate": 4.827030642576236e-07,
"loss": 0.3188,
"step": 1473
},
{
"epoch": 9.04,
"learning_rate": 4.766227283043912e-07,
"loss": 0.3936,
"step": 1474
},
{
"epoch": 9.05,
"learning_rate": 4.7057999572843516e-07,
"loss": 0.3057,
"step": 1475
},
{
"epoch": 9.06,
"learning_rate": 4.645748903896885e-07,
"loss": 0.3564,
"step": 1476
},
{
"epoch": 9.06,
"learning_rate": 4.5860743599951186e-07,
"loss": 0.3252,
"step": 1477
},
{
"epoch": 9.07,
"learning_rate": 4.5267765612060253e-07,
"loss": 0.355,
"step": 1478
},
{
"epoch": 9.07,
"learning_rate": 4.4678557416689586e-07,
"loss": 0.332,
"step": 1479
},
{
"epoch": 9.08,
"learning_rate": 4.4093121340347824e-07,
"loss": 0.3267,
"step": 1480
},
{
"epoch": 9.09,
"learning_rate": 4.3511459694648873e-07,
"loss": 0.3574,
"step": 1481
},
{
"epoch": 9.09,
"learning_rate": 4.2933574776303664e-07,
"loss": 0.3354,
"step": 1482
},
{
"epoch": 9.1,
"learning_rate": 4.235946886711018e-07,
"loss": 0.3193,
"step": 1483
},
{
"epoch": 9.1,
"learning_rate": 4.1789144233945087e-07,
"loss": 0.3301,
"step": 1484
},
{
"epoch": 9.11,
"learning_rate": 4.122260312875437e-07,
"loss": 0.3311,
"step": 1485
},
{
"epoch": 9.12,
"learning_rate": 4.0659847788544926e-07,
"loss": 0.3257,
"step": 1486
},
{
"epoch": 9.12,
"learning_rate": 4.010088043537519e-07,
"loss": 0.3389,
"step": 1487
},
{
"epoch": 9.13,
"learning_rate": 3.954570327634677e-07,
"loss": 0.3252,
"step": 1488
},
{
"epoch": 9.13,
"learning_rate": 3.899431850359503e-07,
"loss": 0.3359,
"step": 1489
},
{
"epoch": 9.14,
"learning_rate": 3.8446728294281865e-07,
"loss": 0.3408,
"step": 1490
},
{
"epoch": 9.15,
"learning_rate": 3.7902934810585603e-07,
"loss": 0.3555,
"step": 1491
},
{
"epoch": 9.15,
"learning_rate": 3.736294019969311e-07,
"loss": 0.3066,
"step": 1492
},
{
"epoch": 9.16,
"learning_rate": 3.682674659379137e-07,
"loss": 0.3354,
"step": 1493
},
{
"epoch": 9.17,
"learning_rate": 3.629435611005916e-07,
"loss": 0.3721,
"step": 1494
},
{
"epoch": 9.17,
"learning_rate": 3.5765770850658244e-07,
"loss": 0.3271,
"step": 1495
},
{
"epoch": 9.18,
"learning_rate": 3.5240992902725204e-07,
"loss": 0.2993,
"step": 1496
},
{
"epoch": 9.18,
"learning_rate": 3.4720024338363633e-07,
"loss": 0.3398,
"step": 1497
},
{
"epoch": 9.19,
"learning_rate": 3.420286721463562e-07,
"loss": 0.3213,
"step": 1498
},
{
"epoch": 9.2,
"learning_rate": 3.3689523573553597e-07,
"loss": 0.3203,
"step": 1499
},
{
"epoch": 9.2,
"learning_rate": 3.3179995442071956e-07,
"loss": 0.3105,
"step": 1500
},
{
"epoch": 9.21,
"learning_rate": 3.2674284832080127e-07,
"loss": 0.3369,
"step": 1501
},
{
"epoch": 9.21,
"learning_rate": 3.217239374039338e-07,
"loss": 0.3384,
"step": 1502
},
{
"epoch": 9.22,
"learning_rate": 3.1674324148745827e-07,
"loss": 0.2983,
"step": 1503
},
{
"epoch": 9.23,
"learning_rate": 3.118007802378198e-07,
"loss": 0.374,
"step": 1504
},
{
"epoch": 9.23,
"learning_rate": 3.0689657317049205e-07,
"loss": 0.3257,
"step": 1505
},
{
"epoch": 9.24,
"learning_rate": 3.020306396499062e-07,
"loss": 0.3735,
"step": 1506
},
{
"epoch": 9.25,
"learning_rate": 2.972029988893621e-07,
"loss": 0.3589,
"step": 1507
},
{
"epoch": 9.25,
"learning_rate": 2.9241366995096387e-07,
"loss": 0.2961,
"step": 1508
},
{
"epoch": 9.26,
"learning_rate": 2.8766267174553884e-07,
"loss": 0.2913,
"step": 1509
},
{
"epoch": 9.26,
"learning_rate": 2.8295002303256546e-07,
"loss": 0.3169,
"step": 1510
},
{
"epoch": 9.27,
"learning_rate": 2.7827574242009434e-07,
"loss": 0.355,
"step": 1511
},
{
"epoch": 9.28,
"learning_rate": 2.736398483646807e-07,
"loss": 0.3374,
"step": 1512
},
{
"epoch": 9.28,
"learning_rate": 2.6904235917131094e-07,
"loss": 0.334,
"step": 1513
},
{
"epoch": 9.29,
"learning_rate": 2.64483292993325e-07,
"loss": 0.3369,
"step": 1514
},
{
"epoch": 9.29,
"learning_rate": 2.599626678323508e-07,
"loss": 0.3076,
"step": 1515
},
{
"epoch": 9.3,
"learning_rate": 2.554805015382289e-07,
"loss": 0.3066,
"step": 1516
},
{
"epoch": 9.31,
"learning_rate": 2.5103681180894566e-07,
"loss": 0.3735,
"step": 1517
},
{
"epoch": 9.31,
"learning_rate": 2.4663161619055797e-07,
"loss": 0.3203,
"step": 1518
},
{
"epoch": 9.32,
"learning_rate": 2.422649320771331e-07,
"loss": 0.2974,
"step": 1519
},
{
"epoch": 9.33,
"learning_rate": 2.3793677671066882e-07,
"loss": 0.2905,
"step": 1520
},
{
"epoch": 9.33,
"learning_rate": 2.3364716718103143e-07,
"loss": 0.3438,
"step": 1521
},
{
"epoch": 9.34,
"learning_rate": 2.293961204258932e-07,
"loss": 0.3091,
"step": 1522
},
{
"epoch": 9.34,
"learning_rate": 2.2518365323065284e-07,
"loss": 0.3037,
"step": 1523
},
{
"epoch": 9.35,
"learning_rate": 2.2100978222838186e-07,
"loss": 0.4043,
"step": 1524
},
{
"epoch": 9.36,
"learning_rate": 2.1687452389974829e-07,
"loss": 0.3203,
"step": 1525
},
{
"epoch": 9.36,
"learning_rate": 2.1277789457296306e-07,
"loss": 0.4023,
"step": 1526
},
{
"epoch": 9.37,
"learning_rate": 2.0871991042370255e-07,
"loss": 0.3345,
"step": 1527
},
{
"epoch": 9.37,
"learning_rate": 2.0470058747505516e-07,
"loss": 0.3618,
"step": 1528
},
{
"epoch": 9.38,
"learning_rate": 2.0071994159745367e-07,
"loss": 0.333,
"step": 1529
},
{
"epoch": 9.39,
"learning_rate": 1.9677798850861517e-07,
"loss": 0.3579,
"step": 1530
},
{
"epoch": 9.39,
"learning_rate": 1.9287474377347238e-07,
"loss": 0.3389,
"step": 1531
},
{
"epoch": 9.4,
"learning_rate": 1.8901022280411906e-07,
"loss": 0.292,
"step": 1532
},
{
"epoch": 9.4,
"learning_rate": 1.8518444085974697e-07,
"loss": 0.3896,
"step": 1533
},
{
"epoch": 9.41,
"learning_rate": 1.8139741304658566e-07,
"loss": 0.3501,
"step": 1534
},
{
"epoch": 9.42,
"learning_rate": 1.776491543178438e-07,
"loss": 0.3237,
"step": 1535
},
{
"epoch": 9.42,
"learning_rate": 1.739396794736481e-07,
"loss": 0.334,
"step": 1536
},
{
"epoch": 9.43,
"learning_rate": 1.7026900316098217e-07,
"loss": 0.332,
"step": 1537
},
{
"epoch": 9.44,
"learning_rate": 1.6663713987363882e-07,
"loss": 0.3452,
"step": 1538
},
{
"epoch": 9.44,
"learning_rate": 1.6304410395215243e-07,
"loss": 0.3301,
"step": 1539
},
{
"epoch": 9.45,
"learning_rate": 1.5948990958374543e-07,
"loss": 0.3374,
"step": 1540
},
{
"epoch": 9.45,
"learning_rate": 1.559745708022753e-07,
"loss": 0.2935,
"step": 1541
},
{
"epoch": 9.46,
"learning_rate": 1.5249810148817658e-07,
"loss": 0.3643,
"step": 1542
},
{
"epoch": 9.47,
"learning_rate": 1.490605153684066e-07,
"loss": 0.3765,
"step": 1543
},
{
"epoch": 9.47,
"learning_rate": 1.4566182601638779e-07,
"loss": 0.335,
"step": 1544
},
{
"epoch": 9.48,
"learning_rate": 1.4230204685196202e-07,
"loss": 0.3569,
"step": 1545
},
{
"epoch": 9.48,
"learning_rate": 1.3898119114133192e-07,
"loss": 0.356,
"step": 1546
},
{
"epoch": 9.49,
"learning_rate": 1.3569927199700628e-07,
"loss": 0.3247,
"step": 1547
},
{
"epoch": 9.5,
"learning_rate": 1.3245630237775585e-07,
"loss": 0.3125,
"step": 1548
},
{
"epoch": 9.5,
"learning_rate": 1.292522950885533e-07,
"loss": 0.3115,
"step": 1549
},
{
"epoch": 9.51,
"learning_rate": 1.2608726278053208e-07,
"loss": 0.3647,
"step": 1550
},
{
"epoch": 9.52,
"learning_rate": 1.2296121795092874e-07,
"loss": 0.3447,
"step": 1551
},
{
"epoch": 9.52,
"learning_rate": 1.1987417294303748e-07,
"loss": 0.3105,
"step": 1552
},
{
"epoch": 9.53,
"learning_rate": 1.1682613994615788e-07,
"loss": 0.3765,
"step": 1553
},
{
"epoch": 9.53,
"learning_rate": 1.1381713099555381e-07,
"loss": 0.3472,
"step": 1554
},
{
"epoch": 9.54,
"learning_rate": 1.1084715797239798e-07,
"loss": 0.2969,
"step": 1555
},
{
"epoch": 9.55,
"learning_rate": 1.0791623260372863e-07,
"loss": 0.3467,
"step": 1556
},
{
"epoch": 9.55,
"learning_rate": 1.0502436646240399e-07,
"loss": 0.3164,
"step": 1557
},
{
"epoch": 9.56,
"learning_rate": 1.0217157096705676e-07,
"loss": 0.3633,
"step": 1558
},
{
"epoch": 9.56,
"learning_rate": 9.935785738204417e-08,
"loss": 0.3267,
"step": 1559
},
{
"epoch": 9.57,
"learning_rate": 9.658323681741133e-08,
"loss": 0.3037,
"step": 1560
},
{
"epoch": 9.58,
"learning_rate": 9.384772022884015e-08,
"loss": 0.3833,
"step": 1561
},
{
"epoch": 9.58,
"learning_rate": 9.11513184176116e-08,
"loss": 0.3452,
"step": 1562
},
{
"epoch": 9.59,
"learning_rate": 8.8494042030558e-08,
"loss": 0.3096,
"step": 1563
},
{
"epoch": 9.6,
"learning_rate": 8.587590156002635e-08,
"loss": 0.3167,
"step": 1564
},
{
"epoch": 9.6,
"learning_rate": 8.329690734383278e-08,
"loss": 0.3413,
"step": 1565
},
{
"epoch": 9.61,
"learning_rate": 8.075706956522156e-08,
"loss": 0.3936,
"step": 1566
},
{
"epoch": 9.61,
"learning_rate": 7.825639825282949e-08,
"loss": 0.3364,
"step": 1567
},
{
"epoch": 9.62,
"learning_rate": 7.579490328064265e-08,
"loss": 0.3911,
"step": 1568
},
{
"epoch": 9.63,
"learning_rate": 7.33725943679553e-08,
"loss": 0.2969,
"step": 1569
},
{
"epoch": 9.63,
"learning_rate": 7.098948107933656e-08,
"loss": 0.3291,
"step": 1570
},
{
"epoch": 9.64,
"learning_rate": 6.864557282459162e-08,
"loss": 0.3184,
"step": 1571
},
{
"epoch": 9.64,
"learning_rate": 6.634087885871832e-08,
"loss": 0.335,
"step": 1572
},
{
"epoch": 9.65,
"learning_rate": 6.407540828188175e-08,
"loss": 0.3523,
"step": 1573
},
{
"epoch": 9.66,
"learning_rate": 6.184917003936752e-08,
"loss": 0.2961,
"step": 1574
},
{
"epoch": 9.66,
"learning_rate": 5.966217292155296e-08,
"loss": 0.3701,
"step": 1575
},
{
"epoch": 9.67,
"learning_rate": 5.7514425563870436e-08,
"loss": 0.3662,
"step": 1576
},
{
"epoch": 9.67,
"learning_rate": 5.540593644677295e-08,
"loss": 0.3115,
"step": 1577
},
{
"epoch": 9.68,
"learning_rate": 5.333671389569972e-08,
"loss": 0.3164,
"step": 1578
},
{
"epoch": 9.69,
"learning_rate": 5.1306766081048456e-08,
"loss": 0.3003,
"step": 1579
},
{
"epoch": 9.69,
"learning_rate": 4.931610101813533e-08,
"loss": 0.3164,
"step": 1580
},
{
"epoch": 9.7,
"learning_rate": 4.73647265671684e-08,
"loss": 0.3521,
"step": 1581
},
{
"epoch": 9.71,
"learning_rate": 4.545265043321645e-08,
"loss": 0.2876,
"step": 1582
},
{
"epoch": 9.71,
"learning_rate": 4.357988016617687e-08,
"loss": 0.2947,
"step": 1583
},
{
"epoch": 9.72,
"learning_rate": 4.174642316074562e-08,
"loss": 0.3423,
"step": 1584
},
{
"epoch": 9.72,
"learning_rate": 3.9952286656389506e-08,
"loss": 0.3438,
"step": 1585
},
{
"epoch": 9.73,
"learning_rate": 3.819747773731841e-08,
"loss": 0.3872,
"step": 1586
},
{
"epoch": 9.74,
"learning_rate": 3.648200333245422e-08,
"loss": 0.3247,
"step": 1587
},
{
"epoch": 9.74,
"learning_rate": 3.480587021540527e-08,
"loss": 0.3091,
"step": 1588
},
{
"epoch": 9.75,
"learning_rate": 3.316908500443972e-08,
"loss": 0.3633,
"step": 1589
},
{
"epoch": 9.75,
"learning_rate": 3.1571654162461107e-08,
"loss": 0.3281,
"step": 1590
},
{
"epoch": 9.76,
"learning_rate": 3.001358399697618e-08,
"loss": 0.3545,
"step": 1591
},
{
"epoch": 9.77,
"learning_rate": 2.8494880660080437e-08,
"loss": 0.3472,
"step": 1592
},
{
"epoch": 9.77,
"learning_rate": 2.7015550148423718e-08,
"loss": 0.3682,
"step": 1593
},
{
"epoch": 9.78,
"learning_rate": 2.557559830319245e-08,
"loss": 0.3105,
"step": 1594
},
{
"epoch": 9.79,
"learning_rate": 2.417503081008632e-08,
"loss": 0.3003,
"step": 1595
},
{
"epoch": 9.79,
"learning_rate": 2.2813853199292745e-08,
"loss": 0.3608,
"step": 1596
},
{
"epoch": 9.8,
"learning_rate": 2.1492070845468005e-08,
"loss": 0.2871,
"step": 1597
},
{
"epoch": 9.8,
"learning_rate": 2.0209688967713914e-08,
"loss": 0.3169,
"step": 1598
},
{
"epoch": 9.81,
"learning_rate": 1.896671262955896e-08,
"loss": 0.3218,
"step": 1599
},
{
"epoch": 9.82,
"learning_rate": 1.7763146738938307e-08,
"loss": 0.332,
"step": 1600
},
{
"epoch": 9.82,
"learning_rate": 1.659899604816939e-08,
"loss": 0.3013,
"step": 1601
},
{
"epoch": 9.83,
"learning_rate": 1.5474265153944124e-08,
"loss": 0.3262,
"step": 1602
},
{
"epoch": 9.83,
"learning_rate": 1.4388958497300043e-08,
"loss": 0.2925,
"step": 1603
},
{
"epoch": 9.84,
"learning_rate": 1.3343080363604766e-08,
"loss": 0.314,
"step": 1604
},
{
"epoch": 9.85,
"learning_rate": 1.2336634882544885e-08,
"loss": 0.3696,
"step": 1605
},
{
"epoch": 9.85,
"learning_rate": 1.1369626028104874e-08,
"loss": 0.3647,
"step": 1606
},
{
"epoch": 9.86,
"learning_rate": 1.0442057618551549e-08,
"loss": 0.3306,
"step": 1607
},
{
"epoch": 9.87,
"learning_rate": 9.553933316420739e-09,
"loss": 0.3916,
"step": 1608
},
{
"epoch": 9.87,
"learning_rate": 8.705256628499525e-09,
"loss": 0.3525,
"step": 1609
},
{
"epoch": 9.88,
"learning_rate": 7.896030905818474e-09,
"loss": 0.3662,
"step": 1610
},
{
"epoch": 9.88,
"learning_rate": 7.126259343631648e-09,
"loss": 0.3042,
"step": 1611
},
{
"epoch": 9.89,
"learning_rate": 6.39594498140883e-09,
"loss": 0.3257,
"step": 1612
},
{
"epoch": 9.9,
"learning_rate": 5.705090702819993e-09,
"loss": 0.3237,
"step": 1613
},
{
"epoch": 9.9,
"learning_rate": 5.053699235726406e-09,
"loss": 0.29,
"step": 1614
},
{
"epoch": 9.91,
"learning_rate": 4.4417731521717576e-09,
"loss": 0.3081,
"step": 1615
},
{
"epoch": 9.91,
"learning_rate": 3.869314868363283e-09,
"loss": 0.2944,
"step": 1616
},
{
"epoch": 9.92,
"learning_rate": 3.3363266446750918e-09,
"loss": 0.2676,
"step": 1617
},
{
"epoch": 9.93,
"learning_rate": 2.842810585627076e-09,
"loss": 0.3086,
"step": 1618
},
{
"epoch": 9.93,
"learning_rate": 2.388768639886019e-09,
"loss": 0.3047,
"step": 1619
},
{
"epoch": 9.94,
"learning_rate": 1.9742026002500526e-09,
"loss": 0.3242,
"step": 1620
},
{
"epoch": 9.94,
"learning_rate": 1.5991141036475478e-09,
"loss": 0.3086,
"step": 1621
},
{
"epoch": 9.95,
"learning_rate": 1.263504631129342e-09,
"loss": 0.3174,
"step": 1622
},
{
"epoch": 9.96,
"learning_rate": 9.673755078598578e-10,
"loss": 0.354,
"step": 1623
},
{
"epoch": 9.96,
"learning_rate": 7.107279031148828e-10,
"loss": 0.3208,
"step": 1624
},
{
"epoch": 9.97,
"learning_rate": 4.935628302760175e-10,
"loss": 0.3721,
"step": 1625
},
{
"epoch": 9.98,
"learning_rate": 3.158811468273459e-10,
"loss": 0.3354,
"step": 1626
},
{
"epoch": 9.98,
"learning_rate": 1.776835543509936e-10,
"loss": 0.3215,
"step": 1627
},
{
"epoch": 9.99,
"learning_rate": 7.897059852490785e-11,
"loss": 0.2866,
"step": 1628
},
{
"epoch": 9.99,
"learning_rate": 1.9742669119526824e-11,
"loss": 0.3057,
"step": 1629
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.3101,
"step": 1630
},
{
"epoch": 10.0,
"step": 1630,
"total_flos": 41842376695808.0,
"train_loss": 1.8768900678201688,
"train_runtime": 1493.3467,
"train_samples_per_second": 69.736,
"train_steps_per_second": 1.092
}
],
"logging_steps": 1.0,
"max_steps": 1630,
"num_train_epochs": 10,
"save_steps": 10000,
"total_flos": 41842376695808.0,
"trial_name": null,
"trial_params": null
}