diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8419 +1,3 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.986666666666667, - "eval_steps": 500000, - "global_step": 140000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 1.99862042482382e-05, - "loss": 5.2806, - "step": 100 - }, - { - "epoch": 0.0, - "learning_rate": 1.9971981823741495e-05, - "loss": 3.6273, - "step": 200 - }, - { - "epoch": 0.01, - "learning_rate": 1.995775939924479e-05, - "loss": 3.2687, - "step": 300 - }, - { - "epoch": 0.01, - "learning_rate": 1.994353697474809e-05, - "loss": 2.9993, - "step": 400 - }, - { - "epoch": 0.01, - "learning_rate": 1.9929314550251384e-05, - "loss": 2.8417, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.991509212575468e-05, - "loss": 2.6742, - "step": 600 - }, - { - "epoch": 0.01, - "learning_rate": 1.9900869701257974e-05, - "loss": 2.4993, - "step": 700 - }, - { - "epoch": 0.02, - "learning_rate": 1.9886647276761272e-05, - "loss": 2.4251, - "step": 800 - }, - { - "epoch": 0.02, - "learning_rate": 1.9872424852264567e-05, - "loss": 2.3429, - "step": 900 - }, - { - "epoch": 0.02, - "learning_rate": 1.9858202427767862e-05, - "loss": 2.2494, - "step": 1000 - }, - { - "epoch": 0.02, - "learning_rate": 1.984398000327116e-05, - "loss": 2.1587, - "step": 1100 - }, - { - "epoch": 0.03, - "learning_rate": 1.9829757578774455e-05, - "loss": 2.0811, - "step": 1200 - }, - { - "epoch": 0.03, - "learning_rate": 1.981553515427775e-05, - "loss": 2.0518, - "step": 1300 - }, - { - "epoch": 0.03, - "learning_rate": 1.980131272978105e-05, - "loss": 1.9941, - "step": 1400 - }, - { - "epoch": 0.03, - "learning_rate": 1.9787090305284344e-05, - "loss": 1.9329, - "step": 1500 - }, - { - "epoch": 0.03, - "learning_rate": 1.977286788078764e-05, - "loss": 1.9052, - "step": 1600 - }, - { - "epoch": 0.04, - "learning_rate": 1.9758645456290934e-05, - "loss": 1.889, - "step": 1700 - }, - { - "epoch": 0.04, - "learning_rate": 1.9744423031794232e-05, - "loss": 1.8027, - "step": 1800 - }, - { - "epoch": 0.04, - "learning_rate": 1.9730200607297527e-05, - "loss": 1.7749, - "step": 1900 - }, - { - "epoch": 0.04, - "learning_rate": 1.9715978182800822e-05, - "loss": 1.7478, - "step": 2000 - }, - { - "epoch": 0.04, - "learning_rate": 1.970175575830412e-05, - "loss": 1.6756, - "step": 2100 - }, - { - "epoch": 0.05, - "learning_rate": 1.9687533333807415e-05, - "loss": 1.6775, - "step": 2200 - }, - { - "epoch": 0.05, - "learning_rate": 1.9673310909310714e-05, - "loss": 1.6507, - "step": 2300 - }, - { - "epoch": 0.05, - "learning_rate": 1.965908848481401e-05, - "loss": 1.645, - "step": 2400 - }, - { - "epoch": 0.05, - "learning_rate": 1.9644866060317303e-05, - "loss": 1.616, - "step": 2500 - }, - { - "epoch": 0.06, - "learning_rate": 1.9630643635820602e-05, - "loss": 1.5684, - "step": 2600 - }, - { - "epoch": 0.06, - "learning_rate": 1.9616421211323893e-05, - "loss": 1.5602, - "step": 2700 - }, - { - "epoch": 0.06, - "learning_rate": 1.9602198786827192e-05, - "loss": 1.5526, - "step": 2800 - }, - { - "epoch": 0.06, - "learning_rate": 1.9587976362330487e-05, - "loss": 1.5324, - "step": 2900 - }, - { - "epoch": 0.06, - "learning_rate": 1.9573753937833785e-05, - "loss": 1.5142, - "step": 3000 - }, - { - "epoch": 0.07, - "learning_rate": 1.955953151333708e-05, - "loss": 1.4919, - "step": 3100 - }, - { - "epoch": 0.07, - "learning_rate": 1.9545309088840375e-05, - "loss": 1.5006, - "step": 3200 - }, - { - "epoch": 0.07, - "learning_rate": 1.9531086664343673e-05, - "loss": 1.4516, - "step": 3300 - }, - { - "epoch": 0.07, - "learning_rate": 1.951686423984697e-05, - "loss": 1.4454, - "step": 3400 - }, - { - "epoch": 0.07, - "learning_rate": 1.9502641815350263e-05, - "loss": 1.4276, - "step": 3500 - }, - { - "epoch": 0.08, - "learning_rate": 1.9488419390853562e-05, - "loss": 1.4065, - "step": 3600 - }, - { - "epoch": 0.08, - "learning_rate": 1.9474196966356857e-05, - "loss": 1.3845, - "step": 3700 - }, - { - "epoch": 0.08, - "learning_rate": 1.945997454186015e-05, - "loss": 1.3871, - "step": 3800 - }, - { - "epoch": 0.08, - "learning_rate": 1.9445752117363447e-05, - "loss": 1.3778, - "step": 3900 - }, - { - "epoch": 0.09, - "learning_rate": 1.9431529692866745e-05, - "loss": 1.3523, - "step": 4000 - }, - { - "epoch": 0.09, - "learning_rate": 1.941730726837004e-05, - "loss": 1.3421, - "step": 4100 - }, - { - "epoch": 0.09, - "learning_rate": 1.9403084843873335e-05, - "loss": 1.3313, - "step": 4200 - }, - { - "epoch": 0.09, - "learning_rate": 1.9388862419376633e-05, - "loss": 1.3358, - "step": 4300 - }, - { - "epoch": 0.09, - "learning_rate": 1.937463999487993e-05, - "loss": 1.3021, - "step": 4400 - }, - { - "epoch": 0.1, - "learning_rate": 1.9360417570383227e-05, - "loss": 1.3315, - "step": 4500 - }, - { - "epoch": 0.1, - "learning_rate": 1.934619514588652e-05, - "loss": 1.3232, - "step": 4600 - }, - { - "epoch": 0.1, - "learning_rate": 1.9331972721389817e-05, - "loss": 1.2925, - "step": 4700 - }, - { - "epoch": 0.1, - "learning_rate": 1.931775029689311e-05, - "loss": 1.2768, - "step": 4800 - }, - { - "epoch": 0.1, - "learning_rate": 1.9303527872396407e-05, - "loss": 1.259, - "step": 4900 - }, - { - "epoch": 0.11, - "learning_rate": 1.9289305447899705e-05, - "loss": 1.2595, - "step": 5000 - }, - { - "epoch": 0.11, - "learning_rate": 1.9275083023403e-05, - "loss": 1.2254, - "step": 5100 - }, - { - "epoch": 0.11, - "learning_rate": 1.9260860598906298e-05, - "loss": 1.2301, - "step": 5200 - }, - { - "epoch": 0.11, - "learning_rate": 1.9246638174409593e-05, - "loss": 1.2349, - "step": 5300 - }, - { - "epoch": 0.12, - "learning_rate": 1.9232415749912888e-05, - "loss": 1.2096, - "step": 5400 - }, - { - "epoch": 0.12, - "learning_rate": 1.9218193325416187e-05, - "loss": 1.2194, - "step": 5500 - }, - { - "epoch": 0.12, - "learning_rate": 1.920397090091948e-05, - "loss": 1.2226, - "step": 5600 - }, - { - "epoch": 0.12, - "learning_rate": 1.9189748476422777e-05, - "loss": 1.1933, - "step": 5700 - }, - { - "epoch": 0.12, - "learning_rate": 1.9175526051926075e-05, - "loss": 1.2068, - "step": 5800 - }, - { - "epoch": 0.13, - "learning_rate": 1.916130362742937e-05, - "loss": 1.21, - "step": 5900 - }, - { - "epoch": 0.13, - "learning_rate": 1.9147081202932665e-05, - "loss": 1.1852, - "step": 6000 - }, - { - "epoch": 0.13, - "learning_rate": 1.913285877843596e-05, - "loss": 1.1707, - "step": 6100 - }, - { - "epoch": 0.13, - "learning_rate": 1.9118636353939258e-05, - "loss": 1.1565, - "step": 6200 - }, - { - "epoch": 0.13, - "learning_rate": 1.9104413929442553e-05, - "loss": 1.1662, - "step": 6300 - }, - { - "epoch": 0.14, - "learning_rate": 1.9090191504945848e-05, - "loss": 1.1382, - "step": 6400 - }, - { - "epoch": 0.14, - "learning_rate": 1.9075969080449146e-05, - "loss": 1.1716, - "step": 6500 - }, - { - "epoch": 0.14, - "learning_rate": 1.906174665595244e-05, - "loss": 1.1565, - "step": 6600 - }, - { - "epoch": 0.14, - "learning_rate": 1.904752423145574e-05, - "loss": 1.1151, - "step": 6700 - }, - { - "epoch": 0.15, - "learning_rate": 1.9033301806959035e-05, - "loss": 1.1403, - "step": 6800 - }, - { - "epoch": 0.15, - "learning_rate": 1.901907938246233e-05, - "loss": 1.1078, - "step": 6900 - }, - { - "epoch": 0.15, - "learning_rate": 1.9004856957965625e-05, - "loss": 1.1204, - "step": 7000 - }, - { - "epoch": 0.15, - "learning_rate": 1.899063453346892e-05, - "loss": 1.0886, - "step": 7100 - }, - { - "epoch": 0.15, - "learning_rate": 1.8976412108972218e-05, - "loss": 1.1277, - "step": 7200 - }, - { - "epoch": 0.16, - "learning_rate": 1.8962189684475513e-05, - "loss": 1.1038, - "step": 7300 - }, - { - "epoch": 0.16, - "learning_rate": 1.894796725997881e-05, - "loss": 1.0894, - "step": 7400 - }, - { - "epoch": 0.16, - "learning_rate": 1.8933744835482106e-05, - "loss": 1.0767, - "step": 7500 - }, - { - "epoch": 0.16, - "learning_rate": 1.89195224109854e-05, - "loss": 1.0702, - "step": 7600 - }, - { - "epoch": 0.16, - "learning_rate": 1.89052999864887e-05, - "loss": 1.0973, - "step": 7700 - }, - { - "epoch": 0.17, - "learning_rate": 1.8891077561991995e-05, - "loss": 1.08, - "step": 7800 - }, - { - "epoch": 0.17, - "learning_rate": 1.887685513749529e-05, - "loss": 1.0724, - "step": 7900 - }, - { - "epoch": 0.17, - "learning_rate": 1.8862632712998585e-05, - "loss": 1.0463, - "step": 8000 - }, - { - "epoch": 0.17, - "learning_rate": 1.8848410288501883e-05, - "loss": 1.0401, - "step": 8100 - }, - { - "epoch": 0.17, - "learning_rate": 1.8834187864005178e-05, - "loss": 1.0301, - "step": 8200 - }, - { - "epoch": 0.18, - "learning_rate": 1.8819965439508473e-05, - "loss": 1.0488, - "step": 8300 - }, - { - "epoch": 0.18, - "learning_rate": 1.880574301501177e-05, - "loss": 1.0408, - "step": 8400 - }, - { - "epoch": 0.18, - "learning_rate": 1.8791520590515066e-05, - "loss": 1.038, - "step": 8500 - }, - { - "epoch": 0.18, - "learning_rate": 1.877729816601836e-05, - "loss": 1.0288, - "step": 8600 - }, - { - "epoch": 0.19, - "learning_rate": 1.876307574152166e-05, - "loss": 1.036, - "step": 8700 - }, - { - "epoch": 0.19, - "learning_rate": 1.8748853317024955e-05, - "loss": 1.0297, - "step": 8800 - }, - { - "epoch": 0.19, - "learning_rate": 1.8734630892528253e-05, - "loss": 1.015, - "step": 8900 - }, - { - "epoch": 0.19, - "learning_rate": 1.8720408468031548e-05, - "loss": 1.0222, - "step": 9000 - }, - { - "epoch": 0.19, - "learning_rate": 1.8706186043534843e-05, - "loss": 1.0312, - "step": 9100 - }, - { - "epoch": 0.2, - "learning_rate": 1.8691963619038138e-05, - "loss": 0.9983, - "step": 9200 - }, - { - "epoch": 0.2, - "learning_rate": 1.8677741194541433e-05, - "loss": 1.0013, - "step": 9300 - }, - { - "epoch": 0.2, - "learning_rate": 1.866351877004473e-05, - "loss": 1.0147, - "step": 9400 - }, - { - "epoch": 0.2, - "learning_rate": 1.8649296345548026e-05, - "loss": 0.9913, - "step": 9500 - }, - { - "epoch": 0.2, - "learning_rate": 1.8635073921051325e-05, - "loss": 0.9795, - "step": 9600 - }, - { - "epoch": 0.21, - "learning_rate": 1.862085149655462e-05, - "loss": 0.9799, - "step": 9700 - }, - { - "epoch": 0.21, - "learning_rate": 1.8606629072057914e-05, - "loss": 0.9934, - "step": 9800 - }, - { - "epoch": 0.21, - "learning_rate": 1.8592406647561213e-05, - "loss": 0.9725, - "step": 9900 - }, - { - "epoch": 0.21, - "learning_rate": 1.8578184223064508e-05, - "loss": 0.9886, - "step": 10000 - }, - { - "epoch": 0.22, - "learning_rate": 1.8563961798567803e-05, - "loss": 0.9837, - "step": 10100 - }, - { - "epoch": 0.22, - "learning_rate": 1.8549739374071098e-05, - "loss": 0.9567, - "step": 10200 - }, - { - "epoch": 0.22, - "learning_rate": 1.8535516949574396e-05, - "loss": 0.9544, - "step": 10300 - }, - { - "epoch": 0.22, - "learning_rate": 1.852129452507769e-05, - "loss": 0.9693, - "step": 10400 - }, - { - "epoch": 0.22, - "learning_rate": 1.8507072100580986e-05, - "loss": 0.9519, - "step": 10500 - }, - { - "epoch": 0.23, - "learning_rate": 1.8492849676084284e-05, - "loss": 0.9584, - "step": 10600 - }, - { - "epoch": 0.23, - "learning_rate": 1.847862725158758e-05, - "loss": 0.9361, - "step": 10700 - }, - { - "epoch": 0.23, - "learning_rate": 1.8464404827090874e-05, - "loss": 0.9314, - "step": 10800 - }, - { - "epoch": 0.23, - "learning_rate": 1.8450182402594173e-05, - "loss": 0.9184, - "step": 10900 - }, - { - "epoch": 0.23, - "learning_rate": 1.8435959978097468e-05, - "loss": 0.9332, - "step": 11000 - }, - { - "epoch": 0.24, - "learning_rate": 1.8421737553600766e-05, - "loss": 0.9261, - "step": 11100 - }, - { - "epoch": 0.24, - "learning_rate": 1.8407515129104058e-05, - "loss": 0.9049, - "step": 11200 - }, - { - "epoch": 0.24, - "learning_rate": 1.8393292704607356e-05, - "loss": 0.9413, - "step": 11300 - }, - { - "epoch": 0.24, - "learning_rate": 1.837907028011065e-05, - "loss": 0.9411, - "step": 11400 - }, - { - "epoch": 0.25, - "learning_rate": 1.8364847855613946e-05, - "loss": 0.9145, - "step": 11500 - }, - { - "epoch": 0.25, - "learning_rate": 1.8350625431117244e-05, - "loss": 0.9131, - "step": 11600 - }, - { - "epoch": 0.25, - "learning_rate": 1.833640300662054e-05, - "loss": 0.9214, - "step": 11700 - }, - { - "epoch": 0.25, - "learning_rate": 1.8322180582123838e-05, - "loss": 0.9121, - "step": 11800 - }, - { - "epoch": 0.25, - "learning_rate": 1.8307958157627133e-05, - "loss": 0.9119, - "step": 11900 - }, - { - "epoch": 0.26, - "learning_rate": 1.8293735733130428e-05, - "loss": 0.8982, - "step": 12000 - }, - { - "epoch": 0.26, - "learning_rate": 1.8279513308633726e-05, - "loss": 0.8759, - "step": 12100 - }, - { - "epoch": 0.26, - "learning_rate": 1.826529088413702e-05, - "loss": 0.8995, - "step": 12200 - }, - { - "epoch": 0.26, - "learning_rate": 1.8251068459640316e-05, - "loss": 0.909, - "step": 12300 - }, - { - "epoch": 0.26, - "learning_rate": 1.823684603514361e-05, - "loss": 0.8936, - "step": 12400 - }, - { - "epoch": 0.27, - "learning_rate": 1.822262361064691e-05, - "loss": 0.8864, - "step": 12500 - }, - { - "epoch": 0.27, - "learning_rate": 1.8208401186150204e-05, - "loss": 0.8911, - "step": 12600 - }, - { - "epoch": 0.27, - "learning_rate": 1.81941787616535e-05, - "loss": 0.8811, - "step": 12700 - }, - { - "epoch": 0.27, - "learning_rate": 1.8179956337156798e-05, - "loss": 0.8801, - "step": 12800 - }, - { - "epoch": 0.28, - "learning_rate": 1.8165733912660093e-05, - "loss": 0.8815, - "step": 12900 - }, - { - "epoch": 0.28, - "learning_rate": 1.8151511488163388e-05, - "loss": 0.8742, - "step": 13000 - }, - { - "epoch": 0.28, - "learning_rate": 1.8137289063666686e-05, - "loss": 0.8564, - "step": 13100 - }, - { - "epoch": 0.28, - "learning_rate": 1.812306663916998e-05, - "loss": 0.8465, - "step": 13200 - }, - { - "epoch": 0.28, - "learning_rate": 1.810884421467328e-05, - "loss": 0.8335, - "step": 13300 - }, - { - "epoch": 0.29, - "learning_rate": 1.809462179017657e-05, - "loss": 0.8573, - "step": 13400 - }, - { - "epoch": 0.29, - "learning_rate": 1.808039936567987e-05, - "loss": 0.8476, - "step": 13500 - }, - { - "epoch": 0.29, - "learning_rate": 1.8066176941183164e-05, - "loss": 0.844, - "step": 13600 - }, - { - "epoch": 0.29, - "learning_rate": 1.805195451668646e-05, - "loss": 0.8424, - "step": 13700 - }, - { - "epoch": 0.29, - "learning_rate": 1.8037732092189757e-05, - "loss": 0.8402, - "step": 13800 - }, - { - "epoch": 0.3, - "learning_rate": 1.8023509667693052e-05, - "loss": 0.8349, - "step": 13900 - }, - { - "epoch": 0.3, - "learning_rate": 1.800928724319635e-05, - "loss": 0.843, - "step": 14000 - }, - { - "epoch": 0.3, - "learning_rate": 1.7995064818699646e-05, - "loss": 0.8307, - "step": 14100 - }, - { - "epoch": 0.3, - "learning_rate": 1.798084239420294e-05, - "loss": 0.8324, - "step": 14200 - }, - { - "epoch": 0.31, - "learning_rate": 1.796661996970624e-05, - "loss": 0.8173, - "step": 14300 - }, - { - "epoch": 0.31, - "learning_rate": 1.795239754520953e-05, - "loss": 0.8246, - "step": 14400 - }, - { - "epoch": 0.31, - "learning_rate": 1.793817512071283e-05, - "loss": 0.8159, - "step": 14500 - }, - { - "epoch": 0.31, - "learning_rate": 1.7923952696216124e-05, - "loss": 0.829, - "step": 14600 - }, - { - "epoch": 0.31, - "learning_rate": 1.7909730271719422e-05, - "loss": 0.8368, - "step": 14700 - }, - { - "epoch": 0.32, - "learning_rate": 1.7895507847222717e-05, - "loss": 0.8027, - "step": 14800 - }, - { - "epoch": 0.32, - "learning_rate": 1.7881285422726012e-05, - "loss": 0.8249, - "step": 14900 - }, - { - "epoch": 0.32, - "learning_rate": 1.786706299822931e-05, - "loss": 0.8209, - "step": 15000 - }, - { - "epoch": 0.32, - "learning_rate": 1.7852840573732606e-05, - "loss": 0.8169, - "step": 15100 - }, - { - "epoch": 0.32, - "learning_rate": 1.78386181492359e-05, - "loss": 0.8082, - "step": 15200 - }, - { - "epoch": 0.33, - "learning_rate": 1.78243957247392e-05, - "loss": 0.8075, - "step": 15300 - }, - { - "epoch": 0.33, - "learning_rate": 1.7810173300242494e-05, - "loss": 0.8179, - "step": 15400 - }, - { - "epoch": 0.33, - "learning_rate": 1.779595087574579e-05, - "loss": 0.7908, - "step": 15500 - }, - { - "epoch": 0.33, - "learning_rate": 1.7781728451249084e-05, - "loss": 0.8037, - "step": 15600 - }, - { - "epoch": 0.33, - "learning_rate": 1.7767506026752382e-05, - "loss": 0.807, - "step": 15700 - }, - { - "epoch": 0.34, - "learning_rate": 1.7753283602255677e-05, - "loss": 0.8002, - "step": 15800 - }, - { - "epoch": 0.34, - "learning_rate": 1.7739061177758972e-05, - "loss": 0.795, - "step": 15900 - }, - { - "epoch": 0.34, - "learning_rate": 1.772483875326227e-05, - "loss": 0.7717, - "step": 16000 - }, - { - "epoch": 0.34, - "learning_rate": 1.7710616328765566e-05, - "loss": 0.781, - "step": 16100 - }, - { - "epoch": 0.35, - "learning_rate": 1.7696393904268864e-05, - "loss": 0.7809, - "step": 16200 - }, - { - "epoch": 0.35, - "learning_rate": 1.768217147977216e-05, - "loss": 0.7809, - "step": 16300 - }, - { - "epoch": 0.35, - "learning_rate": 1.7667949055275454e-05, - "loss": 0.7729, - "step": 16400 - }, - { - "epoch": 0.35, - "learning_rate": 1.7653726630778752e-05, - "loss": 0.7764, - "step": 16500 - }, - { - "epoch": 0.35, - "learning_rate": 1.7639504206282044e-05, - "loss": 0.7556, - "step": 16600 - }, - { - "epoch": 0.36, - "learning_rate": 1.7625281781785342e-05, - "loss": 0.7653, - "step": 16700 - }, - { - "epoch": 0.36, - "learning_rate": 1.7611059357288637e-05, - "loss": 0.7518, - "step": 16800 - }, - { - "epoch": 0.36, - "learning_rate": 1.7596836932791936e-05, - "loss": 0.7633, - "step": 16900 - }, - { - "epoch": 0.36, - "learning_rate": 1.758261450829523e-05, - "loss": 0.7681, - "step": 17000 - }, - { - "epoch": 0.36, - "learning_rate": 1.7568392083798525e-05, - "loss": 0.7523, - "step": 17100 - }, - { - "epoch": 0.37, - "learning_rate": 1.7554169659301824e-05, - "loss": 0.7655, - "step": 17200 - }, - { - "epoch": 0.37, - "learning_rate": 1.753994723480512e-05, - "loss": 0.7336, - "step": 17300 - }, - { - "epoch": 0.37, - "learning_rate": 1.7525724810308414e-05, - "loss": 0.7735, - "step": 17400 - }, - { - "epoch": 0.37, - "learning_rate": 1.7511502385811712e-05, - "loss": 0.7584, - "step": 17500 - }, - { - "epoch": 0.38, - "learning_rate": 1.7497279961315007e-05, - "loss": 0.7471, - "step": 17600 - }, - { - "epoch": 0.38, - "learning_rate": 1.7483057536818302e-05, - "loss": 0.7508, - "step": 17700 - }, - { - "epoch": 0.38, - "learning_rate": 1.7468835112321597e-05, - "loss": 0.7414, - "step": 17800 - }, - { - "epoch": 0.38, - "learning_rate": 1.7454612687824895e-05, - "loss": 0.7316, - "step": 17900 - }, - { - "epoch": 0.38, - "learning_rate": 1.744039026332819e-05, - "loss": 0.727, - "step": 18000 - }, - { - "epoch": 0.39, - "learning_rate": 1.7426167838831485e-05, - "loss": 0.7498, - "step": 18100 - }, - { - "epoch": 0.39, - "learning_rate": 1.7411945414334784e-05, - "loss": 0.7127, - "step": 18200 - }, - { - "epoch": 0.39, - "learning_rate": 1.739772298983808e-05, - "loss": 0.718, - "step": 18300 - }, - { - "epoch": 0.39, - "learning_rate": 1.7383500565341377e-05, - "loss": 0.714, - "step": 18400 - }, - { - "epoch": 0.39, - "learning_rate": 1.7369278140844672e-05, - "loss": 0.7108, - "step": 18500 - }, - { - "epoch": 0.4, - "learning_rate": 1.7355055716347967e-05, - "loss": 0.7233, - "step": 18600 - }, - { - "epoch": 0.4, - "learning_rate": 1.7340833291851262e-05, - "loss": 0.7187, - "step": 18700 - }, - { - "epoch": 0.4, - "learning_rate": 1.7326610867354557e-05, - "loss": 0.7248, - "step": 18800 - }, - { - "epoch": 0.4, - "learning_rate": 1.7312388442857855e-05, - "loss": 0.7299, - "step": 18900 - }, - { - "epoch": 0.41, - "learning_rate": 1.729816601836115e-05, - "loss": 0.709, - "step": 19000 - }, - { - "epoch": 0.41, - "learning_rate": 1.728394359386445e-05, - "loss": 0.7083, - "step": 19100 - }, - { - "epoch": 0.41, - "learning_rate": 1.7269721169367744e-05, - "loss": 0.7039, - "step": 19200 - }, - { - "epoch": 0.41, - "learning_rate": 1.725549874487104e-05, - "loss": 0.7068, - "step": 19300 - }, - { - "epoch": 0.41, - "learning_rate": 1.7241276320374337e-05, - "loss": 0.7143, - "step": 19400 - }, - { - "epoch": 0.42, - "learning_rate": 1.7227053895877632e-05, - "loss": 0.6971, - "step": 19500 - }, - { - "epoch": 0.42, - "learning_rate": 1.7212831471380927e-05, - "loss": 0.6998, - "step": 19600 - }, - { - "epoch": 0.42, - "learning_rate": 1.7198609046884225e-05, - "loss": 0.6995, - "step": 19700 - }, - { - "epoch": 0.42, - "learning_rate": 1.718438662238752e-05, - "loss": 0.6885, - "step": 19800 - }, - { - "epoch": 0.42, - "learning_rate": 1.7170164197890815e-05, - "loss": 0.7026, - "step": 19900 - }, - { - "epoch": 0.43, - "learning_rate": 1.715594177339411e-05, - "loss": 0.7008, - "step": 20000 - }, - { - "epoch": 0.43, - "learning_rate": 1.714171934889741e-05, - "loss": 0.7041, - "step": 20100 - }, - { - "epoch": 0.43, - "learning_rate": 1.7127496924400704e-05, - "loss": 0.6964, - "step": 20200 - }, - { - "epoch": 0.43, - "learning_rate": 1.7113274499904e-05, - "loss": 0.6913, - "step": 20300 - }, - { - "epoch": 0.44, - "learning_rate": 1.7099052075407297e-05, - "loss": 0.6961, - "step": 20400 - }, - { - "epoch": 0.44, - "learning_rate": 1.7084829650910592e-05, - "loss": 0.6695, - "step": 20500 - }, - { - "epoch": 0.44, - "learning_rate": 1.707060722641389e-05, - "loss": 0.663, - "step": 20600 - }, - { - "epoch": 0.44, - "learning_rate": 1.7056384801917185e-05, - "loss": 0.6762, - "step": 20700 - }, - { - "epoch": 0.44, - "learning_rate": 1.704216237742048e-05, - "loss": 0.6636, - "step": 20800 - }, - { - "epoch": 0.45, - "learning_rate": 1.7027939952923775e-05, - "loss": 0.6751, - "step": 20900 - }, - { - "epoch": 0.45, - "learning_rate": 1.701371752842707e-05, - "loss": 0.6798, - "step": 21000 - }, - { - "epoch": 0.45, - "learning_rate": 1.699949510393037e-05, - "loss": 0.6768, - "step": 21100 - }, - { - "epoch": 0.45, - "learning_rate": 1.6985272679433663e-05, - "loss": 0.6471, - "step": 21200 - }, - { - "epoch": 0.45, - "learning_rate": 1.6971050254936962e-05, - "loss": 0.6717, - "step": 21300 - }, - { - "epoch": 0.46, - "learning_rate": 1.6956827830440257e-05, - "loss": 0.6662, - "step": 21400 - }, - { - "epoch": 0.46, - "learning_rate": 1.6942605405943552e-05, - "loss": 0.6686, - "step": 21500 - }, - { - "epoch": 0.46, - "learning_rate": 1.692838298144685e-05, - "loss": 0.668, - "step": 21600 - }, - { - "epoch": 0.46, - "learning_rate": 1.6914160556950145e-05, - "loss": 0.6459, - "step": 21700 - }, - { - "epoch": 0.47, - "learning_rate": 1.689993813245344e-05, - "loss": 0.6676, - "step": 21800 - }, - { - "epoch": 0.47, - "learning_rate": 1.6885715707956735e-05, - "loss": 0.6642, - "step": 21900 - }, - { - "epoch": 0.47, - "learning_rate": 1.6871493283460033e-05, - "loss": 0.6521, - "step": 22000 - }, - { - "epoch": 0.47, - "learning_rate": 1.685727085896333e-05, - "loss": 0.6693, - "step": 22100 - }, - { - "epoch": 0.47, - "learning_rate": 1.6843048434466623e-05, - "loss": 0.6618, - "step": 22200 - }, - { - "epoch": 0.48, - "learning_rate": 1.682882600996992e-05, - "loss": 0.6486, - "step": 22300 - }, - { - "epoch": 0.48, - "learning_rate": 1.6814603585473217e-05, - "loss": 0.6517, - "step": 22400 - }, - { - "epoch": 0.48, - "learning_rate": 1.680038116097651e-05, - "loss": 0.656, - "step": 22500 - }, - { - "epoch": 0.48, - "learning_rate": 1.678615873647981e-05, - "loss": 0.6414, - "step": 22600 - }, - { - "epoch": 0.48, - "learning_rate": 1.6771936311983105e-05, - "loss": 0.6406, - "step": 22700 - }, - { - "epoch": 0.49, - "learning_rate": 1.6757713887486403e-05, - "loss": 0.6383, - "step": 22800 - }, - { - "epoch": 0.49, - "learning_rate": 1.6743491462989698e-05, - "loss": 0.6387, - "step": 22900 - }, - { - "epoch": 0.49, - "learning_rate": 1.6729269038492993e-05, - "loss": 0.6173, - "step": 23000 - }, - { - "epoch": 0.49, - "learning_rate": 1.6715046613996288e-05, - "loss": 0.6297, - "step": 23100 - }, - { - "epoch": 0.49, - "learning_rate": 1.6700824189499583e-05, - "loss": 0.6182, - "step": 23200 - }, - { - "epoch": 0.5, - "learning_rate": 1.668660176500288e-05, - "loss": 0.617, - "step": 23300 - }, - { - "epoch": 0.5, - "learning_rate": 1.6672379340506177e-05, - "loss": 0.6307, - "step": 23400 - }, - { - "epoch": 0.5, - "learning_rate": 1.6658156916009475e-05, - "loss": 0.6184, - "step": 23500 - }, - { - "epoch": 0.5, - "learning_rate": 1.664393449151277e-05, - "loss": 0.6373, - "step": 23600 - }, - { - "epoch": 0.51, - "learning_rate": 1.6629712067016065e-05, - "loss": 0.6167, - "step": 23700 - }, - { - "epoch": 0.51, - "learning_rate": 1.6615489642519363e-05, - "loss": 0.6177, - "step": 23800 - }, - { - "epoch": 0.51, - "learning_rate": 1.6601267218022658e-05, - "loss": 0.6016, - "step": 23900 - }, - { - "epoch": 0.51, - "learning_rate": 1.6587044793525953e-05, - "loss": 0.6185, - "step": 24000 - }, - { - "epoch": 0.51, - "learning_rate": 1.6572822369029248e-05, - "loss": 0.6159, - "step": 24100 - }, - { - "epoch": 0.52, - "learning_rate": 1.6558599944532547e-05, - "loss": 0.621, - "step": 24200 - }, - { - "epoch": 0.52, - "learning_rate": 1.654437752003584e-05, - "loss": 0.6064, - "step": 24300 - }, - { - "epoch": 0.52, - "learning_rate": 1.6530155095539136e-05, - "loss": 0.6027, - "step": 24400 - }, - { - "epoch": 0.52, - "learning_rate": 1.6515932671042435e-05, - "loss": 0.6034, - "step": 24500 - }, - { - "epoch": 0.52, - "learning_rate": 1.650171024654573e-05, - "loss": 0.6112, - "step": 24600 - }, - { - "epoch": 0.53, - "learning_rate": 1.6487487822049025e-05, - "loss": 0.6028, - "step": 24700 - }, - { - "epoch": 0.53, - "learning_rate": 1.6473265397552323e-05, - "loss": 0.6073, - "step": 24800 - }, - { - "epoch": 0.53, - "learning_rate": 1.6459042973055618e-05, - "loss": 0.5982, - "step": 24900 - }, - { - "epoch": 0.53, - "learning_rate": 1.6444820548558916e-05, - "loss": 0.5926, - "step": 25000 - }, - { - "epoch": 0.54, - "learning_rate": 1.6430598124062208e-05, - "loss": 0.5944, - "step": 25100 - }, - { - "epoch": 0.54, - "learning_rate": 1.6416375699565506e-05, - "loss": 0.5909, - "step": 25200 - }, - { - "epoch": 0.54, - "learning_rate": 1.64021532750688e-05, - "loss": 0.5974, - "step": 25300 - }, - { - "epoch": 0.54, - "learning_rate": 1.6387930850572096e-05, - "loss": 0.5845, - "step": 25400 - }, - { - "epoch": 0.54, - "learning_rate": 1.6373708426075395e-05, - "loss": 0.5812, - "step": 25500 - }, - { - "epoch": 0.55, - "learning_rate": 1.635948600157869e-05, - "loss": 0.5901, - "step": 25600 - }, - { - "epoch": 0.55, - "learning_rate": 1.6345263577081988e-05, - "loss": 0.5836, - "step": 25700 - }, - { - "epoch": 0.55, - "learning_rate": 1.6331041152585283e-05, - "loss": 0.5664, - "step": 25800 - }, - { - "epoch": 0.55, - "learning_rate": 1.6316818728088578e-05, - "loss": 0.5745, - "step": 25900 - }, - { - "epoch": 0.55, - "learning_rate": 1.6302596303591876e-05, - "loss": 0.5736, - "step": 26000 - }, - { - "epoch": 0.56, - "learning_rate": 1.628837387909517e-05, - "loss": 0.5821, - "step": 26100 - }, - { - "epoch": 0.56, - "learning_rate": 1.6274151454598466e-05, - "loss": 0.5744, - "step": 26200 - }, - { - "epoch": 0.56, - "learning_rate": 1.625992903010176e-05, - "loss": 0.5787, - "step": 26300 - }, - { - "epoch": 0.56, - "learning_rate": 1.624570660560506e-05, - "loss": 0.5776, - "step": 26400 - }, - { - "epoch": 0.57, - "learning_rate": 1.6231484181108355e-05, - "loss": 0.5824, - "step": 26500 - }, - { - "epoch": 0.57, - "learning_rate": 1.621726175661165e-05, - "loss": 0.5772, - "step": 26600 - }, - { - "epoch": 0.57, - "learning_rate": 1.6203039332114948e-05, - "loss": 0.5656, - "step": 26700 - }, - { - "epoch": 0.57, - "learning_rate": 1.6188816907618243e-05, - "loss": 0.5696, - "step": 26800 - }, - { - "epoch": 0.57, - "learning_rate": 1.6174594483121538e-05, - "loss": 0.5771, - "step": 26900 - }, - { - "epoch": 0.58, - "learning_rate": 1.6160372058624836e-05, - "loss": 0.5678, - "step": 27000 - }, - { - "epoch": 0.58, - "learning_rate": 1.614614963412813e-05, - "loss": 0.584, - "step": 27100 - }, - { - "epoch": 0.58, - "learning_rate": 1.6131927209631426e-05, - "loss": 0.5743, - "step": 27200 - }, - { - "epoch": 0.58, - "learning_rate": 1.611770478513472e-05, - "loss": 0.5527, - "step": 27300 - }, - { - "epoch": 0.58, - "learning_rate": 1.610348236063802e-05, - "loss": 0.5484, - "step": 27400 - }, - { - "epoch": 0.59, - "learning_rate": 1.6089259936141315e-05, - "loss": 0.5642, - "step": 27500 - }, - { - "epoch": 0.59, - "learning_rate": 1.607503751164461e-05, - "loss": 0.5504, - "step": 27600 - }, - { - "epoch": 0.59, - "learning_rate": 1.6060815087147908e-05, - "loss": 0.5496, - "step": 27700 - }, - { - "epoch": 0.59, - "learning_rate": 1.6046592662651203e-05, - "loss": 0.5705, - "step": 27800 - }, - { - "epoch": 0.6, - "learning_rate": 1.60323702381545e-05, - "loss": 0.5427, - "step": 27900 - }, - { - "epoch": 0.6, - "learning_rate": 1.6018147813657796e-05, - "loss": 0.5535, - "step": 28000 - }, - { - "epoch": 0.6, - "learning_rate": 1.600392538916109e-05, - "loss": 0.5502, - "step": 28100 - }, - { - "epoch": 0.6, - "learning_rate": 1.598970296466439e-05, - "loss": 0.5453, - "step": 28200 - }, - { - "epoch": 0.6, - "learning_rate": 1.597548054016768e-05, - "loss": 0.5424, - "step": 28300 - }, - { - "epoch": 0.61, - "learning_rate": 1.596125811567098e-05, - "loss": 0.5127, - "step": 28400 - }, - { - "epoch": 0.61, - "learning_rate": 1.5947035691174274e-05, - "loss": 0.5446, - "step": 28500 - }, - { - "epoch": 0.61, - "learning_rate": 1.5932813266677573e-05, - "loss": 0.5379, - "step": 28600 - }, - { - "epoch": 0.61, - "learning_rate": 1.5918590842180868e-05, - "loss": 0.5471, - "step": 28700 - }, - { - "epoch": 0.61, - "learning_rate": 1.5904368417684163e-05, - "loss": 0.5288, - "step": 28800 - }, - { - "epoch": 0.62, - "learning_rate": 1.589014599318746e-05, - "loss": 0.5265, - "step": 28900 - }, - { - "epoch": 0.62, - "learning_rate": 1.5875923568690756e-05, - "loss": 0.5476, - "step": 29000 - }, - { - "epoch": 0.62, - "learning_rate": 1.586170114419405e-05, - "loss": 0.5381, - "step": 29100 - }, - { - "epoch": 0.62, - "learning_rate": 1.584747871969735e-05, - "loss": 0.5208, - "step": 29200 - }, - { - "epoch": 0.63, - "learning_rate": 1.5833256295200644e-05, - "loss": 0.5339, - "step": 29300 - }, - { - "epoch": 0.63, - "learning_rate": 1.581903387070394e-05, - "loss": 0.5274, - "step": 29400 - }, - { - "epoch": 0.63, - "learning_rate": 1.5804811446207234e-05, - "loss": 0.5269, - "step": 29500 - }, - { - "epoch": 0.63, - "learning_rate": 1.5790589021710533e-05, - "loss": 0.5386, - "step": 29600 - }, - { - "epoch": 0.63, - "learning_rate": 1.5776366597213828e-05, - "loss": 0.527, - "step": 29700 - }, - { - "epoch": 0.64, - "learning_rate": 1.5762144172717123e-05, - "loss": 0.5031, - "step": 29800 - }, - { - "epoch": 0.64, - "learning_rate": 1.574792174822042e-05, - "loss": 0.5191, - "step": 29900 - }, - { - "epoch": 0.64, - "learning_rate": 1.5733699323723716e-05, - "loss": 0.5249, - "step": 30000 - }, - { - "epoch": 0.64, - "learning_rate": 1.5719476899227014e-05, - "loss": 0.5226, - "step": 30100 - }, - { - "epoch": 0.64, - "learning_rate": 1.570525447473031e-05, - "loss": 0.4992, - "step": 30200 - }, - { - "epoch": 0.65, - "learning_rate": 1.5691032050233604e-05, - "loss": 0.5102, - "step": 30300 - }, - { - "epoch": 0.65, - "learning_rate": 1.56768096257369e-05, - "loss": 0.5007, - "step": 30400 - }, - { - "epoch": 0.65, - "learning_rate": 1.5662587201240194e-05, - "loss": 0.5084, - "step": 30500 - }, - { - "epoch": 0.65, - "learning_rate": 1.5648364776743493e-05, - "loss": 0.5194, - "step": 30600 - }, - { - "epoch": 0.65, - "learning_rate": 1.5634142352246788e-05, - "loss": 0.5018, - "step": 30700 - }, - { - "epoch": 0.66, - "learning_rate": 1.5619919927750086e-05, - "loss": 0.4992, - "step": 30800 - }, - { - "epoch": 0.66, - "learning_rate": 1.560569750325338e-05, - "loss": 0.514, - "step": 30900 - }, - { - "epoch": 0.66, - "learning_rate": 1.5591475078756676e-05, - "loss": 0.5105, - "step": 31000 - }, - { - "epoch": 0.66, - "learning_rate": 1.5577252654259974e-05, - "loss": 0.504, - "step": 31100 - }, - { - "epoch": 0.67, - "learning_rate": 1.556303022976327e-05, - "loss": 0.5019, - "step": 31200 - }, - { - "epoch": 0.67, - "learning_rate": 1.5548807805266564e-05, - "loss": 0.4822, - "step": 31300 - }, - { - "epoch": 0.67, - "learning_rate": 1.5534585380769863e-05, - "loss": 0.4964, - "step": 31400 - }, - { - "epoch": 0.67, - "learning_rate": 1.5520362956273158e-05, - "loss": 0.4911, - "step": 31500 - }, - { - "epoch": 0.67, - "learning_rate": 1.5506140531776452e-05, - "loss": 0.5093, - "step": 31600 - }, - { - "epoch": 0.68, - "learning_rate": 1.5491918107279747e-05, - "loss": 0.4942, - "step": 31700 - }, - { - "epoch": 0.68, - "learning_rate": 1.5477695682783046e-05, - "loss": 0.4844, - "step": 31800 - }, - { - "epoch": 0.68, - "learning_rate": 1.546347325828634e-05, - "loss": 0.4793, - "step": 31900 - }, - { - "epoch": 0.68, - "learning_rate": 1.5449250833789636e-05, - "loss": 0.475, - "step": 32000 - }, - { - "epoch": 0.68, - "learning_rate": 1.5435028409292934e-05, - "loss": 0.4877, - "step": 32100 - }, - { - "epoch": 0.69, - "learning_rate": 1.542080598479623e-05, - "loss": 0.4953, - "step": 32200 - }, - { - "epoch": 0.69, - "learning_rate": 1.5406583560299527e-05, - "loss": 0.4976, - "step": 32300 - }, - { - "epoch": 0.69, - "learning_rate": 1.5392361135802822e-05, - "loss": 0.4776, - "step": 32400 - }, - { - "epoch": 0.69, - "learning_rate": 1.5378138711306117e-05, - "loss": 0.4982, - "step": 32500 - }, - { - "epoch": 0.7, - "learning_rate": 1.5363916286809412e-05, - "loss": 0.4969, - "step": 32600 - }, - { - "epoch": 0.7, - "learning_rate": 1.5349693862312707e-05, - "loss": 0.4938, - "step": 32700 - }, - { - "epoch": 0.7, - "learning_rate": 1.5335471437816006e-05, - "loss": 0.4569, - "step": 32800 - }, - { - "epoch": 0.7, - "learning_rate": 1.53212490133193e-05, - "loss": 0.4794, - "step": 32900 - }, - { - "epoch": 0.7, - "learning_rate": 1.53070265888226e-05, - "loss": 0.466, - "step": 33000 - }, - { - "epoch": 0.71, - "learning_rate": 1.5292804164325894e-05, - "loss": 0.4713, - "step": 33100 - }, - { - "epoch": 0.71, - "learning_rate": 1.527858173982919e-05, - "loss": 0.4773, - "step": 33200 - }, - { - "epoch": 0.71, - "learning_rate": 1.5264359315332487e-05, - "loss": 0.4909, - "step": 33300 - }, - { - "epoch": 0.71, - "learning_rate": 1.5250136890835782e-05, - "loss": 0.4662, - "step": 33400 - }, - { - "epoch": 0.71, - "learning_rate": 1.5235914466339079e-05, - "loss": 0.4831, - "step": 33500 - }, - { - "epoch": 0.72, - "learning_rate": 1.5221692041842372e-05, - "loss": 0.4603, - "step": 33600 - }, - { - "epoch": 0.72, - "learning_rate": 1.5207469617345669e-05, - "loss": 0.4735, - "step": 33700 - }, - { - "epoch": 0.72, - "learning_rate": 1.5193247192848966e-05, - "loss": 0.452, - "step": 33800 - }, - { - "epoch": 0.72, - "learning_rate": 1.5179024768352262e-05, - "loss": 0.4793, - "step": 33900 - }, - { - "epoch": 0.73, - "learning_rate": 1.5164802343855557e-05, - "loss": 0.4628, - "step": 34000 - }, - { - "epoch": 0.73, - "learning_rate": 1.5150579919358854e-05, - "loss": 0.4497, - "step": 34100 - }, - { - "epoch": 0.73, - "learning_rate": 1.513635749486215e-05, - "loss": 0.4485, - "step": 34200 - }, - { - "epoch": 0.73, - "learning_rate": 1.5122135070365447e-05, - "loss": 0.4534, - "step": 34300 - }, - { - "epoch": 0.73, - "learning_rate": 1.5107912645868742e-05, - "loss": 0.4523, - "step": 34400 - }, - { - "epoch": 0.74, - "learning_rate": 1.5093690221372039e-05, - "loss": 0.4545, - "step": 34500 - }, - { - "epoch": 0.74, - "learning_rate": 1.5079467796875336e-05, - "loss": 0.4551, - "step": 34600 - }, - { - "epoch": 0.74, - "learning_rate": 1.5065245372378629e-05, - "loss": 0.4603, - "step": 34700 - }, - { - "epoch": 0.74, - "learning_rate": 1.5051022947881926e-05, - "loss": 0.457, - "step": 34800 - }, - { - "epoch": 0.74, - "learning_rate": 1.5036800523385222e-05, - "loss": 0.4584, - "step": 34900 - }, - { - "epoch": 0.75, - "learning_rate": 1.5022578098888519e-05, - "loss": 0.4593, - "step": 35000 - }, - { - "epoch": 0.75, - "learning_rate": 1.5008355674391814e-05, - "loss": 0.4399, - "step": 35100 - }, - { - "epoch": 0.75, - "learning_rate": 1.499413324989511e-05, - "loss": 0.453, - "step": 35200 - }, - { - "epoch": 0.75, - "learning_rate": 1.4979910825398407e-05, - "loss": 0.4294, - "step": 35300 - }, - { - "epoch": 0.76, - "learning_rate": 1.4965688400901704e-05, - "loss": 0.4628, - "step": 35400 - }, - { - "epoch": 0.76, - "learning_rate": 1.4951465976404999e-05, - "loss": 0.4385, - "step": 35500 - }, - { - "epoch": 0.76, - "learning_rate": 1.4937243551908295e-05, - "loss": 0.4391, - "step": 35600 - }, - { - "epoch": 0.76, - "learning_rate": 1.4923021127411592e-05, - "loss": 0.4282, - "step": 35700 - }, - { - "epoch": 0.76, - "learning_rate": 1.4908798702914885e-05, - "loss": 0.4628, - "step": 35800 - }, - { - "epoch": 0.77, - "learning_rate": 1.4894576278418182e-05, - "loss": 0.4413, - "step": 35900 - }, - { - "epoch": 0.77, - "learning_rate": 1.4880353853921479e-05, - "loss": 0.4279, - "step": 36000 - }, - { - "epoch": 0.77, - "learning_rate": 1.4866131429424775e-05, - "loss": 0.4371, - "step": 36100 - }, - { - "epoch": 0.77, - "learning_rate": 1.485190900492807e-05, - "loss": 0.4335, - "step": 36200 - }, - { - "epoch": 0.77, - "learning_rate": 1.4837686580431367e-05, - "loss": 0.441, - "step": 36300 - }, - { - "epoch": 0.78, - "learning_rate": 1.4823464155934664e-05, - "loss": 0.4377, - "step": 36400 - }, - { - "epoch": 0.78, - "learning_rate": 1.480924173143796e-05, - "loss": 0.4416, - "step": 36500 - }, - { - "epoch": 0.78, - "learning_rate": 1.4795019306941255e-05, - "loss": 0.4267, - "step": 36600 - }, - { - "epoch": 0.78, - "learning_rate": 1.4780796882444552e-05, - "loss": 0.4242, - "step": 36700 - }, - { - "epoch": 0.79, - "learning_rate": 1.4766574457947847e-05, - "loss": 0.4171, - "step": 36800 - }, - { - "epoch": 0.79, - "learning_rate": 1.4752352033451142e-05, - "loss": 0.4289, - "step": 36900 - }, - { - "epoch": 0.79, - "learning_rate": 1.4738129608954439e-05, - "loss": 0.4184, - "step": 37000 - }, - { - "epoch": 0.79, - "learning_rate": 1.4723907184457735e-05, - "loss": 0.4153, - "step": 37100 - }, - { - "epoch": 0.79, - "learning_rate": 1.4709684759961032e-05, - "loss": 0.415, - "step": 37200 - }, - { - "epoch": 0.8, - "learning_rate": 1.4695462335464327e-05, - "loss": 0.4275, - "step": 37300 - }, - { - "epoch": 0.8, - "learning_rate": 1.4681239910967624e-05, - "loss": 0.4092, - "step": 37400 - }, - { - "epoch": 0.8, - "learning_rate": 1.466701748647092e-05, - "loss": 0.4217, - "step": 37500 - }, - { - "epoch": 0.8, - "learning_rate": 1.4652795061974217e-05, - "loss": 0.4131, - "step": 37600 - }, - { - "epoch": 0.8, - "learning_rate": 1.4638572637477514e-05, - "loss": 0.4134, - "step": 37700 - }, - { - "epoch": 0.81, - "learning_rate": 1.4624350212980809e-05, - "loss": 0.4313, - "step": 37800 - }, - { - "epoch": 0.81, - "learning_rate": 1.4610127788484104e-05, - "loss": 0.4308, - "step": 37900 - }, - { - "epoch": 0.81, - "learning_rate": 1.4595905363987399e-05, - "loss": 0.4264, - "step": 38000 - }, - { - "epoch": 0.81, - "learning_rate": 1.4581682939490695e-05, - "loss": 0.4125, - "step": 38100 - }, - { - "epoch": 0.81, - "learning_rate": 1.4567460514993992e-05, - "loss": 0.4268, - "step": 38200 - }, - { - "epoch": 0.82, - "learning_rate": 1.4553238090497289e-05, - "loss": 0.4356, - "step": 38300 - }, - { - "epoch": 0.82, - "learning_rate": 1.4539015666000584e-05, - "loss": 0.402, - "step": 38400 - }, - { - "epoch": 0.82, - "learning_rate": 1.452479324150388e-05, - "loss": 0.4082, - "step": 38500 - }, - { - "epoch": 0.82, - "learning_rate": 1.4510570817007177e-05, - "loss": 0.4094, - "step": 38600 - }, - { - "epoch": 0.83, - "learning_rate": 1.4496348392510474e-05, - "loss": 0.4018, - "step": 38700 - }, - { - "epoch": 0.83, - "learning_rate": 1.448212596801377e-05, - "loss": 0.3903, - "step": 38800 - }, - { - "epoch": 0.83, - "learning_rate": 1.4467903543517065e-05, - "loss": 0.4179, - "step": 38900 - }, - { - "epoch": 0.83, - "learning_rate": 1.445368111902036e-05, - "loss": 0.4045, - "step": 39000 - }, - { - "epoch": 0.83, - "learning_rate": 1.4439458694523655e-05, - "loss": 0.4085, - "step": 39100 - }, - { - "epoch": 0.84, - "learning_rate": 1.4425236270026952e-05, - "loss": 0.3943, - "step": 39200 - }, - { - "epoch": 0.84, - "learning_rate": 1.4411013845530248e-05, - "loss": 0.4101, - "step": 39300 - }, - { - "epoch": 0.84, - "learning_rate": 1.4396791421033545e-05, - "loss": 0.4135, - "step": 39400 - }, - { - "epoch": 0.84, - "learning_rate": 1.438256899653684e-05, - "loss": 0.3968, - "step": 39500 - }, - { - "epoch": 0.84, - "learning_rate": 1.4368346572040137e-05, - "loss": 0.4126, - "step": 39600 - }, - { - "epoch": 0.85, - "learning_rate": 1.4354124147543433e-05, - "loss": 0.3894, - "step": 39700 - }, - { - "epoch": 0.85, - "learning_rate": 1.433990172304673e-05, - "loss": 0.3907, - "step": 39800 - }, - { - "epoch": 0.85, - "learning_rate": 1.4325679298550027e-05, - "loss": 0.4037, - "step": 39900 - }, - { - "epoch": 0.85, - "learning_rate": 1.431145687405332e-05, - "loss": 0.3898, - "step": 40000 - }, - { - "epoch": 0.86, - "learning_rate": 1.4297234449556617e-05, - "loss": 0.3866, - "step": 40100 - }, - { - "epoch": 0.86, - "learning_rate": 1.4283012025059912e-05, - "loss": 0.4, - "step": 40200 - }, - { - "epoch": 0.86, - "learning_rate": 1.4268789600563208e-05, - "loss": 0.3873, - "step": 40300 - }, - { - "epoch": 0.86, - "learning_rate": 1.4254567176066505e-05, - "loss": 0.3777, - "step": 40400 - }, - { - "epoch": 0.86, - "learning_rate": 1.4240344751569802e-05, - "loss": 0.3975, - "step": 40500 - }, - { - "epoch": 0.87, - "learning_rate": 1.4226122327073097e-05, - "loss": 0.3801, - "step": 40600 - }, - { - "epoch": 0.87, - "learning_rate": 1.4211899902576393e-05, - "loss": 0.3945, - "step": 40700 - }, - { - "epoch": 0.87, - "learning_rate": 1.419767747807969e-05, - "loss": 0.3947, - "step": 40800 - }, - { - "epoch": 0.87, - "learning_rate": 1.4183455053582987e-05, - "loss": 0.3694, - "step": 40900 - }, - { - "epoch": 0.87, - "learning_rate": 1.4169232629086283e-05, - "loss": 0.3815, - "step": 41000 - }, - { - "epoch": 0.88, - "learning_rate": 1.4155010204589577e-05, - "loss": 0.3938, - "step": 41100 - }, - { - "epoch": 0.88, - "learning_rate": 1.4140787780092873e-05, - "loss": 0.386, - "step": 41200 - }, - { - "epoch": 0.88, - "learning_rate": 1.4126565355596168e-05, - "loss": 0.3914, - "step": 41300 - }, - { - "epoch": 0.88, - "learning_rate": 1.4112342931099465e-05, - "loss": 0.3895, - "step": 41400 - }, - { - "epoch": 0.89, - "learning_rate": 1.4098120506602762e-05, - "loss": 0.3768, - "step": 41500 - }, - { - "epoch": 0.89, - "learning_rate": 1.4083898082106058e-05, - "loss": 0.3814, - "step": 41600 - }, - { - "epoch": 0.89, - "learning_rate": 1.4069675657609353e-05, - "loss": 0.3863, - "step": 41700 - }, - { - "epoch": 0.89, - "learning_rate": 1.405545323311265e-05, - "loss": 0.3705, - "step": 41800 - }, - { - "epoch": 0.89, - "learning_rate": 1.4041230808615947e-05, - "loss": 0.3761, - "step": 41900 - }, - { - "epoch": 0.9, - "learning_rate": 1.4027008384119243e-05, - "loss": 0.3674, - "step": 42000 - }, - { - "epoch": 0.9, - "learning_rate": 1.401278595962254e-05, - "loss": 0.3677, - "step": 42100 - }, - { - "epoch": 0.9, - "learning_rate": 1.3998563535125833e-05, - "loss": 0.3705, - "step": 42200 - }, - { - "epoch": 0.9, - "learning_rate": 1.398434111062913e-05, - "loss": 0.3634, - "step": 42300 - }, - { - "epoch": 0.9, - "learning_rate": 1.3970118686132425e-05, - "loss": 0.3487, - "step": 42400 - }, - { - "epoch": 0.91, - "learning_rate": 1.3955896261635721e-05, - "loss": 0.3644, - "step": 42500 - }, - { - "epoch": 0.91, - "learning_rate": 1.3941673837139018e-05, - "loss": 0.3585, - "step": 42600 - }, - { - "epoch": 0.91, - "learning_rate": 1.3927451412642315e-05, - "loss": 0.358, - "step": 42700 - }, - { - "epoch": 0.91, - "learning_rate": 1.391322898814561e-05, - "loss": 0.3545, - "step": 42800 - }, - { - "epoch": 0.92, - "learning_rate": 1.3899006563648906e-05, - "loss": 0.3625, - "step": 42900 - }, - { - "epoch": 0.92, - "learning_rate": 1.3884784139152203e-05, - "loss": 0.3677, - "step": 43000 - }, - { - "epoch": 0.92, - "learning_rate": 1.38705617146555e-05, - "loss": 0.3644, - "step": 43100 - }, - { - "epoch": 0.92, - "learning_rate": 1.3856339290158793e-05, - "loss": 0.3576, - "step": 43200 - }, - { - "epoch": 0.92, - "learning_rate": 1.384211686566209e-05, - "loss": 0.3645, - "step": 43300 - }, - { - "epoch": 0.93, - "learning_rate": 1.3827894441165386e-05, - "loss": 0.3631, - "step": 43400 - }, - { - "epoch": 0.93, - "learning_rate": 1.3813672016668681e-05, - "loss": 0.3549, - "step": 43500 - }, - { - "epoch": 0.93, - "learning_rate": 1.3799449592171978e-05, - "loss": 0.3506, - "step": 43600 - }, - { - "epoch": 0.93, - "learning_rate": 1.3785227167675275e-05, - "loss": 0.3516, - "step": 43700 - }, - { - "epoch": 0.93, - "learning_rate": 1.3771004743178571e-05, - "loss": 0.3545, - "step": 43800 - }, - { - "epoch": 0.94, - "learning_rate": 1.3756782318681866e-05, - "loss": 0.352, - "step": 43900 - }, - { - "epoch": 0.94, - "learning_rate": 1.3742559894185163e-05, - "loss": 0.3454, - "step": 44000 - }, - { - "epoch": 0.94, - "learning_rate": 1.372833746968846e-05, - "loss": 0.3397, - "step": 44100 - }, - { - "epoch": 0.94, - "learning_rate": 1.3714115045191756e-05, - "loss": 0.3675, - "step": 44200 - }, - { - "epoch": 0.95, - "learning_rate": 1.369989262069505e-05, - "loss": 0.3536, - "step": 44300 - }, - { - "epoch": 0.95, - "learning_rate": 1.3685670196198346e-05, - "loss": 0.3483, - "step": 44400 - }, - { - "epoch": 0.95, - "learning_rate": 1.3671447771701643e-05, - "loss": 0.3316, - "step": 44500 - }, - { - "epoch": 0.95, - "learning_rate": 1.3657225347204938e-05, - "loss": 0.3445, - "step": 44600 - }, - { - "epoch": 0.95, - "learning_rate": 1.3643002922708235e-05, - "loss": 0.3493, - "step": 44700 - }, - { - "epoch": 0.96, - "learning_rate": 1.3628780498211531e-05, - "loss": 0.339, - "step": 44800 - }, - { - "epoch": 0.96, - "learning_rate": 1.3614558073714828e-05, - "loss": 0.3454, - "step": 44900 - }, - { - "epoch": 0.96, - "learning_rate": 1.3600335649218125e-05, - "loss": 0.3459, - "step": 45000 - }, - { - "epoch": 0.96, - "learning_rate": 1.358611322472142e-05, - "loss": 0.342, - "step": 45100 - }, - { - "epoch": 0.96, - "learning_rate": 1.3571890800224716e-05, - "loss": 0.3472, - "step": 45200 - }, - { - "epoch": 0.97, - "learning_rate": 1.3557668375728013e-05, - "loss": 0.3356, - "step": 45300 - }, - { - "epoch": 0.97, - "learning_rate": 1.3543445951231306e-05, - "loss": 0.3452, - "step": 45400 - }, - { - "epoch": 0.97, - "learning_rate": 1.3529223526734603e-05, - "loss": 0.3339, - "step": 45500 - }, - { - "epoch": 0.97, - "learning_rate": 1.35150011022379e-05, - "loss": 0.3416, - "step": 45600 - }, - { - "epoch": 0.97, - "learning_rate": 1.3500778677741195e-05, - "loss": 0.3326, - "step": 45700 - }, - { - "epoch": 0.98, - "learning_rate": 1.3486556253244491e-05, - "loss": 0.3366, - "step": 45800 - }, - { - "epoch": 0.98, - "learning_rate": 1.3472333828747788e-05, - "loss": 0.3363, - "step": 45900 - }, - { - "epoch": 0.98, - "learning_rate": 1.3458111404251085e-05, - "loss": 0.3271, - "step": 46000 - }, - { - "epoch": 0.98, - "learning_rate": 1.3443888979754381e-05, - "loss": 0.3385, - "step": 46100 - }, - { - "epoch": 0.99, - "learning_rate": 1.3429666555257676e-05, - "loss": 0.3313, - "step": 46200 - }, - { - "epoch": 0.99, - "learning_rate": 1.3415444130760973e-05, - "loss": 0.3348, - "step": 46300 - }, - { - "epoch": 0.99, - "learning_rate": 1.3401221706264266e-05, - "loss": 0.343, - "step": 46400 - }, - { - "epoch": 0.99, - "learning_rate": 1.3386999281767563e-05, - "loss": 0.3353, - "step": 46500 - }, - { - "epoch": 0.99, - "learning_rate": 1.337277685727086e-05, - "loss": 0.3172, - "step": 46600 - }, - { - "epoch": 1.0, - "learning_rate": 1.3358554432774156e-05, - "loss": 0.3477, - "step": 46700 - }, - { - "epoch": 1.0, - "learning_rate": 1.3344332008277451e-05, - "loss": 0.3094, - "step": 46800 - }, - { - "epoch": 1.0, - "learning_rate": 1.3330109583780748e-05, - "loss": 0.3051, - "step": 46900 - }, - { - "epoch": 1.0, - "learning_rate": 1.3315887159284044e-05, - "loss": 0.2026, - "step": 47000 - }, - { - "epoch": 1.0, - "learning_rate": 1.3301664734787341e-05, - "loss": 0.2015, - "step": 47100 - }, - { - "epoch": 1.01, - "learning_rate": 1.3287442310290638e-05, - "loss": 0.2039, - "step": 47200 - }, - { - "epoch": 1.01, - "learning_rate": 1.3273219885793933e-05, - "loss": 0.1948, - "step": 47300 - }, - { - "epoch": 1.01, - "learning_rate": 1.325899746129723e-05, - "loss": 0.1971, - "step": 47400 - }, - { - "epoch": 1.01, - "learning_rate": 1.3244775036800523e-05, - "loss": 0.2006, - "step": 47500 - }, - { - "epoch": 1.02, - "learning_rate": 1.323055261230382e-05, - "loss": 0.1922, - "step": 47600 - }, - { - "epoch": 1.02, - "learning_rate": 1.3216330187807116e-05, - "loss": 0.202, - "step": 47700 - }, - { - "epoch": 1.02, - "learning_rate": 1.3202107763310413e-05, - "loss": 0.1944, - "step": 47800 - }, - { - "epoch": 1.02, - "learning_rate": 1.3187885338813708e-05, - "loss": 0.2059, - "step": 47900 - }, - { - "epoch": 1.02, - "learning_rate": 1.3173662914317004e-05, - "loss": 0.1968, - "step": 48000 - }, - { - "epoch": 1.03, - "learning_rate": 1.3159440489820301e-05, - "loss": 0.1966, - "step": 48100 - }, - { - "epoch": 1.03, - "learning_rate": 1.3145218065323598e-05, - "loss": 0.1919, - "step": 48200 - }, - { - "epoch": 1.03, - "learning_rate": 1.3130995640826894e-05, - "loss": 0.2018, - "step": 48300 - }, - { - "epoch": 1.03, - "learning_rate": 1.311677321633019e-05, - "loss": 0.1958, - "step": 48400 - }, - { - "epoch": 1.03, - "learning_rate": 1.3102550791833486e-05, - "loss": 0.1955, - "step": 48500 - }, - { - "epoch": 1.04, - "learning_rate": 1.308832836733678e-05, - "loss": 0.2018, - "step": 48600 - }, - { - "epoch": 1.04, - "learning_rate": 1.3074105942840076e-05, - "loss": 0.1949, - "step": 48700 - }, - { - "epoch": 1.04, - "learning_rate": 1.3059883518343373e-05, - "loss": 0.2036, - "step": 48800 - }, - { - "epoch": 1.04, - "learning_rate": 1.304566109384667e-05, - "loss": 0.1973, - "step": 48900 - }, - { - "epoch": 1.05, - "learning_rate": 1.3031438669349964e-05, - "loss": 0.1982, - "step": 49000 - }, - { - "epoch": 1.05, - "learning_rate": 1.3017216244853261e-05, - "loss": 0.1945, - "step": 49100 - }, - { - "epoch": 1.05, - "learning_rate": 1.3002993820356558e-05, - "loss": 0.1942, - "step": 49200 - }, - { - "epoch": 1.05, - "learning_rate": 1.2988771395859854e-05, - "loss": 0.1977, - "step": 49300 - }, - { - "epoch": 1.05, - "learning_rate": 1.2974548971363151e-05, - "loss": 0.2052, - "step": 49400 - }, - { - "epoch": 1.06, - "learning_rate": 1.2960326546866446e-05, - "loss": 0.2056, - "step": 49500 - }, - { - "epoch": 1.06, - "learning_rate": 1.294610412236974e-05, - "loss": 0.195, - "step": 49600 - }, - { - "epoch": 1.06, - "learning_rate": 1.2931881697873036e-05, - "loss": 0.2011, - "step": 49700 - }, - { - "epoch": 1.06, - "learning_rate": 1.2917659273376332e-05, - "loss": 0.1933, - "step": 49800 - }, - { - "epoch": 1.06, - "learning_rate": 1.2903436848879629e-05, - "loss": 0.1921, - "step": 49900 - }, - { - "epoch": 1.07, - "learning_rate": 1.2889214424382926e-05, - "loss": 0.208, - "step": 50000 - }, - { - "epoch": 1.07, - "learning_rate": 1.287499199988622e-05, - "loss": 0.1859, - "step": 50100 - }, - { - "epoch": 1.07, - "learning_rate": 1.2860769575389517e-05, - "loss": 0.1959, - "step": 50200 - }, - { - "epoch": 1.07, - "learning_rate": 1.2846547150892814e-05, - "loss": 0.1964, - "step": 50300 - }, - { - "epoch": 1.08, - "learning_rate": 1.283232472639611e-05, - "loss": 0.185, - "step": 50400 - }, - { - "epoch": 1.08, - "learning_rate": 1.2818102301899407e-05, - "loss": 0.2019, - "step": 50500 - }, - { - "epoch": 1.08, - "learning_rate": 1.2803879877402702e-05, - "loss": 0.2006, - "step": 50600 - }, - { - "epoch": 1.08, - "learning_rate": 1.2789657452905997e-05, - "loss": 0.2069, - "step": 50700 - }, - { - "epoch": 1.08, - "learning_rate": 1.2775435028409292e-05, - "loss": 0.1995, - "step": 50800 - }, - { - "epoch": 1.09, - "learning_rate": 1.2761212603912589e-05, - "loss": 0.1874, - "step": 50900 - }, - { - "epoch": 1.09, - "learning_rate": 1.2746990179415886e-05, - "loss": 0.1952, - "step": 51000 - }, - { - "epoch": 1.09, - "learning_rate": 1.2732767754919182e-05, - "loss": 0.1904, - "step": 51100 - }, - { - "epoch": 1.09, - "learning_rate": 1.2718545330422477e-05, - "loss": 0.1907, - "step": 51200 - }, - { - "epoch": 1.09, - "learning_rate": 1.2704322905925774e-05, - "loss": 0.1886, - "step": 51300 - }, - { - "epoch": 1.1, - "learning_rate": 1.269010048142907e-05, - "loss": 0.1909, - "step": 51400 - }, - { - "epoch": 1.1, - "learning_rate": 1.2675878056932367e-05, - "loss": 0.2054, - "step": 51500 - }, - { - "epoch": 1.1, - "learning_rate": 1.2661655632435664e-05, - "loss": 0.1825, - "step": 51600 - }, - { - "epoch": 1.1, - "learning_rate": 1.2647433207938959e-05, - "loss": 0.1923, - "step": 51700 - }, - { - "epoch": 1.11, - "learning_rate": 1.2633210783442254e-05, - "loss": 0.205, - "step": 51800 - }, - { - "epoch": 1.11, - "learning_rate": 1.2618988358945549e-05, - "loss": 0.2, - "step": 51900 - }, - { - "epoch": 1.11, - "learning_rate": 1.2604765934448846e-05, - "loss": 0.1891, - "step": 52000 - }, - { - "epoch": 1.11, - "learning_rate": 1.2590543509952142e-05, - "loss": 0.1873, - "step": 52100 - }, - { - "epoch": 1.11, - "learning_rate": 1.2576321085455439e-05, - "loss": 0.2028, - "step": 52200 - }, - { - "epoch": 1.12, - "learning_rate": 1.2562098660958734e-05, - "loss": 0.1998, - "step": 52300 - }, - { - "epoch": 1.12, - "learning_rate": 1.254787623646203e-05, - "loss": 0.1904, - "step": 52400 - }, - { - "epoch": 1.12, - "learning_rate": 1.2533653811965327e-05, - "loss": 0.1915, - "step": 52500 - }, - { - "epoch": 1.12, - "learning_rate": 1.2519431387468624e-05, - "loss": 0.1876, - "step": 52600 - }, - { - "epoch": 1.12, - "learning_rate": 1.250520896297192e-05, - "loss": 0.2011, - "step": 52700 - }, - { - "epoch": 1.13, - "learning_rate": 1.2490986538475214e-05, - "loss": 0.1973, - "step": 52800 - }, - { - "epoch": 1.13, - "learning_rate": 1.247676411397851e-05, - "loss": 0.2013, - "step": 52900 - }, - { - "epoch": 1.13, - "learning_rate": 1.2462541689481806e-05, - "loss": 0.1887, - "step": 53000 - }, - { - "epoch": 1.13, - "learning_rate": 1.2448319264985102e-05, - "loss": 0.1776, - "step": 53100 - }, - { - "epoch": 1.13, - "learning_rate": 1.2434096840488399e-05, - "loss": 0.1858, - "step": 53200 - }, - { - "epoch": 1.14, - "learning_rate": 1.2419874415991696e-05, - "loss": 0.1938, - "step": 53300 - }, - { - "epoch": 1.14, - "learning_rate": 1.2405651991494992e-05, - "loss": 0.1924, - "step": 53400 - }, - { - "epoch": 1.14, - "learning_rate": 1.2391429566998287e-05, - "loss": 0.1926, - "step": 53500 - }, - { - "epoch": 1.14, - "learning_rate": 1.2377207142501584e-05, - "loss": 0.1925, - "step": 53600 - }, - { - "epoch": 1.15, - "learning_rate": 1.236298471800488e-05, - "loss": 0.1919, - "step": 53700 - }, - { - "epoch": 1.15, - "learning_rate": 1.2348762293508177e-05, - "loss": 0.1857, - "step": 53800 - }, - { - "epoch": 1.15, - "learning_rate": 1.233453986901147e-05, - "loss": 0.1822, - "step": 53900 - }, - { - "epoch": 1.15, - "learning_rate": 1.2320317444514767e-05, - "loss": 0.1891, - "step": 54000 - }, - { - "epoch": 1.15, - "learning_rate": 1.2306095020018062e-05, - "loss": 0.1868, - "step": 54100 - }, - { - "epoch": 1.16, - "learning_rate": 1.2291872595521359e-05, - "loss": 0.1821, - "step": 54200 - }, - { - "epoch": 1.16, - "learning_rate": 1.2277650171024655e-05, - "loss": 0.1937, - "step": 54300 - }, - { - "epoch": 1.16, - "learning_rate": 1.2263427746527952e-05, - "loss": 0.1864, - "step": 54400 - }, - { - "epoch": 1.16, - "learning_rate": 1.2249205322031249e-05, - "loss": 0.1932, - "step": 54500 - }, - { - "epoch": 1.16, - "learning_rate": 1.2234982897534544e-05, - "loss": 0.202, - "step": 54600 - }, - { - "epoch": 1.17, - "learning_rate": 1.222076047303784e-05, - "loss": 0.1852, - "step": 54700 - }, - { - "epoch": 1.17, - "learning_rate": 1.2206538048541137e-05, - "loss": 0.1908, - "step": 54800 - }, - { - "epoch": 1.17, - "learning_rate": 1.2192315624044434e-05, - "loss": 0.1803, - "step": 54900 - }, - { - "epoch": 1.17, - "learning_rate": 1.2178093199547727e-05, - "loss": 0.189, - "step": 55000 - }, - { - "epoch": 1.18, - "learning_rate": 1.2163870775051024e-05, - "loss": 0.1913, - "step": 55100 - }, - { - "epoch": 1.18, - "learning_rate": 1.2149648350554319e-05, - "loss": 0.1865, - "step": 55200 - }, - { - "epoch": 1.18, - "learning_rate": 1.2135425926057615e-05, - "loss": 0.1852, - "step": 55300 - }, - { - "epoch": 1.18, - "learning_rate": 1.2121203501560912e-05, - "loss": 0.1868, - "step": 55400 - }, - { - "epoch": 1.18, - "learning_rate": 1.2106981077064209e-05, - "loss": 0.1978, - "step": 55500 - }, - { - "epoch": 1.19, - "learning_rate": 1.2092758652567505e-05, - "loss": 0.1946, - "step": 55600 - }, - { - "epoch": 1.19, - "learning_rate": 1.20785362280708e-05, - "loss": 0.203, - "step": 55700 - }, - { - "epoch": 1.19, - "learning_rate": 1.2064313803574097e-05, - "loss": 0.1886, - "step": 55800 - }, - { - "epoch": 1.19, - "learning_rate": 1.2050091379077394e-05, - "loss": 0.1869, - "step": 55900 - }, - { - "epoch": 1.19, - "learning_rate": 1.2035868954580687e-05, - "loss": 0.1841, - "step": 56000 - }, - { - "epoch": 1.2, - "learning_rate": 1.2021646530083984e-05, - "loss": 0.1929, - "step": 56100 - }, - { - "epoch": 1.2, - "learning_rate": 1.200742410558728e-05, - "loss": 0.1966, - "step": 56200 - }, - { - "epoch": 1.2, - "learning_rate": 1.1993201681090575e-05, - "loss": 0.1888, - "step": 56300 - }, - { - "epoch": 1.2, - "learning_rate": 1.1978979256593872e-05, - "loss": 0.1837, - "step": 56400 - }, - { - "epoch": 1.21, - "learning_rate": 1.1964756832097169e-05, - "loss": 0.1912, - "step": 56500 - }, - { - "epoch": 1.21, - "learning_rate": 1.1950534407600465e-05, - "loss": 0.1883, - "step": 56600 - }, - { - "epoch": 1.21, - "learning_rate": 1.1936311983103762e-05, - "loss": 0.1931, - "step": 56700 - }, - { - "epoch": 1.21, - "learning_rate": 1.1922089558607057e-05, - "loss": 0.1939, - "step": 56800 - }, - { - "epoch": 1.21, - "learning_rate": 1.1907867134110354e-05, - "loss": 0.1839, - "step": 56900 - }, - { - "epoch": 1.22, - "learning_rate": 1.189364470961365e-05, - "loss": 0.1755, - "step": 57000 - }, - { - "epoch": 1.22, - "learning_rate": 1.1879422285116943e-05, - "loss": 0.1846, - "step": 57100 - }, - { - "epoch": 1.22, - "learning_rate": 1.186519986062024e-05, - "loss": 0.182, - "step": 57200 - }, - { - "epoch": 1.22, - "learning_rate": 1.1850977436123537e-05, - "loss": 0.1978, - "step": 57300 - }, - { - "epoch": 1.22, - "learning_rate": 1.1836755011626832e-05, - "loss": 0.1789, - "step": 57400 - }, - { - "epoch": 1.23, - "learning_rate": 1.1822532587130128e-05, - "loss": 0.1898, - "step": 57500 - }, - { - "epoch": 1.23, - "learning_rate": 1.1808310162633425e-05, - "loss": 0.1912, - "step": 57600 - }, - { - "epoch": 1.23, - "learning_rate": 1.1794087738136722e-05, - "loss": 0.195, - "step": 57700 - }, - { - "epoch": 1.23, - "learning_rate": 1.1779865313640018e-05, - "loss": 0.1908, - "step": 57800 - }, - { - "epoch": 1.24, - "learning_rate": 1.1765642889143313e-05, - "loss": 0.1922, - "step": 57900 - }, - { - "epoch": 1.24, - "learning_rate": 1.175142046464661e-05, - "loss": 0.1822, - "step": 58000 - }, - { - "epoch": 1.24, - "learning_rate": 1.1737198040149907e-05, - "loss": 0.1798, - "step": 58100 - }, - { - "epoch": 1.24, - "learning_rate": 1.17229756156532e-05, - "loss": 0.1837, - "step": 58200 - }, - { - "epoch": 1.24, - "learning_rate": 1.1708753191156497e-05, - "loss": 0.1994, - "step": 58300 - }, - { - "epoch": 1.25, - "learning_rate": 1.1694530766659793e-05, - "loss": 0.1769, - "step": 58400 - }, - { - "epoch": 1.25, - "learning_rate": 1.1680308342163088e-05, - "loss": 0.1933, - "step": 58500 - }, - { - "epoch": 1.25, - "learning_rate": 1.1666085917666385e-05, - "loss": 0.1894, - "step": 58600 - }, - { - "epoch": 1.25, - "learning_rate": 1.1651863493169682e-05, - "loss": 0.1799, - "step": 58700 - }, - { - "epoch": 1.25, - "learning_rate": 1.1637641068672978e-05, - "loss": 0.187, - "step": 58800 - }, - { - "epoch": 1.26, - "learning_rate": 1.1623418644176275e-05, - "loss": 0.185, - "step": 58900 - }, - { - "epoch": 1.26, - "learning_rate": 1.160919621967957e-05, - "loss": 0.1875, - "step": 59000 - }, - { - "epoch": 1.26, - "learning_rate": 1.1594973795182867e-05, - "loss": 0.1884, - "step": 59100 - }, - { - "epoch": 1.26, - "learning_rate": 1.158075137068616e-05, - "loss": 0.1809, - "step": 59200 - }, - { - "epoch": 1.27, - "learning_rate": 1.1566528946189457e-05, - "loss": 0.1852, - "step": 59300 - }, - { - "epoch": 1.27, - "learning_rate": 1.1552306521692753e-05, - "loss": 0.1728, - "step": 59400 - }, - { - "epoch": 1.27, - "learning_rate": 1.153808409719605e-05, - "loss": 0.1837, - "step": 59500 - }, - { - "epoch": 1.27, - "learning_rate": 1.1523861672699345e-05, - "loss": 0.1871, - "step": 59600 - }, - { - "epoch": 1.27, - "learning_rate": 1.1509639248202642e-05, - "loss": 0.1844, - "step": 59700 - }, - { - "epoch": 1.28, - "learning_rate": 1.1495416823705938e-05, - "loss": 0.1756, - "step": 59800 - }, - { - "epoch": 1.28, - "learning_rate": 1.1481194399209235e-05, - "loss": 0.1853, - "step": 59900 - }, - { - "epoch": 1.28, - "learning_rate": 1.1466971974712532e-05, - "loss": 0.192, - "step": 60000 - }, - { - "epoch": 1.28, - "learning_rate": 1.1452749550215827e-05, - "loss": 0.1881, - "step": 60100 - }, - { - "epoch": 1.28, - "learning_rate": 1.1438527125719123e-05, - "loss": 0.1805, - "step": 60200 - }, - { - "epoch": 1.29, - "learning_rate": 1.1424304701222416e-05, - "loss": 0.1799, - "step": 60300 - }, - { - "epoch": 1.29, - "learning_rate": 1.1410082276725713e-05, - "loss": 0.1815, - "step": 60400 - }, - { - "epoch": 1.29, - "learning_rate": 1.139585985222901e-05, - "loss": 0.1892, - "step": 60500 - }, - { - "epoch": 1.29, - "learning_rate": 1.1381637427732307e-05, - "loss": 0.1831, - "step": 60600 - }, - { - "epoch": 1.29, - "learning_rate": 1.1367415003235601e-05, - "loss": 0.1757, - "step": 60700 - }, - { - "epoch": 1.3, - "learning_rate": 1.1353192578738898e-05, - "loss": 0.1825, - "step": 60800 - }, - { - "epoch": 1.3, - "learning_rate": 1.1338970154242195e-05, - "loss": 0.1748, - "step": 60900 - }, - { - "epoch": 1.3, - "learning_rate": 1.1324747729745491e-05, - "loss": 0.1934, - "step": 61000 - }, - { - "epoch": 1.3, - "learning_rate": 1.1310525305248788e-05, - "loss": 0.1605, - "step": 61100 - }, - { - "epoch": 1.31, - "learning_rate": 1.1296302880752083e-05, - "loss": 0.1794, - "step": 61200 - }, - { - "epoch": 1.31, - "learning_rate": 1.128208045625538e-05, - "loss": 0.1717, - "step": 61300 - }, - { - "epoch": 1.31, - "learning_rate": 1.1267858031758673e-05, - "loss": 0.1781, - "step": 61400 - }, - { - "epoch": 1.31, - "learning_rate": 1.125363560726197e-05, - "loss": 0.1758, - "step": 61500 - }, - { - "epoch": 1.31, - "learning_rate": 1.1239413182765266e-05, - "loss": 0.1763, - "step": 61600 - }, - { - "epoch": 1.32, - "learning_rate": 1.1225190758268563e-05, - "loss": 0.173, - "step": 61700 - }, - { - "epoch": 1.32, - "learning_rate": 1.121096833377186e-05, - "loss": 0.1768, - "step": 61800 - }, - { - "epoch": 1.32, - "learning_rate": 1.1196745909275155e-05, - "loss": 0.182, - "step": 61900 - }, - { - "epoch": 1.32, - "learning_rate": 1.1182523484778451e-05, - "loss": 0.1826, - "step": 62000 - }, - { - "epoch": 1.32, - "learning_rate": 1.1168301060281748e-05, - "loss": 0.1719, - "step": 62100 - }, - { - "epoch": 1.33, - "learning_rate": 1.1154078635785045e-05, - "loss": 0.1602, - "step": 62200 - }, - { - "epoch": 1.33, - "learning_rate": 1.113985621128834e-05, - "loss": 0.1802, - "step": 62300 - }, - { - "epoch": 1.33, - "learning_rate": 1.1125633786791635e-05, - "loss": 0.1758, - "step": 62400 - }, - { - "epoch": 1.33, - "learning_rate": 1.111141136229493e-05, - "loss": 0.1716, - "step": 62500 - }, - { - "epoch": 1.34, - "learning_rate": 1.1097188937798226e-05, - "loss": 0.1689, - "step": 62600 - }, - { - "epoch": 1.34, - "learning_rate": 1.1082966513301523e-05, - "loss": 0.1829, - "step": 62700 - }, - { - "epoch": 1.34, - "learning_rate": 1.106874408880482e-05, - "loss": 0.1796, - "step": 62800 - }, - { - "epoch": 1.34, - "learning_rate": 1.1054521664308116e-05, - "loss": 0.1691, - "step": 62900 - }, - { - "epoch": 1.34, - "learning_rate": 1.1040299239811411e-05, - "loss": 0.1764, - "step": 63000 - }, - { - "epoch": 1.35, - "learning_rate": 1.1026076815314708e-05, - "loss": 0.1698, - "step": 63100 - }, - { - "epoch": 1.35, - "learning_rate": 1.1011854390818005e-05, - "loss": 0.1666, - "step": 63200 - }, - { - "epoch": 1.35, - "learning_rate": 1.0997631966321301e-05, - "loss": 0.1657, - "step": 63300 - }, - { - "epoch": 1.35, - "learning_rate": 1.0983409541824596e-05, - "loss": 0.1753, - "step": 63400 - }, - { - "epoch": 1.35, - "learning_rate": 1.0969187117327891e-05, - "loss": 0.1753, - "step": 63500 - }, - { - "epoch": 1.36, - "learning_rate": 1.0954964692831186e-05, - "loss": 0.1812, - "step": 63600 - }, - { - "epoch": 1.36, - "learning_rate": 1.0940742268334483e-05, - "loss": 0.184, - "step": 63700 - }, - { - "epoch": 1.36, - "learning_rate": 1.092651984383778e-05, - "loss": 0.1636, - "step": 63800 - }, - { - "epoch": 1.36, - "learning_rate": 1.0912297419341076e-05, - "loss": 0.1713, - "step": 63900 - }, - { - "epoch": 1.37, - "learning_rate": 1.0898074994844373e-05, - "loss": 0.1623, - "step": 64000 - }, - { - "epoch": 1.37, - "learning_rate": 1.0883852570347668e-05, - "loss": 0.1724, - "step": 64100 - }, - { - "epoch": 1.37, - "learning_rate": 1.0869630145850965e-05, - "loss": 0.1701, - "step": 64200 - }, - { - "epoch": 1.37, - "learning_rate": 1.0855407721354261e-05, - "loss": 0.1783, - "step": 64300 - }, - { - "epoch": 1.37, - "learning_rate": 1.0841185296857558e-05, - "loss": 0.1818, - "step": 64400 - }, - { - "epoch": 1.38, - "learning_rate": 1.0826962872360853e-05, - "loss": 0.1586, - "step": 64500 - }, - { - "epoch": 1.38, - "learning_rate": 1.0812740447864148e-05, - "loss": 0.1753, - "step": 64600 - }, - { - "epoch": 1.38, - "learning_rate": 1.0798518023367443e-05, - "loss": 0.1768, - "step": 64700 - }, - { - "epoch": 1.38, - "learning_rate": 1.078429559887074e-05, - "loss": 0.1781, - "step": 64800 - }, - { - "epoch": 1.38, - "learning_rate": 1.0770073174374036e-05, - "loss": 0.1708, - "step": 64900 - }, - { - "epoch": 1.39, - "learning_rate": 1.0755850749877333e-05, - "loss": 0.1727, - "step": 65000 - }, - { - "epoch": 1.39, - "learning_rate": 1.074162832538063e-05, - "loss": 0.1648, - "step": 65100 - }, - { - "epoch": 1.39, - "learning_rate": 1.0727405900883924e-05, - "loss": 0.1734, - "step": 65200 - }, - { - "epoch": 1.39, - "learning_rate": 1.0713183476387221e-05, - "loss": 0.1769, - "step": 65300 - }, - { - "epoch": 1.4, - "learning_rate": 1.0698961051890518e-05, - "loss": 0.1655, - "step": 65400 - }, - { - "epoch": 1.4, - "learning_rate": 1.0684738627393814e-05, - "loss": 0.1742, - "step": 65500 - }, - { - "epoch": 1.4, - "learning_rate": 1.0670516202897108e-05, - "loss": 0.1564, - "step": 65600 - }, - { - "epoch": 1.4, - "learning_rate": 1.0656293778400404e-05, - "loss": 0.1657, - "step": 65700 - }, - { - "epoch": 1.4, - "learning_rate": 1.06420713539037e-05, - "loss": 0.1712, - "step": 65800 - }, - { - "epoch": 1.41, - "learning_rate": 1.0627848929406996e-05, - "loss": 0.1627, - "step": 65900 - }, - { - "epoch": 1.41, - "learning_rate": 1.0613626504910293e-05, - "loss": 0.1628, - "step": 66000 - }, - { - "epoch": 1.41, - "learning_rate": 1.059940408041359e-05, - "loss": 0.1651, - "step": 66100 - }, - { - "epoch": 1.41, - "learning_rate": 1.0585181655916886e-05, - "loss": 0.1664, - "step": 66200 - }, - { - "epoch": 1.41, - "learning_rate": 1.0570959231420181e-05, - "loss": 0.1609, - "step": 66300 - }, - { - "epoch": 1.42, - "learning_rate": 1.0556736806923478e-05, - "loss": 0.1754, - "step": 66400 - }, - { - "epoch": 1.42, - "learning_rate": 1.0542514382426774e-05, - "loss": 0.1697, - "step": 66500 - }, - { - "epoch": 1.42, - "learning_rate": 1.0528291957930071e-05, - "loss": 0.1598, - "step": 66600 - }, - { - "epoch": 1.42, - "learning_rate": 1.0514069533433364e-05, - "loss": 0.1612, - "step": 66700 - }, - { - "epoch": 1.43, - "learning_rate": 1.0499847108936661e-05, - "loss": 0.1736, - "step": 66800 - }, - { - "epoch": 1.43, - "learning_rate": 1.0485624684439956e-05, - "loss": 0.164, - "step": 66900 - }, - { - "epoch": 1.43, - "learning_rate": 1.0471402259943253e-05, - "loss": 0.168, - "step": 67000 - }, - { - "epoch": 1.43, - "learning_rate": 1.045717983544655e-05, - "loss": 0.164, - "step": 67100 - }, - { - "epoch": 1.43, - "learning_rate": 1.0442957410949846e-05, - "loss": 0.1538, - "step": 67200 - }, - { - "epoch": 1.44, - "learning_rate": 1.0428734986453143e-05, - "loss": 0.1625, - "step": 67300 - }, - { - "epoch": 1.44, - "learning_rate": 1.0414512561956438e-05, - "loss": 0.1682, - "step": 67400 - }, - { - "epoch": 1.44, - "learning_rate": 1.0400290137459734e-05, - "loss": 0.1597, - "step": 67500 - }, - { - "epoch": 1.44, - "learning_rate": 1.0386067712963031e-05, - "loss": 0.1632, - "step": 67600 - }, - { - "epoch": 1.44, - "learning_rate": 1.0371845288466328e-05, - "loss": 0.1648, - "step": 67700 - }, - { - "epoch": 1.45, - "learning_rate": 1.035762286396962e-05, - "loss": 0.1599, - "step": 67800 - }, - { - "epoch": 1.45, - "learning_rate": 1.0343400439472917e-05, - "loss": 0.1677, - "step": 67900 - }, - { - "epoch": 1.45, - "learning_rate": 1.0329178014976212e-05, - "loss": 0.1663, - "step": 68000 - }, - { - "epoch": 1.45, - "learning_rate": 1.0314955590479509e-05, - "loss": 0.1727, - "step": 68100 - }, - { - "epoch": 1.45, - "learning_rate": 1.0300733165982806e-05, - "loss": 0.1672, - "step": 68200 - }, - { - "epoch": 1.46, - "learning_rate": 1.0286510741486102e-05, - "loss": 0.157, - "step": 68300 - }, - { - "epoch": 1.46, - "learning_rate": 1.0272288316989399e-05, - "loss": 0.1694, - "step": 68400 - }, - { - "epoch": 1.46, - "learning_rate": 1.0258065892492694e-05, - "loss": 0.1534, - "step": 68500 - }, - { - "epoch": 1.46, - "learning_rate": 1.024384346799599e-05, - "loss": 0.1748, - "step": 68600 - }, - { - "epoch": 1.47, - "learning_rate": 1.0229621043499287e-05, - "loss": 0.1788, - "step": 68700 - }, - { - "epoch": 1.47, - "learning_rate": 1.021539861900258e-05, - "loss": 0.1577, - "step": 68800 - }, - { - "epoch": 1.47, - "learning_rate": 1.0201176194505877e-05, - "loss": 0.1552, - "step": 68900 - }, - { - "epoch": 1.47, - "learning_rate": 1.0186953770009174e-05, - "loss": 0.1551, - "step": 69000 - }, - { - "epoch": 1.47, - "learning_rate": 1.0172731345512469e-05, - "loss": 0.1617, - "step": 69100 - }, - { - "epoch": 1.48, - "learning_rate": 1.0158508921015766e-05, - "loss": 0.158, - "step": 69200 - }, - { - "epoch": 1.48, - "learning_rate": 1.0144286496519062e-05, - "loss": 0.1581, - "step": 69300 - }, - { - "epoch": 1.48, - "learning_rate": 1.0130064072022359e-05, - "loss": 0.162, - "step": 69400 - }, - { - "epoch": 1.48, - "learning_rate": 1.0115841647525656e-05, - "loss": 0.168, - "step": 69500 - }, - { - "epoch": 1.48, - "learning_rate": 1.010161922302895e-05, - "loss": 0.157, - "step": 69600 - }, - { - "epoch": 1.49, - "learning_rate": 1.0087396798532247e-05, - "loss": 0.1485, - "step": 69700 - }, - { - "epoch": 1.49, - "learning_rate": 1.0073174374035544e-05, - "loss": 0.1569, - "step": 69800 - }, - { - "epoch": 1.49, - "learning_rate": 1.0058951949538837e-05, - "loss": 0.1596, - "step": 69900 - }, - { - "epoch": 1.49, - "learning_rate": 1.0044729525042134e-05, - "loss": 0.1622, - "step": 70000 - }, - { - "epoch": 1.5, - "learning_rate": 1.003050710054543e-05, - "loss": 0.1471, - "step": 70100 - }, - { - "epoch": 1.5, - "learning_rate": 1.0016284676048727e-05, - "loss": 0.1538, - "step": 70200 - }, - { - "epoch": 1.5, - "learning_rate": 1.0002062251552022e-05, - "loss": 0.1546, - "step": 70300 - }, - { - "epoch": 1.5, - "learning_rate": 9.987839827055319e-06, - "loss": 0.1559, - "step": 70400 - }, - { - "epoch": 1.5, - "learning_rate": 9.973617402558616e-06, - "loss": 0.163, - "step": 70500 - }, - { - "epoch": 1.51, - "learning_rate": 9.959394978061912e-06, - "loss": 0.1602, - "step": 70600 - }, - { - "epoch": 1.51, - "learning_rate": 9.945172553565207e-06, - "loss": 0.1563, - "step": 70700 - }, - { - "epoch": 1.51, - "learning_rate": 9.930950129068502e-06, - "loss": 0.1585, - "step": 70800 - }, - { - "epoch": 1.51, - "learning_rate": 9.916727704571799e-06, - "loss": 0.1653, - "step": 70900 - }, - { - "epoch": 1.51, - "learning_rate": 9.902505280075096e-06, - "loss": 0.158, - "step": 71000 - }, - { - "epoch": 1.52, - "learning_rate": 9.888282855578392e-06, - "loss": 0.1555, - "step": 71100 - }, - { - "epoch": 1.52, - "learning_rate": 9.874060431081687e-06, - "loss": 0.1613, - "step": 71200 - }, - { - "epoch": 1.52, - "learning_rate": 9.859838006584984e-06, - "loss": 0.1569, - "step": 71300 - }, - { - "epoch": 1.52, - "learning_rate": 9.845615582088279e-06, - "loss": 0.1606, - "step": 71400 - }, - { - "epoch": 1.53, - "learning_rate": 9.831393157591576e-06, - "loss": 0.1606, - "step": 71500 - }, - { - "epoch": 1.53, - "learning_rate": 9.817170733094872e-06, - "loss": 0.1609, - "step": 71600 - }, - { - "epoch": 1.53, - "learning_rate": 9.802948308598169e-06, - "loss": 0.1533, - "step": 71700 - }, - { - "epoch": 1.53, - "learning_rate": 9.788725884101464e-06, - "loss": 0.1619, - "step": 71800 - }, - { - "epoch": 1.53, - "learning_rate": 9.774503459604759e-06, - "loss": 0.1546, - "step": 71900 - }, - { - "epoch": 1.54, - "learning_rate": 9.760281035108055e-06, - "loss": 0.1538, - "step": 72000 - }, - { - "epoch": 1.54, - "learning_rate": 9.746058610611352e-06, - "loss": 0.1534, - "step": 72100 - }, - { - "epoch": 1.54, - "learning_rate": 9.731836186114649e-06, - "loss": 0.1589, - "step": 72200 - }, - { - "epoch": 1.54, - "learning_rate": 9.717613761617944e-06, - "loss": 0.1648, - "step": 72300 - }, - { - "epoch": 1.54, - "learning_rate": 9.70339133712124e-06, - "loss": 0.1531, - "step": 72400 - }, - { - "epoch": 1.55, - "learning_rate": 9.689168912624535e-06, - "loss": 0.1569, - "step": 72500 - }, - { - "epoch": 1.55, - "learning_rate": 9.674946488127832e-06, - "loss": 0.1495, - "step": 72600 - }, - { - "epoch": 1.55, - "learning_rate": 9.660724063631129e-06, - "loss": 0.1564, - "step": 72700 - }, - { - "epoch": 1.55, - "learning_rate": 9.646501639134424e-06, - "loss": 0.1664, - "step": 72800 - }, - { - "epoch": 1.56, - "learning_rate": 9.63227921463772e-06, - "loss": 0.1573, - "step": 72900 - }, - { - "epoch": 1.56, - "learning_rate": 9.618056790141015e-06, - "loss": 0.1539, - "step": 73000 - }, - { - "epoch": 1.56, - "learning_rate": 9.603834365644312e-06, - "loss": 0.1557, - "step": 73100 - }, - { - "epoch": 1.56, - "learning_rate": 9.589611941147609e-06, - "loss": 0.152, - "step": 73200 - }, - { - "epoch": 1.56, - "learning_rate": 9.575389516650905e-06, - "loss": 0.1604, - "step": 73300 - }, - { - "epoch": 1.57, - "learning_rate": 9.5611670921542e-06, - "loss": 0.1437, - "step": 73400 - }, - { - "epoch": 1.57, - "learning_rate": 9.546944667657497e-06, - "loss": 0.152, - "step": 73500 - }, - { - "epoch": 1.57, - "learning_rate": 9.532722243160792e-06, - "loss": 0.1546, - "step": 73600 - }, - { - "epoch": 1.57, - "learning_rate": 9.518499818664089e-06, - "loss": 0.1448, - "step": 73700 - }, - { - "epoch": 1.57, - "learning_rate": 9.504277394167385e-06, - "loss": 0.152, - "step": 73800 - }, - { - "epoch": 1.58, - "learning_rate": 9.49005496967068e-06, - "loss": 0.1549, - "step": 73900 - }, - { - "epoch": 1.58, - "learning_rate": 9.475832545173977e-06, - "loss": 0.148, - "step": 74000 - }, - { - "epoch": 1.58, - "learning_rate": 9.461610120677272e-06, - "loss": 0.1441, - "step": 74100 - }, - { - "epoch": 1.58, - "learning_rate": 9.447387696180569e-06, - "loss": 0.1462, - "step": 74200 - }, - { - "epoch": 1.59, - "learning_rate": 9.433165271683865e-06, - "loss": 0.1459, - "step": 74300 - }, - { - "epoch": 1.59, - "learning_rate": 9.41894284718716e-06, - "loss": 0.1561, - "step": 74400 - }, - { - "epoch": 1.59, - "learning_rate": 9.404720422690457e-06, - "loss": 0.1598, - "step": 74500 - }, - { - "epoch": 1.59, - "learning_rate": 9.390497998193754e-06, - "loss": 0.1564, - "step": 74600 - }, - { - "epoch": 1.59, - "learning_rate": 9.376275573697049e-06, - "loss": 0.1552, - "step": 74700 - }, - { - "epoch": 1.6, - "learning_rate": 9.362053149200345e-06, - "loss": 0.1442, - "step": 74800 - }, - { - "epoch": 1.6, - "learning_rate": 9.347830724703642e-06, - "loss": 0.1589, - "step": 74900 - }, - { - "epoch": 1.6, - "learning_rate": 9.333608300206937e-06, - "loss": 0.1598, - "step": 75000 - }, - { - "epoch": 1.6, - "learning_rate": 9.319385875710234e-06, - "loss": 0.1484, - "step": 75100 - }, - { - "epoch": 1.6, - "learning_rate": 9.305163451213528e-06, - "loss": 0.1469, - "step": 75200 - }, - { - "epoch": 1.61, - "learning_rate": 9.290941026716825e-06, - "loss": 0.1463, - "step": 75300 - }, - { - "epoch": 1.61, - "learning_rate": 9.276718602220122e-06, - "loss": 0.1424, - "step": 75400 - }, - { - "epoch": 1.61, - "learning_rate": 9.262496177723417e-06, - "loss": 0.1551, - "step": 75500 - }, - { - "epoch": 1.61, - "learning_rate": 9.248273753226713e-06, - "loss": 0.1478, - "step": 75600 - }, - { - "epoch": 1.61, - "learning_rate": 9.23405132873001e-06, - "loss": 0.1316, - "step": 75700 - }, - { - "epoch": 1.62, - "learning_rate": 9.219828904233305e-06, - "loss": 0.1446, - "step": 75800 - }, - { - "epoch": 1.62, - "learning_rate": 9.205606479736602e-06, - "loss": 0.1384, - "step": 75900 - }, - { - "epoch": 1.62, - "learning_rate": 9.191384055239897e-06, - "loss": 0.1397, - "step": 76000 - }, - { - "epoch": 1.62, - "learning_rate": 9.177161630743193e-06, - "loss": 0.1472, - "step": 76100 - }, - { - "epoch": 1.63, - "learning_rate": 9.16293920624649e-06, - "loss": 0.1485, - "step": 76200 - }, - { - "epoch": 1.63, - "learning_rate": 9.148716781749785e-06, - "loss": 0.1543, - "step": 76300 - }, - { - "epoch": 1.63, - "learning_rate": 9.134494357253082e-06, - "loss": 0.146, - "step": 76400 - }, - { - "epoch": 1.63, - "learning_rate": 9.120271932756378e-06, - "loss": 0.1468, - "step": 76500 - }, - { - "epoch": 1.63, - "learning_rate": 9.106049508259673e-06, - "loss": 0.1497, - "step": 76600 - }, - { - "epoch": 1.64, - "learning_rate": 9.09182708376297e-06, - "loss": 0.1383, - "step": 76700 - }, - { - "epoch": 1.64, - "learning_rate": 9.077604659266267e-06, - "loss": 0.1493, - "step": 76800 - }, - { - "epoch": 1.64, - "learning_rate": 9.063382234769562e-06, - "loss": 0.1469, - "step": 76900 - }, - { - "epoch": 1.64, - "learning_rate": 9.049159810272858e-06, - "loss": 0.1442, - "step": 77000 - }, - { - "epoch": 1.64, - "learning_rate": 9.034937385776153e-06, - "loss": 0.1471, - "step": 77100 - }, - { - "epoch": 1.65, - "learning_rate": 9.02071496127945e-06, - "loss": 0.1365, - "step": 77200 - }, - { - "epoch": 1.65, - "learning_rate": 9.006492536782747e-06, - "loss": 0.1406, - "step": 77300 - }, - { - "epoch": 1.65, - "learning_rate": 8.992270112286042e-06, - "loss": 0.1412, - "step": 77400 - }, - { - "epoch": 1.65, - "learning_rate": 8.978047687789338e-06, - "loss": 0.1498, - "step": 77500 - }, - { - "epoch": 1.66, - "learning_rate": 8.963825263292633e-06, - "loss": 0.1559, - "step": 77600 - }, - { - "epoch": 1.66, - "learning_rate": 8.94960283879593e-06, - "loss": 0.1426, - "step": 77700 - }, - { - "epoch": 1.66, - "learning_rate": 8.935380414299227e-06, - "loss": 0.1364, - "step": 77800 - }, - { - "epoch": 1.66, - "learning_rate": 8.921157989802523e-06, - "loss": 0.1499, - "step": 77900 - }, - { - "epoch": 1.66, - "learning_rate": 8.906935565305818e-06, - "loss": 0.1437, - "step": 78000 - }, - { - "epoch": 1.67, - "learning_rate": 8.892713140809115e-06, - "loss": 0.1397, - "step": 78100 - }, - { - "epoch": 1.67, - "learning_rate": 8.87849071631241e-06, - "loss": 0.1479, - "step": 78200 - }, - { - "epoch": 1.67, - "learning_rate": 8.864268291815707e-06, - "loss": 0.144, - "step": 78300 - }, - { - "epoch": 1.67, - "learning_rate": 8.850045867319003e-06, - "loss": 0.1526, - "step": 78400 - }, - { - "epoch": 1.67, - "learning_rate": 8.835823442822298e-06, - "loss": 0.1407, - "step": 78500 - }, - { - "epoch": 1.68, - "learning_rate": 8.821601018325595e-06, - "loss": 0.136, - "step": 78600 - }, - { - "epoch": 1.68, - "learning_rate": 8.80737859382889e-06, - "loss": 0.1387, - "step": 78700 - }, - { - "epoch": 1.68, - "learning_rate": 8.793156169332186e-06, - "loss": 0.1487, - "step": 78800 - }, - { - "epoch": 1.68, - "learning_rate": 8.778933744835483e-06, - "loss": 0.1349, - "step": 78900 - }, - { - "epoch": 1.69, - "learning_rate": 8.76471132033878e-06, - "loss": 0.1447, - "step": 79000 - }, - { - "epoch": 1.69, - "learning_rate": 8.750488895842075e-06, - "loss": 0.1443, - "step": 79100 - }, - { - "epoch": 1.69, - "learning_rate": 8.73626647134537e-06, - "loss": 0.1322, - "step": 79200 - }, - { - "epoch": 1.69, - "learning_rate": 8.722044046848666e-06, - "loss": 0.1402, - "step": 79300 - }, - { - "epoch": 1.69, - "learning_rate": 8.707821622351963e-06, - "loss": 0.1429, - "step": 79400 - }, - { - "epoch": 1.7, - "learning_rate": 8.69359919785526e-06, - "loss": 0.1429, - "step": 79500 - }, - { - "epoch": 1.7, - "learning_rate": 8.679376773358555e-06, - "loss": 0.1336, - "step": 79600 - }, - { - "epoch": 1.7, - "learning_rate": 8.665154348861851e-06, - "loss": 0.1401, - "step": 79700 - }, - { - "epoch": 1.7, - "learning_rate": 8.650931924365146e-06, - "loss": 0.1442, - "step": 79800 - }, - { - "epoch": 1.7, - "learning_rate": 8.636709499868443e-06, - "loss": 0.1371, - "step": 79900 - }, - { - "epoch": 1.71, - "learning_rate": 8.62248707537174e-06, - "loss": 0.1363, - "step": 80000 - }, - { - "epoch": 1.71, - "learning_rate": 8.608264650875036e-06, - "loss": 0.1429, - "step": 80100 - }, - { - "epoch": 1.71, - "learning_rate": 8.594042226378331e-06, - "loss": 0.1332, - "step": 80200 - }, - { - "epoch": 1.71, - "learning_rate": 8.579819801881626e-06, - "loss": 0.1415, - "step": 80300 - }, - { - "epoch": 1.72, - "learning_rate": 8.565597377384923e-06, - "loss": 0.1379, - "step": 80400 - }, - { - "epoch": 1.72, - "learning_rate": 8.55137495288822e-06, - "loss": 0.1436, - "step": 80500 - }, - { - "epoch": 1.72, - "learning_rate": 8.537152528391516e-06, - "loss": 0.1323, - "step": 80600 - }, - { - "epoch": 1.72, - "learning_rate": 8.522930103894811e-06, - "loss": 0.1424, - "step": 80700 - }, - { - "epoch": 1.72, - "learning_rate": 8.508707679398108e-06, - "loss": 0.1467, - "step": 80800 - }, - { - "epoch": 1.73, - "learning_rate": 8.494485254901403e-06, - "loss": 0.1486, - "step": 80900 - }, - { - "epoch": 1.73, - "learning_rate": 8.4802628304047e-06, - "loss": 0.1442, - "step": 81000 - }, - { - "epoch": 1.73, - "learning_rate": 8.466040405907996e-06, - "loss": 0.1399, - "step": 81100 - }, - { - "epoch": 1.73, - "learning_rate": 8.451817981411293e-06, - "loss": 0.1378, - "step": 81200 - }, - { - "epoch": 1.73, - "learning_rate": 8.437595556914588e-06, - "loss": 0.1335, - "step": 81300 - }, - { - "epoch": 1.74, - "learning_rate": 8.423373132417883e-06, - "loss": 0.132, - "step": 81400 - }, - { - "epoch": 1.74, - "learning_rate": 8.40915070792118e-06, - "loss": 0.1364, - "step": 81500 - }, - { - "epoch": 1.74, - "learning_rate": 8.394928283424476e-06, - "loss": 0.1358, - "step": 81600 - }, - { - "epoch": 1.74, - "learning_rate": 8.380705858927773e-06, - "loss": 0.1353, - "step": 81700 - }, - { - "epoch": 1.75, - "learning_rate": 8.366483434431068e-06, - "loss": 0.1376, - "step": 81800 - }, - { - "epoch": 1.75, - "learning_rate": 8.352261009934365e-06, - "loss": 0.1308, - "step": 81900 - }, - { - "epoch": 1.75, - "learning_rate": 8.33803858543766e-06, - "loss": 0.1415, - "step": 82000 - }, - { - "epoch": 1.75, - "learning_rate": 8.323816160940956e-06, - "loss": 0.1305, - "step": 82100 - }, - { - "epoch": 1.75, - "learning_rate": 8.309593736444253e-06, - "loss": 0.1302, - "step": 82200 - }, - { - "epoch": 1.76, - "learning_rate": 8.29537131194755e-06, - "loss": 0.1409, - "step": 82300 - }, - { - "epoch": 1.76, - "learning_rate": 8.281148887450845e-06, - "loss": 0.1402, - "step": 82400 - }, - { - "epoch": 1.76, - "learning_rate": 8.26692646295414e-06, - "loss": 0.1458, - "step": 82500 - }, - { - "epoch": 1.76, - "learning_rate": 8.252704038457436e-06, - "loss": 0.1336, - "step": 82600 - }, - { - "epoch": 1.76, - "learning_rate": 8.238481613960733e-06, - "loss": 0.1375, - "step": 82700 - }, - { - "epoch": 1.77, - "learning_rate": 8.22425918946403e-06, - "loss": 0.1277, - "step": 82800 - }, - { - "epoch": 1.77, - "learning_rate": 8.210036764967324e-06, - "loss": 0.1283, - "step": 82900 - }, - { - "epoch": 1.77, - "learning_rate": 8.195814340470621e-06, - "loss": 0.1402, - "step": 83000 - }, - { - "epoch": 1.77, - "learning_rate": 8.181591915973916e-06, - "loss": 0.1412, - "step": 83100 - }, - { - "epoch": 1.77, - "learning_rate": 8.167369491477213e-06, - "loss": 0.1316, - "step": 83200 - }, - { - "epoch": 1.78, - "learning_rate": 8.15314706698051e-06, - "loss": 0.1424, - "step": 83300 - }, - { - "epoch": 1.78, - "learning_rate": 8.138924642483806e-06, - "loss": 0.1248, - "step": 83400 - }, - { - "epoch": 1.78, - "learning_rate": 8.124702217987101e-06, - "loss": 0.1267, - "step": 83500 - }, - { - "epoch": 1.78, - "learning_rate": 8.110479793490396e-06, - "loss": 0.1472, - "step": 83600 - }, - { - "epoch": 1.79, - "learning_rate": 8.096257368993693e-06, - "loss": 0.1256, - "step": 83700 - }, - { - "epoch": 1.79, - "learning_rate": 8.08203494449699e-06, - "loss": 0.1329, - "step": 83800 - }, - { - "epoch": 1.79, - "learning_rate": 8.067812520000286e-06, - "loss": 0.1387, - "step": 83900 - }, - { - "epoch": 1.79, - "learning_rate": 8.053590095503581e-06, - "loss": 0.1238, - "step": 84000 - }, - { - "epoch": 1.79, - "learning_rate": 8.039367671006878e-06, - "loss": 0.134, - "step": 84100 - }, - { - "epoch": 1.8, - "learning_rate": 8.025145246510173e-06, - "loss": 0.1317, - "step": 84200 - }, - { - "epoch": 1.8, - "learning_rate": 8.01092282201347e-06, - "loss": 0.1279, - "step": 84300 - }, - { - "epoch": 1.8, - "learning_rate": 7.996700397516766e-06, - "loss": 0.1263, - "step": 84400 - }, - { - "epoch": 1.8, - "learning_rate": 7.982477973020063e-06, - "loss": 0.1356, - "step": 84500 - }, - { - "epoch": 1.8, - "learning_rate": 7.968255548523358e-06, - "loss": 0.1308, - "step": 84600 - }, - { - "epoch": 1.81, - "learning_rate": 7.954033124026653e-06, - "loss": 0.1268, - "step": 84700 - }, - { - "epoch": 1.81, - "learning_rate": 7.93981069952995e-06, - "loss": 0.1383, - "step": 84800 - }, - { - "epoch": 1.81, - "learning_rate": 7.925588275033246e-06, - "loss": 0.1303, - "step": 84900 - }, - { - "epoch": 1.81, - "learning_rate": 7.911365850536543e-06, - "loss": 0.1277, - "step": 85000 - }, - { - "epoch": 1.82, - "learning_rate": 7.897143426039838e-06, - "loss": 0.1368, - "step": 85100 - }, - { - "epoch": 1.82, - "learning_rate": 7.882921001543134e-06, - "loss": 0.1273, - "step": 85200 - }, - { - "epoch": 1.82, - "learning_rate": 7.86869857704643e-06, - "loss": 0.1238, - "step": 85300 - }, - { - "epoch": 1.82, - "learning_rate": 7.854476152549726e-06, - "loss": 0.127, - "step": 85400 - }, - { - "epoch": 1.82, - "learning_rate": 7.840253728053023e-06, - "loss": 0.127, - "step": 85500 - }, - { - "epoch": 1.83, - "learning_rate": 7.826031303556318e-06, - "loss": 0.1369, - "step": 85600 - }, - { - "epoch": 1.83, - "learning_rate": 7.811808879059614e-06, - "loss": 0.1252, - "step": 85700 - }, - { - "epoch": 1.83, - "learning_rate": 7.79758645456291e-06, - "loss": 0.1266, - "step": 85800 - }, - { - "epoch": 1.83, - "learning_rate": 7.783364030066206e-06, - "loss": 0.1293, - "step": 85900 - }, - { - "epoch": 1.83, - "learning_rate": 7.769141605569503e-06, - "loss": 0.1243, - "step": 86000 - }, - { - "epoch": 1.84, - "learning_rate": 7.7549191810728e-06, - "loss": 0.119, - "step": 86100 - }, - { - "epoch": 1.84, - "learning_rate": 7.740696756576094e-06, - "loss": 0.1352, - "step": 86200 - }, - { - "epoch": 1.84, - "learning_rate": 7.72647433207939e-06, - "loss": 0.13, - "step": 86300 - }, - { - "epoch": 1.84, - "learning_rate": 7.712251907582686e-06, - "loss": 0.1244, - "step": 86400 - }, - { - "epoch": 1.85, - "learning_rate": 7.698029483085982e-06, - "loss": 0.1303, - "step": 86500 - }, - { - "epoch": 1.85, - "learning_rate": 7.683807058589279e-06, - "loss": 0.1297, - "step": 86600 - }, - { - "epoch": 1.85, - "learning_rate": 7.669584634092574e-06, - "loss": 0.1263, - "step": 86700 - }, - { - "epoch": 1.85, - "learning_rate": 7.65536220959587e-06, - "loss": 0.1252, - "step": 86800 - }, - { - "epoch": 1.85, - "learning_rate": 7.641139785099166e-06, - "loss": 0.1212, - "step": 86900 - }, - { - "epoch": 1.86, - "learning_rate": 7.626917360602462e-06, - "loss": 0.1251, - "step": 87000 - }, - { - "epoch": 1.86, - "learning_rate": 7.612694936105759e-06, - "loss": 0.1408, - "step": 87100 - }, - { - "epoch": 1.86, - "learning_rate": 7.598472511609054e-06, - "loss": 0.1309, - "step": 87200 - }, - { - "epoch": 1.86, - "learning_rate": 7.584250087112351e-06, - "loss": 0.1214, - "step": 87300 - }, - { - "epoch": 1.86, - "learning_rate": 7.5700276626156465e-06, - "loss": 0.1373, - "step": 87400 - }, - { - "epoch": 1.87, - "learning_rate": 7.555805238118943e-06, - "loss": 0.1156, - "step": 87500 - }, - { - "epoch": 1.87, - "learning_rate": 7.541582813622239e-06, - "loss": 0.1355, - "step": 87600 - }, - { - "epoch": 1.87, - "learning_rate": 7.527360389125536e-06, - "loss": 0.1304, - "step": 87700 - }, - { - "epoch": 1.87, - "learning_rate": 7.513137964628831e-06, - "loss": 0.1155, - "step": 87800 - }, - { - "epoch": 1.88, - "learning_rate": 7.4989155401321265e-06, - "loss": 0.129, - "step": 87900 - }, - { - "epoch": 1.88, - "learning_rate": 7.484693115635423e-06, - "loss": 0.1157, - "step": 88000 - }, - { - "epoch": 1.88, - "learning_rate": 7.470470691138719e-06, - "loss": 0.1363, - "step": 88100 - }, - { - "epoch": 1.88, - "learning_rate": 7.456248266642016e-06, - "loss": 0.1218, - "step": 88200 - }, - { - "epoch": 1.88, - "learning_rate": 7.442025842145311e-06, - "loss": 0.1203, - "step": 88300 - }, - { - "epoch": 1.89, - "learning_rate": 7.427803417648607e-06, - "loss": 0.1235, - "step": 88400 - }, - { - "epoch": 1.89, - "learning_rate": 7.413580993151903e-06, - "loss": 0.1322, - "step": 88500 - }, - { - "epoch": 1.89, - "learning_rate": 7.3993585686552e-06, - "loss": 0.1358, - "step": 88600 - }, - { - "epoch": 1.89, - "learning_rate": 7.385136144158496e-06, - "loss": 0.1335, - "step": 88700 - }, - { - "epoch": 1.89, - "learning_rate": 7.3709137196617906e-06, - "loss": 0.1176, - "step": 88800 - }, - { - "epoch": 1.9, - "learning_rate": 7.356691295165087e-06, - "loss": 0.1216, - "step": 88900 - }, - { - "epoch": 1.9, - "learning_rate": 7.342468870668383e-06, - "loss": 0.1271, - "step": 89000 - }, - { - "epoch": 1.9, - "learning_rate": 7.32824644617168e-06, - "loss": 0.1285, - "step": 89100 - }, - { - "epoch": 1.9, - "learning_rate": 7.3140240216749755e-06, - "loss": 0.1163, - "step": 89200 - }, - { - "epoch": 1.91, - "learning_rate": 7.299801597178272e-06, - "loss": 0.1207, - "step": 89300 - }, - { - "epoch": 1.91, - "learning_rate": 7.285579172681567e-06, - "loss": 0.1171, - "step": 89400 - }, - { - "epoch": 1.91, - "learning_rate": 7.271356748184864e-06, - "loss": 0.1144, - "step": 89500 - }, - { - "epoch": 1.91, - "learning_rate": 7.25713432368816e-06, - "loss": 0.1155, - "step": 89600 - }, - { - "epoch": 1.91, - "learning_rate": 7.242911899191456e-06, - "loss": 0.1208, - "step": 89700 - }, - { - "epoch": 1.92, - "learning_rate": 7.228689474694752e-06, - "loss": 0.1169, - "step": 89800 - }, - { - "epoch": 1.92, - "learning_rate": 7.214467050198047e-06, - "loss": 0.1195, - "step": 89900 - }, - { - "epoch": 1.92, - "learning_rate": 7.200244625701344e-06, - "loss": 0.1207, - "step": 90000 - }, - { - "epoch": 1.92, - "learning_rate": 7.18602220120464e-06, - "loss": 0.1251, - "step": 90100 - }, - { - "epoch": 1.92, - "learning_rate": 7.171799776707936e-06, - "loss": 0.1208, - "step": 90200 - }, - { - "epoch": 1.93, - "learning_rate": 7.157577352211232e-06, - "loss": 0.1189, - "step": 90300 - }, - { - "epoch": 1.93, - "learning_rate": 7.143354927714528e-06, - "loss": 0.1312, - "step": 90400 - }, - { - "epoch": 1.93, - "learning_rate": 7.129132503217824e-06, - "loss": 0.1124, - "step": 90500 - }, - { - "epoch": 1.93, - "learning_rate": 7.11491007872112e-06, - "loss": 0.1194, - "step": 90600 - }, - { - "epoch": 1.93, - "learning_rate": 7.100687654224416e-06, - "loss": 0.1244, - "step": 90700 - }, - { - "epoch": 1.94, - "learning_rate": 7.086465229727713e-06, - "loss": 0.1185, - "step": 90800 - }, - { - "epoch": 1.94, - "learning_rate": 7.072242805231009e-06, - "loss": 0.1165, - "step": 90900 - }, - { - "epoch": 1.94, - "learning_rate": 7.058020380734304e-06, - "loss": 0.1173, - "step": 91000 - }, - { - "epoch": 1.94, - "learning_rate": 7.0437979562376e-06, - "loss": 0.1099, - "step": 91100 - }, - { - "epoch": 1.95, - "learning_rate": 7.029575531740896e-06, - "loss": 0.1213, - "step": 91200 - }, - { - "epoch": 1.95, - "learning_rate": 7.015353107244193e-06, - "loss": 0.1138, - "step": 91300 - }, - { - "epoch": 1.95, - "learning_rate": 7.001130682747489e-06, - "loss": 0.1109, - "step": 91400 - }, - { - "epoch": 1.95, - "learning_rate": 6.9869082582507845e-06, - "loss": 0.1118, - "step": 91500 - }, - { - "epoch": 1.95, - "learning_rate": 6.97268583375408e-06, - "loss": 0.1275, - "step": 91600 - }, - { - "epoch": 1.96, - "learning_rate": 6.958463409257377e-06, - "loss": 0.1169, - "step": 91700 - }, - { - "epoch": 1.96, - "learning_rate": 6.944240984760673e-06, - "loss": 0.1144, - "step": 91800 - }, - { - "epoch": 1.96, - "learning_rate": 6.9300185602639695e-06, - "loss": 0.1136, - "step": 91900 - }, - { - "epoch": 1.96, - "learning_rate": 6.9157961357672645e-06, - "loss": 0.1198, - "step": 92000 - }, - { - "epoch": 1.96, - "learning_rate": 6.90157371127056e-06, - "loss": 0.1079, - "step": 92100 - }, - { - "epoch": 1.97, - "learning_rate": 6.887351286773857e-06, - "loss": 0.1153, - "step": 92200 - }, - { - "epoch": 1.97, - "learning_rate": 6.873128862277153e-06, - "loss": 0.1128, - "step": 92300 - }, - { - "epoch": 1.97, - "learning_rate": 6.8589064377804494e-06, - "loss": 0.126, - "step": 92400 - }, - { - "epoch": 1.97, - "learning_rate": 6.844684013283745e-06, - "loss": 0.1188, - "step": 92500 - }, - { - "epoch": 1.98, - "learning_rate": 6.830461588787041e-06, - "loss": 0.1118, - "step": 92600 - }, - { - "epoch": 1.98, - "learning_rate": 6.816239164290337e-06, - "loss": 0.1129, - "step": 92700 - }, - { - "epoch": 1.98, - "learning_rate": 6.8020167397936336e-06, - "loss": 0.1102, - "step": 92800 - }, - { - "epoch": 1.98, - "learning_rate": 6.787794315296929e-06, - "loss": 0.1171, - "step": 92900 - }, - { - "epoch": 1.98, - "learning_rate": 6.773571890800226e-06, - "loss": 0.1087, - "step": 93000 - }, - { - "epoch": 1.99, - "learning_rate": 6.759349466303521e-06, - "loss": 0.1193, - "step": 93100 - }, - { - "epoch": 1.99, - "learning_rate": 6.745127041806817e-06, - "loss": 0.1039, - "step": 93200 - }, - { - "epoch": 1.99, - "learning_rate": 6.7309046173101135e-06, - "loss": 0.1098, - "step": 93300 - }, - { - "epoch": 1.99, - "learning_rate": 6.716682192813409e-06, - "loss": 0.1228, - "step": 93400 - }, - { - "epoch": 1.99, - "learning_rate": 6.702459768316706e-06, - "loss": 0.1096, - "step": 93500 - }, - { - "epoch": 2.0, - "learning_rate": 6.688237343820001e-06, - "loss": 0.1125, - "step": 93600 - }, - { - "epoch": 2.0, - "learning_rate": 6.674014919323298e-06, - "loss": 0.1111, - "step": 93700 - }, - { - "epoch": 2.0, - "learning_rate": 6.6597924948265935e-06, - "loss": 0.0934, - "step": 93800 - }, - { - "epoch": 2.0, - "learning_rate": 6.64557007032989e-06, - "loss": 0.0468, - "step": 93900 - }, - { - "epoch": 2.01, - "learning_rate": 6.631347645833186e-06, - "loss": 0.0639, - "step": 94000 - }, - { - "epoch": 2.01, - "learning_rate": 6.617125221336483e-06, - "loss": 0.0554, - "step": 94100 - }, - { - "epoch": 2.01, - "learning_rate": 6.602902796839778e-06, - "loss": 0.0583, - "step": 94200 - }, - { - "epoch": 2.01, - "learning_rate": 6.588680372343073e-06, - "loss": 0.0497, - "step": 94300 - }, - { - "epoch": 2.01, - "learning_rate": 6.57445794784637e-06, - "loss": 0.0507, - "step": 94400 - }, - { - "epoch": 2.02, - "learning_rate": 6.560235523349666e-06, - "loss": 0.0503, - "step": 94500 - }, - { - "epoch": 2.02, - "learning_rate": 6.5460130988529626e-06, - "loss": 0.0513, - "step": 94600 - }, - { - "epoch": 2.02, - "learning_rate": 6.5317906743562575e-06, - "loss": 0.0495, - "step": 94700 - }, - { - "epoch": 2.02, - "learning_rate": 6.517568249859554e-06, - "loss": 0.053, - "step": 94800 - }, - { - "epoch": 2.02, - "learning_rate": 6.50334582536285e-06, - "loss": 0.0556, - "step": 94900 - }, - { - "epoch": 2.03, - "learning_rate": 6.489123400866147e-06, - "loss": 0.0551, - "step": 95000 - }, - { - "epoch": 2.03, - "learning_rate": 6.4749009763694425e-06, - "loss": 0.0572, - "step": 95100 - }, - { - "epoch": 2.03, - "learning_rate": 6.4606785518727375e-06, - "loss": 0.045, - "step": 95200 - }, - { - "epoch": 2.03, - "learning_rate": 6.446456127376034e-06, - "loss": 0.0542, - "step": 95300 - }, - { - "epoch": 2.04, - "learning_rate": 6.43223370287933e-06, - "loss": 0.0582, - "step": 95400 - }, - { - "epoch": 2.04, - "learning_rate": 6.418011278382627e-06, - "loss": 0.0503, - "step": 95500 - }, - { - "epoch": 2.04, - "learning_rate": 6.4037888538859225e-06, - "loss": 0.0554, - "step": 95600 - }, - { - "epoch": 2.04, - "learning_rate": 6.389566429389219e-06, - "loss": 0.0539, - "step": 95700 - }, - { - "epoch": 2.04, - "learning_rate": 6.375344004892514e-06, - "loss": 0.0525, - "step": 95800 - }, - { - "epoch": 2.05, - "learning_rate": 6.361121580395811e-06, - "loss": 0.0511, - "step": 95900 - }, - { - "epoch": 2.05, - "learning_rate": 6.346899155899107e-06, - "loss": 0.0552, - "step": 96000 - }, - { - "epoch": 2.05, - "learning_rate": 6.332676731402403e-06, - "loss": 0.0512, - "step": 96100 - }, - { - "epoch": 2.05, - "learning_rate": 6.318454306905699e-06, - "loss": 0.0543, - "step": 96200 - }, - { - "epoch": 2.05, - "learning_rate": 6.304231882408994e-06, - "loss": 0.0528, - "step": 96300 - }, - { - "epoch": 2.06, - "learning_rate": 6.290009457912291e-06, - "loss": 0.061, - "step": 96400 - }, - { - "epoch": 2.06, - "learning_rate": 6.2757870334155865e-06, - "loss": 0.055, - "step": 96500 - }, - { - "epoch": 2.06, - "learning_rate": 6.261564608918883e-06, - "loss": 0.053, - "step": 96600 - }, - { - "epoch": 2.06, - "learning_rate": 6.247342184422179e-06, - "loss": 0.0506, - "step": 96700 - }, - { - "epoch": 2.07, - "learning_rate": 6.233119759925475e-06, - "loss": 0.0522, - "step": 96800 - }, - { - "epoch": 2.07, - "learning_rate": 6.218897335428771e-06, - "loss": 0.0429, - "step": 96900 - }, - { - "epoch": 2.07, - "learning_rate": 6.204674910932067e-06, - "loss": 0.0446, - "step": 97000 - }, - { - "epoch": 2.07, - "learning_rate": 6.190452486435363e-06, - "loss": 0.0488, - "step": 97100 - }, - { - "epoch": 2.07, - "learning_rate": 6.17623006193866e-06, - "loss": 0.055, - "step": 97200 - }, - { - "epoch": 2.08, - "learning_rate": 6.162007637441956e-06, - "loss": 0.0558, - "step": 97300 - }, - { - "epoch": 2.08, - "learning_rate": 6.147785212945251e-06, - "loss": 0.0575, - "step": 97400 - }, - { - "epoch": 2.08, - "learning_rate": 6.133562788448547e-06, - "loss": 0.0616, - "step": 97500 - }, - { - "epoch": 2.08, - "learning_rate": 6.119340363951843e-06, - "loss": 0.0538, - "step": 97600 - }, - { - "epoch": 2.08, - "learning_rate": 6.10511793945514e-06, - "loss": 0.0578, - "step": 97700 - }, - { - "epoch": 2.09, - "learning_rate": 6.090895514958436e-06, - "loss": 0.0512, - "step": 97800 - }, - { - "epoch": 2.09, - "learning_rate": 6.076673090461731e-06, - "loss": 0.0522, - "step": 97900 - }, - { - "epoch": 2.09, - "learning_rate": 6.062450665965027e-06, - "loss": 0.0566, - "step": 98000 - }, - { - "epoch": 2.09, - "learning_rate": 6.048228241468324e-06, - "loss": 0.0532, - "step": 98100 - }, - { - "epoch": 2.09, - "learning_rate": 6.03400581697162e-06, - "loss": 0.0493, - "step": 98200 - }, - { - "epoch": 2.1, - "learning_rate": 6.019783392474916e-06, - "loss": 0.0523, - "step": 98300 - }, - { - "epoch": 2.1, - "learning_rate": 6.005560967978211e-06, - "loss": 0.0561, - "step": 98400 - }, - { - "epoch": 2.1, - "learning_rate": 5.991338543481507e-06, - "loss": 0.0557, - "step": 98500 - }, - { - "epoch": 2.1, - "learning_rate": 5.977116118984804e-06, - "loss": 0.0471, - "step": 98600 - }, - { - "epoch": 2.11, - "learning_rate": 5.9628936944881e-06, - "loss": 0.0547, - "step": 98700 - }, - { - "epoch": 2.11, - "learning_rate": 5.948671269991396e-06, - "loss": 0.0456, - "step": 98800 - }, - { - "epoch": 2.11, - "learning_rate": 5.934448845494692e-06, - "loss": 0.0537, - "step": 98900 - }, - { - "epoch": 2.11, - "learning_rate": 5.920226420997988e-06, - "loss": 0.0531, - "step": 99000 - }, - { - "epoch": 2.11, - "learning_rate": 5.906003996501284e-06, - "loss": 0.054, - "step": 99100 - }, - { - "epoch": 2.12, - "learning_rate": 5.8917815720045805e-06, - "loss": 0.0592, - "step": 99200 - }, - { - "epoch": 2.12, - "learning_rate": 5.877559147507876e-06, - "loss": 0.0549, - "step": 99300 - }, - { - "epoch": 2.12, - "learning_rate": 5.863336723011173e-06, - "loss": 0.0461, - "step": 99400 - }, - { - "epoch": 2.12, - "learning_rate": 5.849114298514468e-06, - "loss": 0.0548, - "step": 99500 - }, - { - "epoch": 2.12, - "learning_rate": 5.834891874017764e-06, - "loss": 0.047, - "step": 99600 - }, - { - "epoch": 2.13, - "learning_rate": 5.82066944952106e-06, - "loss": 0.047, - "step": 99700 - }, - { - "epoch": 2.13, - "learning_rate": 5.806447025024356e-06, - "loss": 0.0516, - "step": 99800 - }, - { - "epoch": 2.13, - "learning_rate": 5.792224600527653e-06, - "loss": 0.0496, - "step": 99900 - }, - { - "epoch": 2.13, - "learning_rate": 5.778002176030948e-06, - "loss": 0.058, - "step": 100000 - }, - { - "epoch": 2.14, - "learning_rate": 5.7637797515342445e-06, - "loss": 0.0506, - "step": 100100 - }, - { - "epoch": 2.14, - "learning_rate": 5.74955732703754e-06, - "loss": 0.0517, - "step": 100200 - }, - { - "epoch": 2.14, - "learning_rate": 5.735334902540837e-06, - "loss": 0.0578, - "step": 100300 - }, - { - "epoch": 2.14, - "learning_rate": 5.721112478044133e-06, - "loss": 0.0535, - "step": 100400 - }, - { - "epoch": 2.14, - "learning_rate": 5.7068900535474295e-06, - "loss": 0.057, - "step": 100500 - }, - { - "epoch": 2.15, - "learning_rate": 5.6926676290507245e-06, - "loss": 0.0576, - "step": 100600 - }, - { - "epoch": 2.15, - "learning_rate": 5.67844520455402e-06, - "loss": 0.0484, - "step": 100700 - }, - { - "epoch": 2.15, - "learning_rate": 5.664222780057317e-06, - "loss": 0.0527, - "step": 100800 - }, - { - "epoch": 2.15, - "learning_rate": 5.650000355560613e-06, - "loss": 0.0521, - "step": 100900 - }, - { - "epoch": 2.15, - "learning_rate": 5.6357779310639095e-06, - "loss": 0.0485, - "step": 101000 - }, - { - "epoch": 2.16, - "learning_rate": 5.6215555065672044e-06, - "loss": 0.0486, - "step": 101100 - }, - { - "epoch": 2.16, - "learning_rate": 5.607333082070501e-06, - "loss": 0.0503, - "step": 101200 - }, - { - "epoch": 2.16, - "learning_rate": 5.593110657573797e-06, - "loss": 0.0502, - "step": 101300 - }, - { - "epoch": 2.16, - "learning_rate": 5.578888233077094e-06, - "loss": 0.0485, - "step": 101400 - }, - { - "epoch": 2.17, - "learning_rate": 5.564665808580389e-06, - "loss": 0.0544, - "step": 101500 - }, - { - "epoch": 2.17, - "learning_rate": 5.550443384083684e-06, - "loss": 0.0465, - "step": 101600 - }, - { - "epoch": 2.17, - "learning_rate": 5.536220959586981e-06, - "loss": 0.0545, - "step": 101700 - }, - { - "epoch": 2.17, - "learning_rate": 5.521998535090277e-06, - "loss": 0.0474, - "step": 101800 - }, - { - "epoch": 2.17, - "learning_rate": 5.5077761105935736e-06, - "loss": 0.051, - "step": 101900 - }, - { - "epoch": 2.18, - "learning_rate": 5.493553686096869e-06, - "loss": 0.0577, - "step": 102000 - }, - { - "epoch": 2.18, - "learning_rate": 5.479331261600166e-06, - "loss": 0.0482, - "step": 102100 - }, - { - "epoch": 2.18, - "learning_rate": 5.465108837103461e-06, - "loss": 0.0569, - "step": 102200 - }, - { - "epoch": 2.18, - "learning_rate": 5.450886412606758e-06, - "loss": 0.0625, - "step": 102300 - }, - { - "epoch": 2.18, - "learning_rate": 5.4366639881100535e-06, - "loss": 0.038, - "step": 102400 - }, - { - "epoch": 2.19, - "learning_rate": 5.42244156361335e-06, - "loss": 0.0436, - "step": 102500 - }, - { - "epoch": 2.19, - "learning_rate": 5.408219139116646e-06, - "loss": 0.0533, - "step": 102600 - }, - { - "epoch": 2.19, - "learning_rate": 5.393996714619941e-06, - "loss": 0.0443, - "step": 102700 - }, - { - "epoch": 2.19, - "learning_rate": 5.379774290123238e-06, - "loss": 0.0546, - "step": 102800 - }, - { - "epoch": 2.2, - "learning_rate": 5.3655518656265335e-06, - "loss": 0.0486, - "step": 102900 - }, - { - "epoch": 2.2, - "learning_rate": 5.35132944112983e-06, - "loss": 0.0512, - "step": 103000 - }, - { - "epoch": 2.2, - "learning_rate": 5.337107016633126e-06, - "loss": 0.0511, - "step": 103100 - }, - { - "epoch": 2.2, - "learning_rate": 5.322884592136422e-06, - "loss": 0.0528, - "step": 103200 - }, - { - "epoch": 2.2, - "learning_rate": 5.308662167639718e-06, - "loss": 0.0562, - "step": 103300 - }, - { - "epoch": 2.21, - "learning_rate": 5.294439743143014e-06, - "loss": 0.0519, - "step": 103400 - }, - { - "epoch": 2.21, - "learning_rate": 5.28021731864631e-06, - "loss": 0.0491, - "step": 103500 - }, - { - "epoch": 2.21, - "learning_rate": 5.265994894149607e-06, - "loss": 0.0494, - "step": 103600 - }, - { - "epoch": 2.21, - "learning_rate": 5.2517724696529026e-06, - "loss": 0.0501, - "step": 103700 - }, - { - "epoch": 2.21, - "learning_rate": 5.2375500451561975e-06, - "loss": 0.0487, - "step": 103800 - }, - { - "epoch": 2.22, - "learning_rate": 5.223327620659494e-06, - "loss": 0.0492, - "step": 103900 - }, - { - "epoch": 2.22, - "learning_rate": 5.20910519616279e-06, - "loss": 0.0499, - "step": 104000 - }, - { - "epoch": 2.22, - "learning_rate": 5.194882771666087e-06, - "loss": 0.049, - "step": 104100 - }, - { - "epoch": 2.22, - "learning_rate": 5.1806603471693825e-06, - "loss": 0.0502, - "step": 104200 - }, - { - "epoch": 2.23, - "learning_rate": 5.166437922672678e-06, - "loss": 0.0489, - "step": 104300 - }, - { - "epoch": 2.23, - "learning_rate": 5.152215498175974e-06, - "loss": 0.051, - "step": 104400 - }, - { - "epoch": 2.23, - "learning_rate": 5.137993073679271e-06, - "loss": 0.0517, - "step": 104500 - }, - { - "epoch": 2.23, - "learning_rate": 5.123770649182567e-06, - "loss": 0.0571, - "step": 104600 - }, - { - "epoch": 2.23, - "learning_rate": 5.109548224685863e-06, - "loss": 0.0458, - "step": 104700 - }, - { - "epoch": 2.24, - "learning_rate": 5.095325800189158e-06, - "loss": 0.0533, - "step": 104800 - }, - { - "epoch": 2.24, - "learning_rate": 5.081103375692454e-06, - "loss": 0.0452, - "step": 104900 - }, - { - "epoch": 2.24, - "learning_rate": 5.066880951195751e-06, - "loss": 0.044, - "step": 105000 - }, - { - "epoch": 2.24, - "learning_rate": 5.052658526699047e-06, - "loss": 0.051, - "step": 105100 - }, - { - "epoch": 2.24, - "learning_rate": 5.038436102202343e-06, - "loss": 0.0603, - "step": 105200 - }, - { - "epoch": 2.25, - "learning_rate": 5.024213677705639e-06, - "loss": 0.0546, - "step": 105300 - }, - { - "epoch": 2.25, - "learning_rate": 5.009991253208935e-06, - "loss": 0.0492, - "step": 105400 - }, - { - "epoch": 2.25, - "learning_rate": 4.995768828712231e-06, - "loss": 0.0537, - "step": 105500 - }, - { - "epoch": 2.25, - "learning_rate": 4.981546404215527e-06, - "loss": 0.059, - "step": 105600 - }, - { - "epoch": 2.25, - "learning_rate": 4.967323979718823e-06, - "loss": 0.0485, - "step": 105700 - }, - { - "epoch": 2.26, - "learning_rate": 4.953101555222119e-06, - "loss": 0.0495, - "step": 105800 - }, - { - "epoch": 2.26, - "learning_rate": 4.938879130725415e-06, - "loss": 0.0453, - "step": 105900 - }, - { - "epoch": 2.26, - "learning_rate": 4.924656706228711e-06, - "loss": 0.0504, - "step": 106000 - }, - { - "epoch": 2.26, - "learning_rate": 4.910434281732007e-06, - "loss": 0.0527, - "step": 106100 - }, - { - "epoch": 2.27, - "learning_rate": 4.896211857235303e-06, - "loss": 0.0499, - "step": 106200 - }, - { - "epoch": 2.27, - "learning_rate": 4.881989432738599e-06, - "loss": 0.0482, - "step": 106300 - }, - { - "epoch": 2.27, - "learning_rate": 4.867767008241896e-06, - "loss": 0.0476, - "step": 106400 - }, - { - "epoch": 2.27, - "learning_rate": 4.8535445837451915e-06, - "loss": 0.0641, - "step": 106500 - }, - { - "epoch": 2.27, - "learning_rate": 4.839322159248487e-06, - "loss": 0.0522, - "step": 106600 - }, - { - "epoch": 2.28, - "learning_rate": 4.825099734751784e-06, - "loss": 0.0474, - "step": 106700 - }, - { - "epoch": 2.28, - "learning_rate": 4.810877310255079e-06, - "loss": 0.054, - "step": 106800 - }, - { - "epoch": 2.28, - "learning_rate": 4.796654885758376e-06, - "loss": 0.0478, - "step": 106900 - }, - { - "epoch": 2.28, - "learning_rate": 4.782432461261671e-06, - "loss": 0.0462, - "step": 107000 - }, - { - "epoch": 2.28, - "learning_rate": 4.768210036764967e-06, - "loss": 0.0525, - "step": 107100 - }, - { - "epoch": 2.29, - "learning_rate": 4.753987612268264e-06, - "loss": 0.0449, - "step": 107200 - }, - { - "epoch": 2.29, - "learning_rate": 4.73976518777156e-06, - "loss": 0.0508, - "step": 107300 - }, - { - "epoch": 2.29, - "learning_rate": 4.7255427632748555e-06, - "loss": 0.0564, - "step": 107400 - }, - { - "epoch": 2.29, - "learning_rate": 4.711320338778152e-06, - "loss": 0.0513, - "step": 107500 - }, - { - "epoch": 2.3, - "learning_rate": 4.697097914281448e-06, - "loss": 0.0484, - "step": 107600 - }, - { - "epoch": 2.3, - "learning_rate": 4.682875489784744e-06, - "loss": 0.0523, - "step": 107700 - }, - { - "epoch": 2.3, - "learning_rate": 4.6686530652880405e-06, - "loss": 0.0484, - "step": 107800 - }, - { - "epoch": 2.3, - "learning_rate": 4.6544306407913355e-06, - "loss": 0.056, - "step": 107900 - }, - { - "epoch": 2.3, - "learning_rate": 4.640208216294632e-06, - "loss": 0.0509, - "step": 108000 - }, - { - "epoch": 2.31, - "learning_rate": 4.625985791797928e-06, - "loss": 0.048, - "step": 108100 - }, - { - "epoch": 2.31, - "learning_rate": 4.611763367301224e-06, - "loss": 0.058, - "step": 108200 - }, - { - "epoch": 2.31, - "learning_rate": 4.5975409428045205e-06, - "loss": 0.049, - "step": 108300 - }, - { - "epoch": 2.31, - "learning_rate": 4.583318518307816e-06, - "loss": 0.0418, - "step": 108400 - }, - { - "epoch": 2.31, - "learning_rate": 4.569096093811112e-06, - "loss": 0.0509, - "step": 108500 - }, - { - "epoch": 2.32, - "learning_rate": 4.554873669314409e-06, - "loss": 0.0445, - "step": 108600 - }, - { - "epoch": 2.32, - "learning_rate": 4.540651244817705e-06, - "loss": 0.0491, - "step": 108700 - }, - { - "epoch": 2.32, - "learning_rate": 4.526428820321e-06, - "loss": 0.0412, - "step": 108800 - }, - { - "epoch": 2.32, - "learning_rate": 4.512206395824297e-06, - "loss": 0.0463, - "step": 108900 - }, - { - "epoch": 2.33, - "learning_rate": 4.497983971327592e-06, - "loss": 0.0543, - "step": 109000 - }, - { - "epoch": 2.33, - "learning_rate": 4.483761546830889e-06, - "loss": 0.0498, - "step": 109100 - }, - { - "epoch": 2.33, - "learning_rate": 4.4695391223341845e-06, - "loss": 0.045, - "step": 109200 - }, - { - "epoch": 2.33, - "learning_rate": 4.45531669783748e-06, - "loss": 0.0486, - "step": 109300 - }, - { - "epoch": 2.33, - "learning_rate": 4.441094273340777e-06, - "loss": 0.05, - "step": 109400 - }, - { - "epoch": 2.34, - "learning_rate": 4.426871848844073e-06, - "loss": 0.0514, - "step": 109500 - }, - { - "epoch": 2.34, - "learning_rate": 4.412649424347369e-06, - "loss": 0.0502, - "step": 109600 - }, - { - "epoch": 2.34, - "learning_rate": 4.398426999850665e-06, - "loss": 0.0459, - "step": 109700 - }, - { - "epoch": 2.34, - "learning_rate": 4.384204575353961e-06, - "loss": 0.043, - "step": 109800 - }, - { - "epoch": 2.34, - "learning_rate": 4.369982150857257e-06, - "loss": 0.0472, - "step": 109900 - }, - { - "epoch": 2.35, - "learning_rate": 4.355759726360553e-06, - "loss": 0.0547, - "step": 110000 - }, - { - "epoch": 2.35, - "learning_rate": 4.341537301863849e-06, - "loss": 0.053, - "step": 110100 - }, - { - "epoch": 2.35, - "learning_rate": 4.327314877367145e-06, - "loss": 0.0444, - "step": 110200 - }, - { - "epoch": 2.35, - "learning_rate": 4.313092452870441e-06, - "loss": 0.0507, - "step": 110300 - }, - { - "epoch": 2.36, - "learning_rate": 4.298870028373737e-06, - "loss": 0.0504, - "step": 110400 - }, - { - "epoch": 2.36, - "learning_rate": 4.284647603877034e-06, - "loss": 0.0425, - "step": 110500 - }, - { - "epoch": 2.36, - "learning_rate": 4.270425179380329e-06, - "loss": 0.0555, - "step": 110600 - }, - { - "epoch": 2.36, - "learning_rate": 4.256202754883625e-06, - "loss": 0.0586, - "step": 110700 - }, - { - "epoch": 2.36, - "learning_rate": 4.241980330386921e-06, - "loss": 0.043, - "step": 110800 - }, - { - "epoch": 2.37, - "learning_rate": 4.227757905890218e-06, - "loss": 0.0509, - "step": 110900 - }, - { - "epoch": 2.37, - "learning_rate": 4.2135354813935135e-06, - "loss": 0.0506, - "step": 111000 - }, - { - "epoch": 2.37, - "learning_rate": 4.199313056896809e-06, - "loss": 0.0504, - "step": 111100 - }, - { - "epoch": 2.37, - "learning_rate": 4.185090632400105e-06, - "loss": 0.0487, - "step": 111200 - }, - { - "epoch": 2.37, - "learning_rate": 4.170868207903402e-06, - "loss": 0.0493, - "step": 111300 - }, - { - "epoch": 2.38, - "learning_rate": 4.156645783406698e-06, - "loss": 0.0483, - "step": 111400 - }, - { - "epoch": 2.38, - "learning_rate": 4.1424233589099935e-06, - "loss": 0.0437, - "step": 111500 - }, - { - "epoch": 2.38, - "learning_rate": 4.128200934413289e-06, - "loss": 0.0623, - "step": 111600 - }, - { - "epoch": 2.38, - "learning_rate": 4.113978509916586e-06, - "loss": 0.0511, - "step": 111700 - }, - { - "epoch": 2.39, - "learning_rate": 4.099756085419882e-06, - "loss": 0.0527, - "step": 111800 - }, - { - "epoch": 2.39, - "learning_rate": 4.085533660923178e-06, - "loss": 0.0468, - "step": 111900 - }, - { - "epoch": 2.39, - "learning_rate": 4.071311236426474e-06, - "loss": 0.0479, - "step": 112000 - }, - { - "epoch": 2.39, - "learning_rate": 4.05708881192977e-06, - "loss": 0.0509, - "step": 112100 - }, - { - "epoch": 2.39, - "learning_rate": 4.042866387433066e-06, - "loss": 0.0506, - "step": 112200 - }, - { - "epoch": 2.4, - "learning_rate": 4.028643962936362e-06, - "loss": 0.0414, - "step": 112300 - }, - { - "epoch": 2.4, - "learning_rate": 4.014421538439658e-06, - "loss": 0.0456, - "step": 112400 - }, - { - "epoch": 2.4, - "learning_rate": 4.000199113942954e-06, - "loss": 0.0523, - "step": 112500 - }, - { - "epoch": 2.4, - "learning_rate": 3.98597668944625e-06, - "loss": 0.0463, - "step": 112600 - }, - { - "epoch": 2.4, - "learning_rate": 3.971754264949546e-06, - "loss": 0.0428, - "step": 112700 - }, - { - "epoch": 2.41, - "learning_rate": 3.9575318404528426e-06, - "loss": 0.0464, - "step": 112800 - }, - { - "epoch": 2.41, - "learning_rate": 3.943309415956138e-06, - "loss": 0.0482, - "step": 112900 - }, - { - "epoch": 2.41, - "learning_rate": 3.929086991459434e-06, - "loss": 0.0438, - "step": 113000 - }, - { - "epoch": 2.41, - "learning_rate": 3.914864566962731e-06, - "loss": 0.0406, - "step": 113100 - }, - { - "epoch": 2.41, - "learning_rate": 3.900642142466026e-06, - "loss": 0.0402, - "step": 113200 - }, - { - "epoch": 2.42, - "learning_rate": 3.8864197179693225e-06, - "loss": 0.0498, - "step": 113300 - }, - { - "epoch": 2.42, - "learning_rate": 3.872197293472618e-06, - "loss": 0.0502, - "step": 113400 - }, - { - "epoch": 2.42, - "learning_rate": 3.857974868975914e-06, - "loss": 0.0549, - "step": 113500 - }, - { - "epoch": 2.42, - "learning_rate": 3.843752444479211e-06, - "loss": 0.0559, - "step": 113600 - }, - { - "epoch": 2.43, - "learning_rate": 3.829530019982507e-06, - "loss": 0.051, - "step": 113700 - }, - { - "epoch": 2.43, - "learning_rate": 3.8153075954858025e-06, - "loss": 0.0485, - "step": 113800 - }, - { - "epoch": 2.43, - "learning_rate": 3.8010851709890987e-06, - "loss": 0.0493, - "step": 113900 - }, - { - "epoch": 2.43, - "learning_rate": 3.7868627464923945e-06, - "loss": 0.053, - "step": 114000 - }, - { - "epoch": 2.43, - "learning_rate": 3.7726403219956908e-06, - "loss": 0.0542, - "step": 114100 - }, - { - "epoch": 2.44, - "learning_rate": 3.758417897498987e-06, - "loss": 0.0468, - "step": 114200 - }, - { - "epoch": 2.44, - "learning_rate": 3.744195473002283e-06, - "loss": 0.0538, - "step": 114300 - }, - { - "epoch": 2.44, - "learning_rate": 3.729973048505579e-06, - "loss": 0.0557, - "step": 114400 - }, - { - "epoch": 2.44, - "learning_rate": 3.7157506240088753e-06, - "loss": 0.0515, - "step": 114500 - }, - { - "epoch": 2.44, - "learning_rate": 3.701528199512171e-06, - "loss": 0.0476, - "step": 114600 - }, - { - "epoch": 2.45, - "learning_rate": 3.6873057750154674e-06, - "loss": 0.0441, - "step": 114700 - }, - { - "epoch": 2.45, - "learning_rate": 3.6730833505187628e-06, - "loss": 0.0506, - "step": 114800 - }, - { - "epoch": 2.45, - "learning_rate": 3.658860926022059e-06, - "loss": 0.0545, - "step": 114900 - }, - { - "epoch": 2.45, - "learning_rate": 3.6446385015253557e-06, - "loss": 0.0448, - "step": 115000 - }, - { - "epoch": 2.46, - "learning_rate": 3.630416077028651e-06, - "loss": 0.0462, - "step": 115100 - }, - { - "epoch": 2.46, - "learning_rate": 3.6161936525319473e-06, - "loss": 0.0573, - "step": 115200 - }, - { - "epoch": 2.46, - "learning_rate": 3.6019712280352436e-06, - "loss": 0.0433, - "step": 115300 - }, - { - "epoch": 2.46, - "learning_rate": 3.5877488035385394e-06, - "loss": 0.0434, - "step": 115400 - }, - { - "epoch": 2.46, - "learning_rate": 3.5735263790418356e-06, - "loss": 0.0511, - "step": 115500 - }, - { - "epoch": 2.47, - "learning_rate": 3.5593039545451315e-06, - "loss": 0.0364, - "step": 115600 - }, - { - "epoch": 2.47, - "learning_rate": 3.5450815300484277e-06, - "loss": 0.0473, - "step": 115700 - }, - { - "epoch": 2.47, - "learning_rate": 3.530859105551724e-06, - "loss": 0.05, - "step": 115800 - }, - { - "epoch": 2.47, - "learning_rate": 3.5166366810550193e-06, - "loss": 0.0478, - "step": 115900 - }, - { - "epoch": 2.47, - "learning_rate": 3.5024142565583156e-06, - "loss": 0.0464, - "step": 116000 - }, - { - "epoch": 2.48, - "learning_rate": 3.4881918320616123e-06, - "loss": 0.0427, - "step": 116100 - }, - { - "epoch": 2.48, - "learning_rate": 3.4739694075649077e-06, - "loss": 0.0513, - "step": 116200 - }, - { - "epoch": 2.48, - "learning_rate": 3.459746983068204e-06, - "loss": 0.0473, - "step": 116300 - }, - { - "epoch": 2.48, - "learning_rate": 3.4455245585714997e-06, - "loss": 0.045, - "step": 116400 - }, - { - "epoch": 2.49, - "learning_rate": 3.431302134074796e-06, - "loss": 0.0486, - "step": 116500 - }, - { - "epoch": 2.49, - "learning_rate": 3.417079709578092e-06, - "loss": 0.0452, - "step": 116600 - }, - { - "epoch": 2.49, - "learning_rate": 3.402857285081388e-06, - "loss": 0.0509, - "step": 116700 - }, - { - "epoch": 2.49, - "learning_rate": 3.3886348605846843e-06, - "loss": 0.0465, - "step": 116800 - }, - { - "epoch": 2.49, - "learning_rate": 3.3744124360879805e-06, - "loss": 0.0531, - "step": 116900 - }, - { - "epoch": 2.5, - "learning_rate": 3.360190011591276e-06, - "loss": 0.0492, - "step": 117000 - }, - { - "epoch": 2.5, - "learning_rate": 3.3459675870945726e-06, - "loss": 0.0472, - "step": 117100 - }, - { - "epoch": 2.5, - "learning_rate": 3.331745162597868e-06, - "loss": 0.0509, - "step": 117200 - }, - { - "epoch": 2.5, - "learning_rate": 3.3175227381011642e-06, - "loss": 0.0516, - "step": 117300 - }, - { - "epoch": 2.5, - "learning_rate": 3.3033003136044605e-06, - "loss": 0.0445, - "step": 117400 - }, - { - "epoch": 2.51, - "learning_rate": 3.2890778891077563e-06, - "loss": 0.0506, - "step": 117500 - }, - { - "epoch": 2.51, - "learning_rate": 3.2748554646110525e-06, - "loss": 0.0582, - "step": 117600 - }, - { - "epoch": 2.51, - "learning_rate": 3.2606330401143488e-06, - "loss": 0.0395, - "step": 117700 - }, - { - "epoch": 2.51, - "learning_rate": 3.2464106156176446e-06, - "loss": 0.044, - "step": 117800 - }, - { - "epoch": 2.52, - "learning_rate": 3.232188191120941e-06, - "loss": 0.0445, - "step": 117900 - }, - { - "epoch": 2.52, - "learning_rate": 3.2179657666242362e-06, - "loss": 0.0433, - "step": 118000 - }, - { - "epoch": 2.52, - "learning_rate": 3.2037433421275325e-06, - "loss": 0.0459, - "step": 118100 - }, - { - "epoch": 2.52, - "learning_rate": 3.189520917630829e-06, - "loss": 0.0474, - "step": 118200 - }, - { - "epoch": 2.52, - "learning_rate": 3.1752984931341245e-06, - "loss": 0.0495, - "step": 118300 - }, - { - "epoch": 2.53, - "learning_rate": 3.1610760686374208e-06, - "loss": 0.0477, - "step": 118400 - }, - { - "epoch": 2.53, - "learning_rate": 3.146853644140717e-06, - "loss": 0.0539, - "step": 118500 - }, - { - "epoch": 2.53, - "learning_rate": 3.132631219644013e-06, - "loss": 0.0466, - "step": 118600 - }, - { - "epoch": 2.53, - "learning_rate": 3.118408795147309e-06, - "loss": 0.0484, - "step": 118700 - }, - { - "epoch": 2.53, - "learning_rate": 3.104186370650605e-06, - "loss": 0.043, - "step": 118800 - }, - { - "epoch": 2.54, - "learning_rate": 3.089963946153901e-06, - "loss": 0.0451, - "step": 118900 - }, - { - "epoch": 2.54, - "learning_rate": 3.0757415216571974e-06, - "loss": 0.045, - "step": 119000 - }, - { - "epoch": 2.54, - "learning_rate": 3.061519097160493e-06, - "loss": 0.0413, - "step": 119100 - }, - { - "epoch": 2.54, - "learning_rate": 3.0472966726637895e-06, - "loss": 0.0449, - "step": 119200 - }, - { - "epoch": 2.55, - "learning_rate": 3.0330742481670857e-06, - "loss": 0.0518, - "step": 119300 - }, - { - "epoch": 2.55, - "learning_rate": 3.018851823670381e-06, - "loss": 0.0472, - "step": 119400 - }, - { - "epoch": 2.55, - "learning_rate": 3.0046293991736774e-06, - "loss": 0.0471, - "step": 119500 - }, - { - "epoch": 2.55, - "learning_rate": 2.990406974676973e-06, - "loss": 0.0401, - "step": 119600 - }, - { - "epoch": 2.55, - "learning_rate": 2.9761845501802694e-06, - "loss": 0.0524, - "step": 119700 - }, - { - "epoch": 2.56, - "learning_rate": 2.9619621256835657e-06, - "loss": 0.0506, - "step": 119800 - }, - { - "epoch": 2.56, - "learning_rate": 2.9477397011868615e-06, - "loss": 0.047, - "step": 119900 - }, - { - "epoch": 2.56, - "learning_rate": 2.9335172766901577e-06, - "loss": 0.049, - "step": 120000 - }, - { - "epoch": 2.56, - "learning_rate": 2.919294852193454e-06, - "loss": 0.0404, - "step": 120100 - }, - { - "epoch": 2.56, - "learning_rate": 2.9050724276967494e-06, - "loss": 0.0466, - "step": 120200 - }, - { - "epoch": 2.57, - "learning_rate": 2.890850003200046e-06, - "loss": 0.0414, - "step": 120300 - }, - { - "epoch": 2.57, - "learning_rate": 2.8766275787033414e-06, - "loss": 0.0512, - "step": 120400 - }, - { - "epoch": 2.57, - "learning_rate": 2.8624051542066377e-06, - "loss": 0.0427, - "step": 120500 - }, - { - "epoch": 2.57, - "learning_rate": 2.848182729709934e-06, - "loss": 0.0507, - "step": 120600 - }, - { - "epoch": 2.57, - "learning_rate": 2.8339603052132297e-06, - "loss": 0.0472, - "step": 120700 - }, - { - "epoch": 2.58, - "learning_rate": 2.819737880716526e-06, - "loss": 0.0455, - "step": 120800 - }, - { - "epoch": 2.58, - "learning_rate": 2.8055154562198222e-06, - "loss": 0.0417, - "step": 120900 - }, - { - "epoch": 2.58, - "learning_rate": 2.791293031723118e-06, - "loss": 0.0479, - "step": 121000 - }, - { - "epoch": 2.58, - "learning_rate": 2.7770706072264143e-06, - "loss": 0.0541, - "step": 121100 - }, - { - "epoch": 2.59, - "learning_rate": 2.7628481827297097e-06, - "loss": 0.0467, - "step": 121200 - }, - { - "epoch": 2.59, - "learning_rate": 2.7486257582330064e-06, - "loss": 0.0411, - "step": 121300 - }, - { - "epoch": 2.59, - "learning_rate": 2.7344033337363026e-06, - "loss": 0.0508, - "step": 121400 - }, - { - "epoch": 2.59, - "learning_rate": 2.720180909239598e-06, - "loss": 0.0582, - "step": 121500 - }, - { - "epoch": 2.59, - "learning_rate": 2.7059584847428942e-06, - "loss": 0.0454, - "step": 121600 - }, - { - "epoch": 2.6, - "learning_rate": 2.6917360602461905e-06, - "loss": 0.0409, - "step": 121700 - }, - { - "epoch": 2.6, - "learning_rate": 2.6775136357494863e-06, - "loss": 0.0477, - "step": 121800 - }, - { - "epoch": 2.6, - "learning_rate": 2.6632912112527826e-06, - "loss": 0.047, - "step": 121900 - }, - { - "epoch": 2.6, - "learning_rate": 2.6490687867560784e-06, - "loss": 0.0463, - "step": 122000 - }, - { - "epoch": 2.6, - "learning_rate": 2.6348463622593746e-06, - "loss": 0.0412, - "step": 122100 - }, - { - "epoch": 2.61, - "learning_rate": 2.620623937762671e-06, - "loss": 0.0442, - "step": 122200 - }, - { - "epoch": 2.61, - "learning_rate": 2.6064015132659663e-06, - "loss": 0.0548, - "step": 122300 - }, - { - "epoch": 2.61, - "learning_rate": 2.592179088769263e-06, - "loss": 0.0463, - "step": 122400 - }, - { - "epoch": 2.61, - "learning_rate": 2.577956664272559e-06, - "loss": 0.0469, - "step": 122500 - }, - { - "epoch": 2.62, - "learning_rate": 2.5637342397758546e-06, - "loss": 0.0459, - "step": 122600 - }, - { - "epoch": 2.62, - "learning_rate": 2.549511815279151e-06, - "loss": 0.0539, - "step": 122700 - }, - { - "epoch": 2.62, - "learning_rate": 2.5352893907824466e-06, - "loss": 0.0472, - "step": 122800 - }, - { - "epoch": 2.62, - "learning_rate": 2.521066966285743e-06, - "loss": 0.0501, - "step": 122900 - }, - { - "epoch": 2.62, - "learning_rate": 2.506844541789039e-06, - "loss": 0.047, - "step": 123000 - }, - { - "epoch": 2.63, - "learning_rate": 2.492622117292335e-06, - "loss": 0.0451, - "step": 123100 - }, - { - "epoch": 2.63, - "learning_rate": 2.478399692795631e-06, - "loss": 0.0439, - "step": 123200 - }, - { - "epoch": 2.63, - "learning_rate": 2.464177268298927e-06, - "loss": 0.0487, - "step": 123300 - }, - { - "epoch": 2.63, - "learning_rate": 2.4499548438022232e-06, - "loss": 0.0428, - "step": 123400 - }, - { - "epoch": 2.63, - "learning_rate": 2.4357324193055195e-06, - "loss": 0.0474, - "step": 123500 - }, - { - "epoch": 2.64, - "learning_rate": 2.4215099948088153e-06, - "loss": 0.0443, - "step": 123600 - }, - { - "epoch": 2.64, - "learning_rate": 2.407287570312111e-06, - "loss": 0.0462, - "step": 123700 - }, - { - "epoch": 2.64, - "learning_rate": 2.3930651458154074e-06, - "loss": 0.0458, - "step": 123800 - }, - { - "epoch": 2.64, - "learning_rate": 2.3788427213187036e-06, - "loss": 0.0512, - "step": 123900 - }, - { - "epoch": 2.65, - "learning_rate": 2.3646202968219994e-06, - "loss": 0.0468, - "step": 124000 - }, - { - "epoch": 2.65, - "learning_rate": 2.3503978723252953e-06, - "loss": 0.041, - "step": 124100 - }, - { - "epoch": 2.65, - "learning_rate": 2.3361754478285915e-06, - "loss": 0.039, - "step": 124200 - }, - { - "epoch": 2.65, - "learning_rate": 2.3219530233318877e-06, - "loss": 0.0458, - "step": 124300 - }, - { - "epoch": 2.65, - "learning_rate": 2.3077305988351836e-06, - "loss": 0.0431, - "step": 124400 - }, - { - "epoch": 2.66, - "learning_rate": 2.29350817433848e-06, - "loss": 0.0433, - "step": 124500 - }, - { - "epoch": 2.66, - "learning_rate": 2.2792857498417756e-06, - "loss": 0.0433, - "step": 124600 - }, - { - "epoch": 2.66, - "learning_rate": 2.265063325345072e-06, - "loss": 0.0435, - "step": 124700 - }, - { - "epoch": 2.66, - "learning_rate": 2.2508409008483677e-06, - "loss": 0.0471, - "step": 124800 - }, - { - "epoch": 2.66, - "learning_rate": 2.236618476351664e-06, - "loss": 0.0447, - "step": 124900 - }, - { - "epoch": 2.67, - "learning_rate": 2.2223960518549598e-06, - "loss": 0.0356, - "step": 125000 - }, - { - "epoch": 2.67, - "learning_rate": 2.208173627358256e-06, - "loss": 0.0447, - "step": 125100 - }, - { - "epoch": 2.67, - "learning_rate": 2.193951202861552e-06, - "loss": 0.0433, - "step": 125200 - }, - { - "epoch": 2.67, - "learning_rate": 2.179728778364848e-06, - "loss": 0.0448, - "step": 125300 - }, - { - "epoch": 2.68, - "learning_rate": 2.165506353868144e-06, - "loss": 0.0461, - "step": 125400 - }, - { - "epoch": 2.68, - "learning_rate": 2.15128392937144e-06, - "loss": 0.0353, - "step": 125500 - }, - { - "epoch": 2.68, - "learning_rate": 2.1370615048747364e-06, - "loss": 0.0465, - "step": 125600 - }, - { - "epoch": 2.68, - "learning_rate": 2.122839080378032e-06, - "loss": 0.05, - "step": 125700 - }, - { - "epoch": 2.68, - "learning_rate": 2.108616655881328e-06, - "loss": 0.0421, - "step": 125800 - }, - { - "epoch": 2.69, - "learning_rate": 2.0943942313846243e-06, - "loss": 0.0432, - "step": 125900 - }, - { - "epoch": 2.69, - "learning_rate": 2.0801718068879205e-06, - "loss": 0.0397, - "step": 126000 - }, - { - "epoch": 2.69, - "learning_rate": 2.0659493823912163e-06, - "loss": 0.0431, - "step": 126100 - }, - { - "epoch": 2.69, - "learning_rate": 2.051726957894512e-06, - "loss": 0.0452, - "step": 126200 - }, - { - "epoch": 2.69, - "learning_rate": 2.0375045333978084e-06, - "loss": 0.0488, - "step": 126300 - }, - { - "epoch": 2.7, - "learning_rate": 2.0232821089011046e-06, - "loss": 0.0514, - "step": 126400 - }, - { - "epoch": 2.7, - "learning_rate": 2.0090596844044005e-06, - "loss": 0.0454, - "step": 126500 - }, - { - "epoch": 2.7, - "learning_rate": 1.9948372599076967e-06, - "loss": 0.0447, - "step": 126600 - }, - { - "epoch": 2.7, - "learning_rate": 1.980614835410993e-06, - "loss": 0.0419, - "step": 126700 - }, - { - "epoch": 2.71, - "learning_rate": 1.9663924109142888e-06, - "loss": 0.039, - "step": 126800 - }, - { - "epoch": 2.71, - "learning_rate": 1.9521699864175846e-06, - "loss": 0.0455, - "step": 126900 - }, - { - "epoch": 2.71, - "learning_rate": 1.937947561920881e-06, - "loss": 0.0469, - "step": 127000 - }, - { - "epoch": 2.71, - "learning_rate": 1.923725137424177e-06, - "loss": 0.0398, - "step": 127100 - }, - { - "epoch": 2.71, - "learning_rate": 1.909502712927473e-06, - "loss": 0.0412, - "step": 127200 - }, - { - "epoch": 2.72, - "learning_rate": 1.895280288430769e-06, - "loss": 0.0435, - "step": 127300 - }, - { - "epoch": 2.72, - "learning_rate": 1.881057863934065e-06, - "loss": 0.042, - "step": 127400 - }, - { - "epoch": 2.72, - "learning_rate": 1.8668354394373612e-06, - "loss": 0.0423, - "step": 127500 - }, - { - "epoch": 2.72, - "learning_rate": 1.852613014940657e-06, - "loss": 0.0448, - "step": 127600 - }, - { - "epoch": 2.72, - "learning_rate": 1.838390590443953e-06, - "loss": 0.0401, - "step": 127700 - }, - { - "epoch": 2.73, - "learning_rate": 1.824168165947249e-06, - "loss": 0.0374, - "step": 127800 - }, - { - "epoch": 2.73, - "learning_rate": 1.8099457414505453e-06, - "loss": 0.04, - "step": 127900 - }, - { - "epoch": 2.73, - "learning_rate": 1.7957233169538414e-06, - "loss": 0.0407, - "step": 128000 - }, - { - "epoch": 2.73, - "learning_rate": 1.7815008924571372e-06, - "loss": 0.0473, - "step": 128100 - }, - { - "epoch": 2.73, - "learning_rate": 1.7672784679604332e-06, - "loss": 0.0403, - "step": 128200 - }, - { - "epoch": 2.74, - "learning_rate": 1.7530560434637295e-06, - "loss": 0.036, - "step": 128300 - }, - { - "epoch": 2.74, - "learning_rate": 1.7388336189670255e-06, - "loss": 0.0412, - "step": 128400 - }, - { - "epoch": 2.74, - "learning_rate": 1.7246111944703215e-06, - "loss": 0.0451, - "step": 128500 - }, - { - "epoch": 2.74, - "learning_rate": 1.7103887699736173e-06, - "loss": 0.046, - "step": 128600 - }, - { - "epoch": 2.75, - "learning_rate": 1.6961663454769136e-06, - "loss": 0.0411, - "step": 128700 - }, - { - "epoch": 2.75, - "learning_rate": 1.6819439209802096e-06, - "loss": 0.0468, - "step": 128800 - }, - { - "epoch": 2.75, - "learning_rate": 1.6677214964835057e-06, - "loss": 0.0404, - "step": 128900 - }, - { - "epoch": 2.75, - "learning_rate": 1.6534990719868017e-06, - "loss": 0.0462, - "step": 129000 - }, - { - "epoch": 2.75, - "learning_rate": 1.639276647490098e-06, - "loss": 0.042, - "step": 129100 - }, - { - "epoch": 2.76, - "learning_rate": 1.6250542229933938e-06, - "loss": 0.0462, - "step": 129200 - }, - { - "epoch": 2.76, - "learning_rate": 1.6108317984966898e-06, - "loss": 0.0364, - "step": 129300 - }, - { - "epoch": 2.76, - "learning_rate": 1.5966093739999858e-06, - "loss": 0.0456, - "step": 129400 - }, - { - "epoch": 2.76, - "learning_rate": 1.582386949503282e-06, - "loss": 0.0401, - "step": 129500 - }, - { - "epoch": 2.76, - "learning_rate": 1.568164525006578e-06, - "loss": 0.051, - "step": 129600 - }, - { - "epoch": 2.77, - "learning_rate": 1.553942100509874e-06, - "loss": 0.0526, - "step": 129700 - }, - { - "epoch": 2.77, - "learning_rate": 1.53971967601317e-06, - "loss": 0.0404, - "step": 129800 - }, - { - "epoch": 2.77, - "learning_rate": 1.5254972515164662e-06, - "loss": 0.0408, - "step": 129900 - }, - { - "epoch": 2.77, - "learning_rate": 1.5112748270197622e-06, - "loss": 0.0491, - "step": 130000 - }, - { - "epoch": 2.78, - "learning_rate": 1.4970524025230583e-06, - "loss": 0.0405, - "step": 130100 - }, - { - "epoch": 2.78, - "learning_rate": 1.482829978026354e-06, - "loss": 0.0462, - "step": 130200 - }, - { - "epoch": 2.78, - "learning_rate": 1.4686075535296503e-06, - "loss": 0.0493, - "step": 130300 - }, - { - "epoch": 2.78, - "learning_rate": 1.4543851290329464e-06, - "loss": 0.0401, - "step": 130400 - }, - { - "epoch": 2.78, - "learning_rate": 1.4401627045362424e-06, - "loss": 0.0413, - "step": 130500 - }, - { - "epoch": 2.79, - "learning_rate": 1.4259402800395384e-06, - "loss": 0.0377, - "step": 130600 - }, - { - "epoch": 2.79, - "learning_rate": 1.4117178555428347e-06, - "loss": 0.0463, - "step": 130700 - }, - { - "epoch": 2.79, - "learning_rate": 1.3974954310461305e-06, - "loss": 0.0386, - "step": 130800 - }, - { - "epoch": 2.79, - "learning_rate": 1.3832730065494265e-06, - "loss": 0.0375, - "step": 130900 - }, - { - "epoch": 2.79, - "learning_rate": 1.3690505820527225e-06, - "loss": 0.0432, - "step": 131000 - }, - { - "epoch": 2.8, - "learning_rate": 1.3548281575560188e-06, - "loss": 0.0463, - "step": 131100 - }, - { - "epoch": 2.8, - "learning_rate": 1.3406057330593148e-06, - "loss": 0.0429, - "step": 131200 - }, - { - "epoch": 2.8, - "learning_rate": 1.3263833085626106e-06, - "loss": 0.0405, - "step": 131300 - }, - { - "epoch": 2.8, - "learning_rate": 1.3121608840659067e-06, - "loss": 0.0391, - "step": 131400 - }, - { - "epoch": 2.81, - "learning_rate": 1.297938459569203e-06, - "loss": 0.0377, - "step": 131500 - }, - { - "epoch": 2.81, - "learning_rate": 1.283716035072499e-06, - "loss": 0.0427, - "step": 131600 - }, - { - "epoch": 2.81, - "learning_rate": 1.269493610575795e-06, - "loss": 0.0375, - "step": 131700 - }, - { - "epoch": 2.81, - "learning_rate": 1.2552711860790908e-06, - "loss": 0.0451, - "step": 131800 - }, - { - "epoch": 2.81, - "learning_rate": 1.241048761582387e-06, - "loss": 0.0475, - "step": 131900 - }, - { - "epoch": 2.82, - "learning_rate": 1.226826337085683e-06, - "loss": 0.0423, - "step": 132000 - }, - { - "epoch": 2.82, - "learning_rate": 1.2126039125889791e-06, - "loss": 0.0476, - "step": 132100 - }, - { - "epoch": 2.82, - "learning_rate": 1.1983814880922751e-06, - "loss": 0.0389, - "step": 132200 - }, - { - "epoch": 2.82, - "learning_rate": 1.1841590635955712e-06, - "loss": 0.0425, - "step": 132300 - }, - { - "epoch": 2.82, - "learning_rate": 1.1699366390988672e-06, - "loss": 0.0372, - "step": 132400 - }, - { - "epoch": 2.83, - "learning_rate": 1.1557142146021632e-06, - "loss": 0.0463, - "step": 132500 - }, - { - "epoch": 2.83, - "learning_rate": 1.1414917901054593e-06, - "loss": 0.034, - "step": 132600 - }, - { - "epoch": 2.83, - "learning_rate": 1.1272693656087553e-06, - "loss": 0.0415, - "step": 132700 - }, - { - "epoch": 2.83, - "learning_rate": 1.1130469411120516e-06, - "loss": 0.0434, - "step": 132800 - }, - { - "epoch": 2.84, - "learning_rate": 1.0988245166153474e-06, - "loss": 0.0351, - "step": 132900 - }, - { - "epoch": 2.84, - "learning_rate": 1.0846020921186436e-06, - "loss": 0.0486, - "step": 133000 - }, - { - "epoch": 2.84, - "learning_rate": 1.0703796676219394e-06, - "loss": 0.0378, - "step": 133100 - }, - { - "epoch": 2.84, - "learning_rate": 1.0561572431252357e-06, - "loss": 0.0419, - "step": 133200 - }, - { - "epoch": 2.84, - "learning_rate": 1.0419348186285317e-06, - "loss": 0.0338, - "step": 133300 - }, - { - "epoch": 2.85, - "learning_rate": 1.0277123941318277e-06, - "loss": 0.0518, - "step": 133400 - }, - { - "epoch": 2.85, - "learning_rate": 1.0134899696351238e-06, - "loss": 0.0445, - "step": 133500 - }, - { - "epoch": 2.85, - "learning_rate": 9.992675451384198e-07, - "loss": 0.0429, - "step": 133600 - }, - { - "epoch": 2.85, - "learning_rate": 9.850451206417158e-07, - "loss": 0.045, - "step": 133700 - }, - { - "epoch": 2.85, - "learning_rate": 9.708226961450119e-07, - "loss": 0.0394, - "step": 133800 - }, - { - "epoch": 2.86, - "learning_rate": 9.56600271648308e-07, - "loss": 0.0408, - "step": 133900 - }, - { - "epoch": 2.86, - "learning_rate": 9.42377847151604e-07, - "loss": 0.0369, - "step": 134000 - }, - { - "epoch": 2.86, - "learning_rate": 9.281554226549e-07, - "loss": 0.044, - "step": 134100 - }, - { - "epoch": 2.86, - "learning_rate": 9.139329981581961e-07, - "loss": 0.0442, - "step": 134200 - }, - { - "epoch": 2.87, - "learning_rate": 8.99710573661492e-07, - "loss": 0.0472, - "step": 134300 - }, - { - "epoch": 2.87, - "learning_rate": 8.854881491647882e-07, - "loss": 0.0426, - "step": 134400 - }, - { - "epoch": 2.87, - "learning_rate": 8.712657246680842e-07, - "loss": 0.0373, - "step": 134500 - }, - { - "epoch": 2.87, - "learning_rate": 8.570433001713803e-07, - "loss": 0.0406, - "step": 134600 - }, - { - "epoch": 2.87, - "learning_rate": 8.428208756746763e-07, - "loss": 0.0491, - "step": 134700 - }, - { - "epoch": 2.88, - "learning_rate": 8.285984511779724e-07, - "loss": 0.0469, - "step": 134800 - }, - { - "epoch": 2.88, - "learning_rate": 8.143760266812683e-07, - "loss": 0.0445, - "step": 134900 - }, - { - "epoch": 2.88, - "learning_rate": 8.001536021845645e-07, - "loss": 0.0406, - "step": 135000 - }, - { - "epoch": 2.88, - "learning_rate": 7.859311776878604e-07, - "loss": 0.0508, - "step": 135100 - }, - { - "epoch": 2.88, - "learning_rate": 7.717087531911565e-07, - "loss": 0.0354, - "step": 135200 - }, - { - "epoch": 2.89, - "learning_rate": 7.574863286944526e-07, - "loss": 0.0509, - "step": 135300 - }, - { - "epoch": 2.89, - "learning_rate": 7.432639041977487e-07, - "loss": 0.0343, - "step": 135400 - }, - { - "epoch": 2.89, - "learning_rate": 7.290414797010446e-07, - "loss": 0.0453, - "step": 135500 - }, - { - "epoch": 2.89, - "learning_rate": 7.148190552043408e-07, - "loss": 0.038, - "step": 135600 - }, - { - "epoch": 2.89, - "learning_rate": 7.005966307076367e-07, - "loss": 0.0385, - "step": 135700 - }, - { - "epoch": 2.9, - "learning_rate": 6.863742062109328e-07, - "loss": 0.0461, - "step": 135800 - }, - { - "epoch": 2.9, - "learning_rate": 6.721517817142289e-07, - "loss": 0.0398, - "step": 135900 - }, - { - "epoch": 2.9, - "learning_rate": 6.579293572175249e-07, - "loss": 0.0454, - "step": 136000 - }, - { - "epoch": 2.9, - "learning_rate": 6.437069327208209e-07, - "loss": 0.0399, - "step": 136100 - }, - { - "epoch": 2.91, - "learning_rate": 6.294845082241171e-07, - "loss": 0.0439, - "step": 136200 - }, - { - "epoch": 2.91, - "learning_rate": 6.152620837274131e-07, - "loss": 0.0354, - "step": 136300 - }, - { - "epoch": 2.91, - "learning_rate": 6.010396592307091e-07, - "loss": 0.0423, - "step": 136400 - }, - { - "epoch": 2.91, - "learning_rate": 5.868172347340052e-07, - "loss": 0.0436, - "step": 136500 - }, - { - "epoch": 2.91, - "learning_rate": 5.725948102373012e-07, - "loss": 0.0387, - "step": 136600 - }, - { - "epoch": 2.92, - "learning_rate": 5.583723857405972e-07, - "loss": 0.0421, - "step": 136700 - }, - { - "epoch": 2.92, - "learning_rate": 5.441499612438933e-07, - "loss": 0.0399, - "step": 136800 - }, - { - "epoch": 2.92, - "learning_rate": 5.299275367471893e-07, - "loss": 0.0414, - "step": 136900 - }, - { - "epoch": 2.92, - "learning_rate": 5.157051122504854e-07, - "loss": 0.0443, - "step": 137000 - }, - { - "epoch": 2.92, - "learning_rate": 5.014826877537815e-07, - "loss": 0.0555, - "step": 137100 - }, - { - "epoch": 2.93, - "learning_rate": 4.872602632570775e-07, - "loss": 0.0406, - "step": 137200 - }, - { - "epoch": 2.93, - "learning_rate": 4.7303783876037353e-07, - "loss": 0.0449, - "step": 137300 - }, - { - "epoch": 2.93, - "learning_rate": 4.5881541426366957e-07, - "loss": 0.0407, - "step": 137400 - }, - { - "epoch": 2.93, - "learning_rate": 4.445929897669656e-07, - "loss": 0.0407, - "step": 137500 - }, - { - "epoch": 2.94, - "learning_rate": 4.303705652702617e-07, - "loss": 0.0419, - "step": 137600 - }, - { - "epoch": 2.94, - "learning_rate": 4.161481407735577e-07, - "loss": 0.0395, - "step": 137700 - }, - { - "epoch": 2.94, - "learning_rate": 4.0192571627685375e-07, - "loss": 0.0396, - "step": 137800 - }, - { - "epoch": 2.94, - "learning_rate": 3.877032917801498e-07, - "loss": 0.0447, - "step": 137900 - }, - { - "epoch": 2.94, - "learning_rate": 3.7348086728344587e-07, - "loss": 0.045, - "step": 138000 - }, - { - "epoch": 2.95, - "learning_rate": 3.592584427867419e-07, - "loss": 0.0413, - "step": 138100 - }, - { - "epoch": 2.95, - "learning_rate": 3.4503601829003793e-07, - "loss": 0.0471, - "step": 138200 - }, - { - "epoch": 2.95, - "learning_rate": 3.3081359379333396e-07, - "loss": 0.0362, - "step": 138300 - }, - { - "epoch": 2.95, - "learning_rate": 3.1659116929663005e-07, - "loss": 0.0361, - "step": 138400 - }, - { - "epoch": 2.95, - "learning_rate": 3.023687447999261e-07, - "loss": 0.0342, - "step": 138500 - }, - { - "epoch": 2.96, - "learning_rate": 2.881463203032221e-07, - "loss": 0.032, - "step": 138600 - }, - { - "epoch": 2.96, - "learning_rate": 2.7392389580651815e-07, - "loss": 0.0429, - "step": 138700 - }, - { - "epoch": 2.96, - "learning_rate": 2.5970147130981423e-07, - "loss": 0.0449, - "step": 138800 - }, - { - "epoch": 2.96, - "learning_rate": 2.4547904681311026e-07, - "loss": 0.0343, - "step": 138900 - }, - { - "epoch": 2.97, - "learning_rate": 2.312566223164063e-07, - "loss": 0.0331, - "step": 139000 - }, - { - "epoch": 2.97, - "learning_rate": 2.1703419781970235e-07, - "loss": 0.046, - "step": 139100 - }, - { - "epoch": 2.97, - "learning_rate": 2.0281177332299839e-07, - "loss": 0.0388, - "step": 139200 - }, - { - "epoch": 2.97, - "learning_rate": 1.8858934882629444e-07, - "loss": 0.0409, - "step": 139300 - }, - { - "epoch": 2.97, - "learning_rate": 1.7436692432959048e-07, - "loss": 0.055, - "step": 139400 - }, - { - "epoch": 2.98, - "learning_rate": 1.6014449983288654e-07, - "loss": 0.041, - "step": 139500 - }, - { - "epoch": 2.98, - "learning_rate": 1.4592207533618257e-07, - "loss": 0.0437, - "step": 139600 - }, - { - "epoch": 2.98, - "learning_rate": 1.3169965083947863e-07, - "loss": 0.0368, - "step": 139700 - }, - { - "epoch": 2.98, - "learning_rate": 1.1747722634277467e-07, - "loss": 0.05, - "step": 139800 - }, - { - "epoch": 2.98, - "learning_rate": 1.0325480184607072e-07, - "loss": 0.0418, - "step": 139900 - }, - { - "epoch": 2.99, - "learning_rate": 8.903237734936676e-08, - "loss": 0.0496, - "step": 140000 - } - ], - "logging_steps": 100, - "max_steps": 140625, - "num_train_epochs": 3, - "save_steps": 10000, - "total_flos": 3853034685726720.0, - "trial_name": null, - "trial_params": null -} +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5e246ac342cc6a38d69dc4b1c4ea012cefd2d9f776c16bb880e1ca5a7e9251 +size 171854