{ "best_metric": 0.587454617023468, "best_model_checkpoint": "models/mnli_xnli_shuff_all/checkpoint-184017", "epoch": 1.0, "global_step": 184017, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9945657194715707e-05, "loss": 0.796, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.9891314389431413e-05, "loss": 0.7802, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.9836971584147118e-05, "loss": 0.7726, "step": 1500 }, { "epoch": 0.01, "learning_rate": 1.9782628778862823e-05, "loss": 0.7543, "step": 2000 }, { "epoch": 0.01, "learning_rate": 1.972828597357853e-05, "loss": 0.7571, "step": 2500 }, { "epoch": 0.02, "learning_rate": 1.9673943168294238e-05, "loss": 0.7342, "step": 3000 }, { "epoch": 0.02, "learning_rate": 1.961960036300994e-05, "loss": 0.7401, "step": 3500 }, { "epoch": 0.02, "learning_rate": 1.956525755772565e-05, "loss": 0.7427, "step": 4000 }, { "epoch": 0.02, "learning_rate": 1.951091475244135e-05, "loss": 0.7412, "step": 4500 }, { "epoch": 0.03, "learning_rate": 1.9456571947157056e-05, "loss": 0.7342, "step": 5000 }, { "epoch": 0.03, "learning_rate": 1.9402229141872765e-05, "loss": 0.7286, "step": 5500 }, { "epoch": 0.03, "learning_rate": 1.9347886336588467e-05, "loss": 0.731, "step": 6000 }, { "epoch": 0.04, "learning_rate": 1.9293543531304176e-05, "loss": 0.7345, "step": 6500 }, { "epoch": 0.04, "learning_rate": 1.923920072601988e-05, "loss": 0.7233, "step": 7000 }, { "epoch": 0.04, "learning_rate": 1.9184857920735587e-05, "loss": 0.7177, "step": 7500 }, { "epoch": 0.04, "learning_rate": 1.9130515115451292e-05, "loss": 0.7239, "step": 8000 }, { "epoch": 0.05, "learning_rate": 1.9076172310166994e-05, "loss": 0.714, "step": 8500 }, { "epoch": 0.05, "learning_rate": 1.9021829504882703e-05, "loss": 0.7209, "step": 9000 }, { "epoch": 0.05, "learning_rate": 1.896748669959841e-05, "loss": 0.7191, "step": 9500 }, { "epoch": 0.05, "learning_rate": 1.8913143894314114e-05, "loss": 0.7079, "step": 10000 }, { "epoch": 0.06, "learning_rate": 1.885880108902982e-05, "loss": 0.7131, "step": 10500 }, { "epoch": 0.06, "learning_rate": 1.8804458283745525e-05, "loss": 0.7133, "step": 11000 }, { "epoch": 0.06, "learning_rate": 1.875011547846123e-05, "loss": 0.702, "step": 11500 }, { "epoch": 0.07, "learning_rate": 1.8695772673176936e-05, "loss": 0.6985, "step": 12000 }, { "epoch": 0.07, "learning_rate": 1.864142986789264e-05, "loss": 0.7004, "step": 12500 }, { "epoch": 0.07, "learning_rate": 1.8587087062608347e-05, "loss": 0.7062, "step": 13000 }, { "epoch": 0.07, "learning_rate": 1.8532744257324052e-05, "loss": 0.6984, "step": 13500 }, { "epoch": 0.08, "learning_rate": 1.8478401452039758e-05, "loss": 0.693, "step": 14000 }, { "epoch": 0.08, "learning_rate": 1.8424058646755467e-05, "loss": 0.697, "step": 14500 }, { "epoch": 0.08, "learning_rate": 1.836971584147117e-05, "loss": 0.703, "step": 15000 }, { "epoch": 0.08, "learning_rate": 1.8315373036186878e-05, "loss": 0.6923, "step": 15500 }, { "epoch": 0.09, "learning_rate": 1.826103023090258e-05, "loss": 0.6837, "step": 16000 }, { "epoch": 0.09, "learning_rate": 1.8206687425618285e-05, "loss": 0.7035, "step": 16500 }, { "epoch": 0.09, "learning_rate": 1.8152344620333994e-05, "loss": 0.687, "step": 17000 }, { "epoch": 0.1, "learning_rate": 1.8098001815049696e-05, "loss": 0.6872, "step": 17500 }, { "epoch": 0.1, "learning_rate": 1.8043659009765405e-05, "loss": 0.6864, "step": 18000 }, { "epoch": 0.1, "learning_rate": 1.798931620448111e-05, "loss": 0.6825, "step": 18500 }, { "epoch": 0.1, "learning_rate": 1.7934973399196816e-05, "loss": 0.6864, "step": 19000 }, { "epoch": 0.11, "learning_rate": 1.788063059391252e-05, "loss": 0.6834, "step": 19500 }, { "epoch": 0.11, "learning_rate": 1.7826287788628227e-05, "loss": 0.6794, "step": 20000 }, { "epoch": 0.11, "learning_rate": 1.7771944983343932e-05, "loss": 0.676, "step": 20500 }, { "epoch": 0.11, "learning_rate": 1.7717602178059638e-05, "loss": 0.6906, "step": 21000 }, { "epoch": 0.12, "learning_rate": 1.7663259372775343e-05, "loss": 0.6764, "step": 21500 }, { "epoch": 0.12, "learning_rate": 1.760891656749105e-05, "loss": 0.6764, "step": 22000 }, { "epoch": 0.12, "learning_rate": 1.7554573762206754e-05, "loss": 0.6833, "step": 22500 }, { "epoch": 0.12, "learning_rate": 1.750023095692246e-05, "loss": 0.6712, "step": 23000 }, { "epoch": 0.13, "learning_rate": 1.7445888151638165e-05, "loss": 0.6811, "step": 23500 }, { "epoch": 0.13, "learning_rate": 1.739154534635387e-05, "loss": 0.6707, "step": 24000 }, { "epoch": 0.13, "learning_rate": 1.7337202541069576e-05, "loss": 0.6749, "step": 24500 }, { "epoch": 0.14, "learning_rate": 1.728285973578528e-05, "loss": 0.6683, "step": 25000 }, { "epoch": 0.14, "learning_rate": 1.7228516930500987e-05, "loss": 0.6689, "step": 25500 }, { "epoch": 0.14, "learning_rate": 1.7174174125216695e-05, "loss": 0.6736, "step": 26000 }, { "epoch": 0.14, "learning_rate": 1.7119831319932398e-05, "loss": 0.6724, "step": 26500 }, { "epoch": 0.15, "learning_rate": 1.7065488514648106e-05, "loss": 0.6737, "step": 27000 }, { "epoch": 0.15, "learning_rate": 1.701114570936381e-05, "loss": 0.6631, "step": 27500 }, { "epoch": 0.15, "learning_rate": 1.6956802904079517e-05, "loss": 0.6861, "step": 28000 }, { "epoch": 0.15, "learning_rate": 1.6902460098795223e-05, "loss": 0.6749, "step": 28500 }, { "epoch": 0.16, "learning_rate": 1.6848117293510925e-05, "loss": 0.6619, "step": 29000 }, { "epoch": 0.16, "learning_rate": 1.6793774488226634e-05, "loss": 0.662, "step": 29500 }, { "epoch": 0.16, "learning_rate": 1.673943168294234e-05, "loss": 0.6522, "step": 30000 }, { "epoch": 0.17, "learning_rate": 1.6685088877658045e-05, "loss": 0.671, "step": 30500 }, { "epoch": 0.17, "learning_rate": 1.663074607237375e-05, "loss": 0.6617, "step": 31000 }, { "epoch": 0.17, "learning_rate": 1.6576403267089455e-05, "loss": 0.658, "step": 31500 }, { "epoch": 0.17, "learning_rate": 1.652206046180516e-05, "loss": 0.6588, "step": 32000 }, { "epoch": 0.18, "learning_rate": 1.6467717656520866e-05, "loss": 0.6615, "step": 32500 }, { "epoch": 0.18, "learning_rate": 1.6413374851236572e-05, "loss": 0.6628, "step": 33000 }, { "epoch": 0.18, "learning_rate": 1.6359032045952277e-05, "loss": 0.6513, "step": 33500 }, { "epoch": 0.18, "learning_rate": 1.6304689240667983e-05, "loss": 0.6586, "step": 34000 }, { "epoch": 0.19, "learning_rate": 1.6250346435383688e-05, "loss": 0.6491, "step": 34500 }, { "epoch": 0.19, "learning_rate": 1.6196003630099394e-05, "loss": 0.6708, "step": 35000 }, { "epoch": 0.19, "learning_rate": 1.61416608248151e-05, "loss": 0.6565, "step": 35500 }, { "epoch": 0.2, "learning_rate": 1.6087318019530808e-05, "loss": 0.6525, "step": 36000 }, { "epoch": 0.2, "learning_rate": 1.603297521424651e-05, "loss": 0.6503, "step": 36500 }, { "epoch": 0.2, "learning_rate": 1.5978632408962215e-05, "loss": 0.6465, "step": 37000 }, { "epoch": 0.2, "learning_rate": 1.5924289603677924e-05, "loss": 0.6477, "step": 37500 }, { "epoch": 0.21, "learning_rate": 1.5869946798393626e-05, "loss": 0.6473, "step": 38000 }, { "epoch": 0.21, "learning_rate": 1.5815603993109335e-05, "loss": 0.6494, "step": 38500 }, { "epoch": 0.21, "learning_rate": 1.5761261187825037e-05, "loss": 0.6371, "step": 39000 }, { "epoch": 0.21, "learning_rate": 1.5706918382540746e-05, "loss": 0.6434, "step": 39500 }, { "epoch": 0.22, "learning_rate": 1.565257557725645e-05, "loss": 0.645, "step": 40000 }, { "epoch": 0.22, "learning_rate": 1.5598232771972157e-05, "loss": 0.65, "step": 40500 }, { "epoch": 0.22, "learning_rate": 1.5543889966687863e-05, "loss": 0.6432, "step": 41000 }, { "epoch": 0.23, "learning_rate": 1.5489547161403568e-05, "loss": 0.6437, "step": 41500 }, { "epoch": 0.23, "learning_rate": 1.5435204356119273e-05, "loss": 0.6297, "step": 42000 }, { "epoch": 0.23, "learning_rate": 1.538086155083498e-05, "loss": 0.6376, "step": 42500 }, { "epoch": 0.23, "learning_rate": 1.5326518745550684e-05, "loss": 0.6439, "step": 43000 }, { "epoch": 0.24, "learning_rate": 1.527217594026639e-05, "loss": 0.6435, "step": 43500 }, { "epoch": 0.24, "learning_rate": 1.5217833134982097e-05, "loss": 0.6474, "step": 44000 }, { "epoch": 0.24, "learning_rate": 1.51634903296978e-05, "loss": 0.6356, "step": 44500 }, { "epoch": 0.24, "learning_rate": 1.5109147524413506e-05, "loss": 0.6386, "step": 45000 }, { "epoch": 0.25, "learning_rate": 1.5054804719129212e-05, "loss": 0.6349, "step": 45500 }, { "epoch": 0.25, "learning_rate": 1.5000461913844917e-05, "loss": 0.6333, "step": 46000 }, { "epoch": 0.25, "learning_rate": 1.4946119108560624e-05, "loss": 0.6439, "step": 46500 }, { "epoch": 0.26, "learning_rate": 1.4891776303276328e-05, "loss": 0.6334, "step": 47000 }, { "epoch": 0.26, "learning_rate": 1.4837433497992035e-05, "loss": 0.6384, "step": 47500 }, { "epoch": 0.26, "learning_rate": 1.478309069270774e-05, "loss": 0.6481, "step": 48000 }, { "epoch": 0.26, "learning_rate": 1.4728747887423446e-05, "loss": 0.6308, "step": 48500 }, { "epoch": 0.27, "learning_rate": 1.4674405082139151e-05, "loss": 0.6403, "step": 49000 }, { "epoch": 0.27, "learning_rate": 1.4620062276854857e-05, "loss": 0.6347, "step": 49500 }, { "epoch": 0.27, "learning_rate": 1.4565719471570562e-05, "loss": 0.6394, "step": 50000 }, { "epoch": 0.27, "learning_rate": 1.4511376666286268e-05, "loss": 0.6266, "step": 50500 }, { "epoch": 0.28, "learning_rate": 1.4457033861001975e-05, "loss": 0.6298, "step": 51000 }, { "epoch": 0.28, "learning_rate": 1.4402691055717679e-05, "loss": 0.6187, "step": 51500 }, { "epoch": 0.28, "learning_rate": 1.4348348250433386e-05, "loss": 0.6278, "step": 52000 }, { "epoch": 0.29, "learning_rate": 1.4294005445149091e-05, "loss": 0.6331, "step": 52500 }, { "epoch": 0.29, "learning_rate": 1.4239662639864795e-05, "loss": 0.6359, "step": 53000 }, { "epoch": 0.29, "learning_rate": 1.4185319834580502e-05, "loss": 0.6297, "step": 53500 }, { "epoch": 0.29, "learning_rate": 1.4130977029296206e-05, "loss": 0.6307, "step": 54000 }, { "epoch": 0.3, "learning_rate": 1.4076634224011913e-05, "loss": 0.6297, "step": 54500 }, { "epoch": 0.3, "learning_rate": 1.4022291418727619e-05, "loss": 0.6318, "step": 55000 }, { "epoch": 0.3, "learning_rate": 1.3967948613443326e-05, "loss": 0.6346, "step": 55500 }, { "epoch": 0.3, "learning_rate": 1.391360580815903e-05, "loss": 0.6179, "step": 56000 }, { "epoch": 0.31, "learning_rate": 1.3859263002874737e-05, "loss": 0.6215, "step": 56500 }, { "epoch": 0.31, "learning_rate": 1.380492019759044e-05, "loss": 0.6269, "step": 57000 }, { "epoch": 0.31, "learning_rate": 1.3750577392306146e-05, "loss": 0.6204, "step": 57500 }, { "epoch": 0.32, "learning_rate": 1.3696234587021853e-05, "loss": 0.6297, "step": 58000 }, { "epoch": 0.32, "learning_rate": 1.3641891781737557e-05, "loss": 0.6206, "step": 58500 }, { "epoch": 0.32, "learning_rate": 1.3587548976453264e-05, "loss": 0.6251, "step": 59000 }, { "epoch": 0.32, "learning_rate": 1.353320617116897e-05, "loss": 0.6268, "step": 59500 }, { "epoch": 0.33, "learning_rate": 1.3478863365884675e-05, "loss": 0.6182, "step": 60000 }, { "epoch": 0.33, "learning_rate": 1.342452056060038e-05, "loss": 0.6201, "step": 60500 }, { "epoch": 0.33, "learning_rate": 1.3370177755316086e-05, "loss": 0.6138, "step": 61000 }, { "epoch": 0.33, "learning_rate": 1.3315834950031791e-05, "loss": 0.6241, "step": 61500 }, { "epoch": 0.34, "learning_rate": 1.3261492144747497e-05, "loss": 0.6134, "step": 62000 }, { "epoch": 0.34, "learning_rate": 1.3207149339463204e-05, "loss": 0.6235, "step": 62500 }, { "epoch": 0.34, "learning_rate": 1.3152806534178908e-05, "loss": 0.6065, "step": 63000 }, { "epoch": 0.35, "learning_rate": 1.3098463728894615e-05, "loss": 0.6088, "step": 63500 }, { "epoch": 0.35, "learning_rate": 1.304412092361032e-05, "loss": 0.612, "step": 64000 }, { "epoch": 0.35, "learning_rate": 1.2989778118326026e-05, "loss": 0.6185, "step": 64500 }, { "epoch": 0.35, "learning_rate": 1.2935435313041731e-05, "loss": 0.6032, "step": 65000 }, { "epoch": 0.36, "learning_rate": 1.2881092507757435e-05, "loss": 0.6124, "step": 65500 }, { "epoch": 0.36, "learning_rate": 1.2826749702473142e-05, "loss": 0.6094, "step": 66000 }, { "epoch": 0.36, "learning_rate": 1.2772406897188848e-05, "loss": 0.6005, "step": 66500 }, { "epoch": 0.36, "learning_rate": 1.2718064091904555e-05, "loss": 0.6132, "step": 67000 }, { "epoch": 0.37, "learning_rate": 1.2663721286620258e-05, "loss": 0.6124, "step": 67500 }, { "epoch": 0.37, "learning_rate": 1.2609378481335966e-05, "loss": 0.6142, "step": 68000 }, { "epoch": 0.37, "learning_rate": 1.255503567605167e-05, "loss": 0.6104, "step": 68500 }, { "epoch": 0.37, "learning_rate": 1.2500692870767375e-05, "loss": 0.6183, "step": 69000 }, { "epoch": 0.38, "learning_rate": 1.2446350065483082e-05, "loss": 0.607, "step": 69500 }, { "epoch": 0.38, "learning_rate": 1.2392007260198786e-05, "loss": 0.5969, "step": 70000 }, { "epoch": 0.38, "learning_rate": 1.2337664454914493e-05, "loss": 0.6052, "step": 70500 }, { "epoch": 0.39, "learning_rate": 1.2283321649630198e-05, "loss": 0.613, "step": 71000 }, { "epoch": 0.39, "learning_rate": 1.2228978844345904e-05, "loss": 0.5975, "step": 71500 }, { "epoch": 0.39, "learning_rate": 1.217463603906161e-05, "loss": 0.5998, "step": 72000 }, { "epoch": 0.39, "learning_rate": 1.2120293233777316e-05, "loss": 0.5949, "step": 72500 }, { "epoch": 0.4, "learning_rate": 1.206595042849302e-05, "loss": 0.6029, "step": 73000 }, { "epoch": 0.4, "learning_rate": 1.2011607623208726e-05, "loss": 0.6074, "step": 73500 }, { "epoch": 0.4, "learning_rate": 1.1957264817924433e-05, "loss": 0.5985, "step": 74000 }, { "epoch": 0.4, "learning_rate": 1.1902922012640136e-05, "loss": 0.6105, "step": 74500 }, { "epoch": 0.41, "learning_rate": 1.1848579207355844e-05, "loss": 0.6064, "step": 75000 }, { "epoch": 0.41, "learning_rate": 1.1794236402071549e-05, "loss": 0.5912, "step": 75500 }, { "epoch": 0.41, "learning_rate": 1.1739893596787255e-05, "loss": 0.6117, "step": 76000 }, { "epoch": 0.42, "learning_rate": 1.168555079150296e-05, "loss": 0.5947, "step": 76500 }, { "epoch": 0.42, "learning_rate": 1.1631207986218664e-05, "loss": 0.5985, "step": 77000 }, { "epoch": 0.42, "learning_rate": 1.1576865180934371e-05, "loss": 0.6108, "step": 77500 }, { "epoch": 0.42, "learning_rate": 1.1522522375650076e-05, "loss": 0.5972, "step": 78000 }, { "epoch": 0.43, "learning_rate": 1.1468179570365784e-05, "loss": 0.6002, "step": 78500 }, { "epoch": 0.43, "learning_rate": 1.1413836765081487e-05, "loss": 0.5921, "step": 79000 }, { "epoch": 0.43, "learning_rate": 1.1359493959797194e-05, "loss": 0.6026, "step": 79500 }, { "epoch": 0.43, "learning_rate": 1.1305151154512898e-05, "loss": 0.603, "step": 80000 }, { "epoch": 0.44, "learning_rate": 1.1250808349228605e-05, "loss": 0.5963, "step": 80500 }, { "epoch": 0.44, "learning_rate": 1.119646554394431e-05, "loss": 0.5942, "step": 81000 }, { "epoch": 0.44, "learning_rate": 1.1142122738660015e-05, "loss": 0.6006, "step": 81500 }, { "epoch": 0.45, "learning_rate": 1.1087779933375722e-05, "loss": 0.6026, "step": 82000 }, { "epoch": 0.45, "learning_rate": 1.1033437128091427e-05, "loss": 0.5944, "step": 82500 }, { "epoch": 0.45, "learning_rate": 1.0979094322807133e-05, "loss": 0.6031, "step": 83000 }, { "epoch": 0.45, "learning_rate": 1.0924751517522838e-05, "loss": 0.5906, "step": 83500 }, { "epoch": 0.46, "learning_rate": 1.0870408712238545e-05, "loss": 0.595, "step": 84000 }, { "epoch": 0.46, "learning_rate": 1.0816065906954249e-05, "loss": 0.5921, "step": 84500 }, { "epoch": 0.46, "learning_rate": 1.0761723101669956e-05, "loss": 0.5944, "step": 85000 }, { "epoch": 0.46, "learning_rate": 1.0707380296385662e-05, "loss": 0.5752, "step": 85500 }, { "epoch": 0.47, "learning_rate": 1.0653037491101365e-05, "loss": 0.5942, "step": 86000 }, { "epoch": 0.47, "learning_rate": 1.0598694685817072e-05, "loss": 0.5946, "step": 86500 }, { "epoch": 0.47, "learning_rate": 1.0544351880532778e-05, "loss": 0.5911, "step": 87000 }, { "epoch": 0.48, "learning_rate": 1.0490009075248483e-05, "loss": 0.5982, "step": 87500 }, { "epoch": 0.48, "learning_rate": 1.0435666269964189e-05, "loss": 0.5941, "step": 88000 }, { "epoch": 0.48, "learning_rate": 1.0381323464679896e-05, "loss": 0.598, "step": 88500 }, { "epoch": 0.48, "learning_rate": 1.03269806593956e-05, "loss": 0.5845, "step": 89000 }, { "epoch": 0.49, "learning_rate": 1.0272637854111305e-05, "loss": 0.5958, "step": 89500 }, { "epoch": 0.49, "learning_rate": 1.0218295048827012e-05, "loss": 0.584, "step": 90000 }, { "epoch": 0.49, "learning_rate": 1.0163952243542716e-05, "loss": 0.5774, "step": 90500 }, { "epoch": 0.49, "learning_rate": 1.0109609438258423e-05, "loss": 0.5809, "step": 91000 }, { "epoch": 0.5, "learning_rate": 1.0055266632974127e-05, "loss": 0.5882, "step": 91500 }, { "epoch": 0.5, "learning_rate": 1.0000923827689834e-05, "loss": 0.5948, "step": 92000 }, { "epoch": 0.5, "learning_rate": 9.94658102240554e-06, "loss": 0.5809, "step": 92500 }, { "epoch": 0.51, "learning_rate": 9.892238217121245e-06, "loss": 0.5816, "step": 93000 }, { "epoch": 0.51, "learning_rate": 9.83789541183695e-06, "loss": 0.592, "step": 93500 }, { "epoch": 0.51, "learning_rate": 9.783552606552656e-06, "loss": 0.5822, "step": 94000 }, { "epoch": 0.51, "learning_rate": 9.729209801268361e-06, "loss": 0.5809, "step": 94500 }, { "epoch": 0.52, "learning_rate": 9.674866995984069e-06, "loss": 0.5736, "step": 95000 }, { "epoch": 0.52, "learning_rate": 9.620524190699772e-06, "loss": 0.5837, "step": 95500 }, { "epoch": 0.52, "learning_rate": 9.566181385415478e-06, "loss": 0.5812, "step": 96000 }, { "epoch": 0.52, "learning_rate": 9.511838580131183e-06, "loss": 0.5769, "step": 96500 }, { "epoch": 0.53, "learning_rate": 9.45749577484689e-06, "loss": 0.5784, "step": 97000 }, { "epoch": 0.53, "learning_rate": 9.403152969562596e-06, "loss": 0.5872, "step": 97500 }, { "epoch": 0.53, "learning_rate": 9.348810164278301e-06, "loss": 0.5807, "step": 98000 }, { "epoch": 0.54, "learning_rate": 9.294467358994007e-06, "loss": 0.5857, "step": 98500 }, { "epoch": 0.54, "learning_rate": 9.240124553709712e-06, "loss": 0.5706, "step": 99000 }, { "epoch": 0.54, "learning_rate": 9.185781748425418e-06, "loss": 0.5816, "step": 99500 }, { "epoch": 0.54, "learning_rate": 9.131438943141123e-06, "loss": 0.5888, "step": 100000 }, { "epoch": 0.55, "learning_rate": 9.077096137856829e-06, "loss": 0.5799, "step": 100500 }, { "epoch": 0.55, "learning_rate": 9.022753332572534e-06, "loss": 0.5825, "step": 101000 }, { "epoch": 0.55, "learning_rate": 8.968410527288241e-06, "loss": 0.5783, "step": 101500 }, { "epoch": 0.55, "learning_rate": 8.914067722003947e-06, "loss": 0.5749, "step": 102000 }, { "epoch": 0.56, "learning_rate": 8.859724916719652e-06, "loss": 0.5817, "step": 102500 }, { "epoch": 0.56, "learning_rate": 8.805382111435358e-06, "loss": 0.582, "step": 103000 }, { "epoch": 0.56, "learning_rate": 8.751039306151063e-06, "loss": 0.5687, "step": 103500 }, { "epoch": 0.57, "learning_rate": 8.696696500866769e-06, "loss": 0.5702, "step": 104000 }, { "epoch": 0.57, "learning_rate": 8.642353695582474e-06, "loss": 0.5734, "step": 104500 }, { "epoch": 0.57, "learning_rate": 8.58801089029818e-06, "loss": 0.5769, "step": 105000 }, { "epoch": 0.57, "learning_rate": 8.533668085013885e-06, "loss": 0.5648, "step": 105500 }, { "epoch": 0.58, "learning_rate": 8.47932527972959e-06, "loss": 0.5686, "step": 106000 }, { "epoch": 0.58, "learning_rate": 8.424982474445297e-06, "loss": 0.572, "step": 106500 }, { "epoch": 0.58, "learning_rate": 8.370639669161003e-06, "loss": 0.5753, "step": 107000 }, { "epoch": 0.58, "learning_rate": 8.316296863876707e-06, "loss": 0.5745, "step": 107500 }, { "epoch": 0.59, "learning_rate": 8.261954058592412e-06, "loss": 0.5713, "step": 108000 }, { "epoch": 0.59, "learning_rate": 8.20761125330812e-06, "loss": 0.5732, "step": 108500 }, { "epoch": 0.59, "learning_rate": 8.153268448023825e-06, "loss": 0.5623, "step": 109000 }, { "epoch": 0.6, "learning_rate": 8.09892564273953e-06, "loss": 0.5679, "step": 109500 }, { "epoch": 0.6, "learning_rate": 8.044582837455236e-06, "loss": 0.5711, "step": 110000 }, { "epoch": 0.6, "learning_rate": 7.990240032170941e-06, "loss": 0.5773, "step": 110500 }, { "epoch": 0.6, "learning_rate": 7.935897226886648e-06, "loss": 0.5788, "step": 111000 }, { "epoch": 0.61, "learning_rate": 7.881554421602352e-06, "loss": 0.5626, "step": 111500 }, { "epoch": 0.61, "learning_rate": 7.827211616318057e-06, "loss": 0.5688, "step": 112000 }, { "epoch": 0.61, "learning_rate": 7.772868811033763e-06, "loss": 0.5576, "step": 112500 }, { "epoch": 0.61, "learning_rate": 7.71852600574947e-06, "loss": 0.5659, "step": 113000 }, { "epoch": 0.62, "learning_rate": 7.664183200465176e-06, "loss": 0.5523, "step": 113500 }, { "epoch": 0.62, "learning_rate": 7.609840395180881e-06, "loss": 0.5633, "step": 114000 }, { "epoch": 0.62, "learning_rate": 7.5554975898965864e-06, "loss": 0.5677, "step": 114500 }, { "epoch": 0.62, "learning_rate": 7.501154784612292e-06, "loss": 0.5604, "step": 115000 }, { "epoch": 0.63, "learning_rate": 7.4468119793279965e-06, "loss": 0.562, "step": 115500 }, { "epoch": 0.63, "learning_rate": 7.392469174043703e-06, "loss": 0.5699, "step": 116000 }, { "epoch": 0.63, "learning_rate": 7.338126368759408e-06, "loss": 0.5636, "step": 116500 }, { "epoch": 0.64, "learning_rate": 7.2837835634751146e-06, "loss": 0.5534, "step": 117000 }, { "epoch": 0.64, "learning_rate": 7.22944075819082e-06, "loss": 0.563, "step": 117500 }, { "epoch": 0.64, "learning_rate": 7.1750979529065255e-06, "loss": 0.5629, "step": 118000 }, { "epoch": 0.64, "learning_rate": 7.120755147622232e-06, "loss": 0.5682, "step": 118500 }, { "epoch": 0.65, "learning_rate": 7.066412342337937e-06, "loss": 0.5704, "step": 119000 }, { "epoch": 0.65, "learning_rate": 7.012069537053643e-06, "loss": 0.5582, "step": 119500 }, { "epoch": 0.65, "learning_rate": 6.957726731769347e-06, "loss": 0.5696, "step": 120000 }, { "epoch": 0.65, "learning_rate": 6.903383926485054e-06, "loss": 0.5649, "step": 120500 }, { "epoch": 0.66, "learning_rate": 6.849041121200759e-06, "loss": 0.5649, "step": 121000 }, { "epoch": 0.66, "learning_rate": 6.7946983159164645e-06, "loss": 0.5575, "step": 121500 }, { "epoch": 0.66, "learning_rate": 6.740355510632171e-06, "loss": 0.5649, "step": 122000 }, { "epoch": 0.67, "learning_rate": 6.686012705347876e-06, "loss": 0.5571, "step": 122500 }, { "epoch": 0.67, "learning_rate": 6.631669900063582e-06, "loss": 0.5639, "step": 123000 }, { "epoch": 0.67, "learning_rate": 6.577327094779288e-06, "loss": 0.5666, "step": 123500 }, { "epoch": 0.67, "learning_rate": 6.522984289494993e-06, "loss": 0.5539, "step": 124000 }, { "epoch": 0.68, "learning_rate": 6.468641484210698e-06, "loss": 0.5486, "step": 124500 }, { "epoch": 0.68, "learning_rate": 6.4142986789264035e-06, "loss": 0.5669, "step": 125000 }, { "epoch": 0.68, "learning_rate": 6.35995587364211e-06, "loss": 0.5627, "step": 125500 }, { "epoch": 0.68, "learning_rate": 6.305613068357815e-06, "loss": 0.554, "step": 126000 }, { "epoch": 0.69, "learning_rate": 6.251270263073521e-06, "loss": 0.5619, "step": 126500 }, { "epoch": 0.69, "learning_rate": 6.196927457789227e-06, "loss": 0.5525, "step": 127000 }, { "epoch": 0.69, "learning_rate": 6.1425846525049325e-06, "loss": 0.5609, "step": 127500 }, { "epoch": 0.7, "learning_rate": 6.088241847220637e-06, "loss": 0.5538, "step": 128000 }, { "epoch": 0.7, "learning_rate": 6.0338990419363434e-06, "loss": 0.555, "step": 128500 }, { "epoch": 0.7, "learning_rate": 5.979556236652049e-06, "loss": 0.5472, "step": 129000 }, { "epoch": 0.7, "learning_rate": 5.925213431367754e-06, "loss": 0.5593, "step": 129500 }, { "epoch": 0.71, "learning_rate": 5.870870626083461e-06, "loss": 0.5555, "step": 130000 }, { "epoch": 0.71, "learning_rate": 5.816527820799166e-06, "loss": 0.5561, "step": 130500 }, { "epoch": 0.71, "learning_rate": 5.7621850155148716e-06, "loss": 0.5585, "step": 131000 }, { "epoch": 0.71, "learning_rate": 5.707842210230578e-06, "loss": 0.5643, "step": 131500 }, { "epoch": 0.72, "learning_rate": 5.6534994049462825e-06, "loss": 0.548, "step": 132000 }, { "epoch": 0.72, "learning_rate": 5.599156599661988e-06, "loss": 0.5565, "step": 132500 }, { "epoch": 0.72, "learning_rate": 5.544813794377693e-06, "loss": 0.5441, "step": 133000 }, { "epoch": 0.73, "learning_rate": 5.4904709890934e-06, "loss": 0.5526, "step": 133500 }, { "epoch": 0.73, "learning_rate": 5.436128183809105e-06, "loss": 0.5505, "step": 134000 }, { "epoch": 0.73, "learning_rate": 5.381785378524811e-06, "loss": 0.5547, "step": 134500 }, { "epoch": 0.73, "learning_rate": 5.327442573240517e-06, "loss": 0.5512, "step": 135000 }, { "epoch": 0.74, "learning_rate": 5.273099767956222e-06, "loss": 0.5522, "step": 135500 }, { "epoch": 0.74, "learning_rate": 5.218756962671927e-06, "loss": 0.55, "step": 136000 }, { "epoch": 0.74, "learning_rate": 5.164414157387632e-06, "loss": 0.5566, "step": 136500 }, { "epoch": 0.74, "learning_rate": 5.110071352103339e-06, "loss": 0.5552, "step": 137000 }, { "epoch": 0.75, "learning_rate": 5.055728546819044e-06, "loss": 0.5499, "step": 137500 }, { "epoch": 0.75, "learning_rate": 5.00138574153475e-06, "loss": 0.5597, "step": 138000 }, { "epoch": 0.75, "learning_rate": 4.947042936250456e-06, "loss": 0.5447, "step": 138500 }, { "epoch": 0.76, "learning_rate": 4.8927001309661605e-06, "loss": 0.5513, "step": 139000 }, { "epoch": 0.76, "learning_rate": 4.838357325681867e-06, "loss": 0.5435, "step": 139500 }, { "epoch": 0.76, "learning_rate": 4.784014520397572e-06, "loss": 0.5485, "step": 140000 }, { "epoch": 0.76, "learning_rate": 4.729671715113278e-06, "loss": 0.5427, "step": 140500 }, { "epoch": 0.77, "learning_rate": 4.675328909828983e-06, "loss": 0.5565, "step": 141000 }, { "epoch": 0.77, "learning_rate": 4.6209861045446895e-06, "loss": 0.5499, "step": 141500 }, { "epoch": 0.77, "learning_rate": 4.566643299260395e-06, "loss": 0.5421, "step": 142000 }, { "epoch": 0.77, "learning_rate": 4.5123004939761e-06, "loss": 0.54, "step": 142500 }, { "epoch": 0.78, "learning_rate": 4.457957688691806e-06, "loss": 0.5435, "step": 143000 }, { "epoch": 0.78, "learning_rate": 4.403614883407511e-06, "loss": 0.5484, "step": 143500 }, { "epoch": 0.78, "learning_rate": 4.349272078123218e-06, "loss": 0.5478, "step": 144000 }, { "epoch": 0.79, "learning_rate": 4.294929272838923e-06, "loss": 0.5519, "step": 144500 }, { "epoch": 0.79, "learning_rate": 4.2405864675546285e-06, "loss": 0.5531, "step": 145000 }, { "epoch": 0.79, "learning_rate": 4.186243662270334e-06, "loss": 0.5411, "step": 145500 }, { "epoch": 0.79, "learning_rate": 4.1319008569860394e-06, "loss": 0.5424, "step": 146000 }, { "epoch": 0.8, "learning_rate": 4.077558051701746e-06, "loss": 0.5506, "step": 146500 }, { "epoch": 0.8, "learning_rate": 4.02321524641745e-06, "loss": 0.5465, "step": 147000 }, { "epoch": 0.8, "learning_rate": 3.968872441133157e-06, "loss": 0.5448, "step": 147500 }, { "epoch": 0.8, "learning_rate": 3.914529635848862e-06, "loss": 0.5347, "step": 148000 }, { "epoch": 0.81, "learning_rate": 3.8601868305645676e-06, "loss": 0.5429, "step": 148500 }, { "epoch": 0.81, "learning_rate": 3.805844025280273e-06, "loss": 0.5401, "step": 149000 }, { "epoch": 0.81, "learning_rate": 3.751501219995979e-06, "loss": 0.5428, "step": 149500 }, { "epoch": 0.82, "learning_rate": 3.6971584147116848e-06, "loss": 0.5414, "step": 150000 }, { "epoch": 0.82, "learning_rate": 3.6428156094273902e-06, "loss": 0.541, "step": 150500 }, { "epoch": 0.82, "learning_rate": 3.5884728041430957e-06, "loss": 0.5396, "step": 151000 }, { "epoch": 0.82, "learning_rate": 3.534129998858801e-06, "loss": 0.5398, "step": 151500 }, { "epoch": 0.83, "learning_rate": 3.479787193574507e-06, "loss": 0.5393, "step": 152000 }, { "epoch": 0.83, "learning_rate": 3.425444388290213e-06, "loss": 0.5378, "step": 152500 }, { "epoch": 0.83, "learning_rate": 3.371101583005918e-06, "loss": 0.5344, "step": 153000 }, { "epoch": 0.83, "learning_rate": 3.316758777721624e-06, "loss": 0.5418, "step": 153500 }, { "epoch": 0.84, "learning_rate": 3.2624159724373293e-06, "loss": 0.5266, "step": 154000 }, { "epoch": 0.84, "learning_rate": 3.208073167153035e-06, "loss": 0.5376, "step": 154500 }, { "epoch": 0.84, "learning_rate": 3.15373036186874e-06, "loss": 0.5471, "step": 155000 }, { "epoch": 0.85, "learning_rate": 3.099387556584446e-06, "loss": 0.5451, "step": 155500 }, { "epoch": 0.85, "learning_rate": 3.045044751300152e-06, "loss": 0.5451, "step": 156000 }, { "epoch": 0.85, "learning_rate": 2.990701946015858e-06, "loss": 0.5423, "step": 156500 }, { "epoch": 0.85, "learning_rate": 2.9363591407315633e-06, "loss": 0.5332, "step": 157000 }, { "epoch": 0.86, "learning_rate": 2.8820163354472687e-06, "loss": 0.5367, "step": 157500 }, { "epoch": 0.86, "learning_rate": 2.827673530162974e-06, "loss": 0.5366, "step": 158000 }, { "epoch": 0.86, "learning_rate": 2.77333072487868e-06, "loss": 0.5362, "step": 158500 }, { "epoch": 0.86, "learning_rate": 2.718987919594386e-06, "loss": 0.5386, "step": 159000 }, { "epoch": 0.87, "learning_rate": 2.664645114310091e-06, "loss": 0.5381, "step": 159500 }, { "epoch": 0.87, "learning_rate": 2.610302309025797e-06, "loss": 0.5415, "step": 160000 }, { "epoch": 0.87, "learning_rate": 2.5559595037415023e-06, "loss": 0.5308, "step": 160500 }, { "epoch": 0.87, "learning_rate": 2.501616698457208e-06, "loss": 0.5298, "step": 161000 }, { "epoch": 0.88, "learning_rate": 2.4472738931729136e-06, "loss": 0.5209, "step": 161500 }, { "epoch": 0.88, "learning_rate": 2.392931087888619e-06, "loss": 0.5374, "step": 162000 }, { "epoch": 0.88, "learning_rate": 2.338588282604325e-06, "loss": 0.5437, "step": 162500 }, { "epoch": 0.89, "learning_rate": 2.2842454773200304e-06, "loss": 0.5353, "step": 163000 }, { "epoch": 0.89, "learning_rate": 2.2299026720357363e-06, "loss": 0.5377, "step": 163500 }, { "epoch": 0.89, "learning_rate": 2.1755598667514418e-06, "loss": 0.5412, "step": 164000 }, { "epoch": 0.89, "learning_rate": 2.121217061467147e-06, "loss": 0.5236, "step": 164500 }, { "epoch": 0.9, "learning_rate": 2.0668742561828527e-06, "loss": 0.5369, "step": 165000 }, { "epoch": 0.9, "learning_rate": 2.0125314508985585e-06, "loss": 0.529, "step": 165500 }, { "epoch": 0.9, "learning_rate": 1.958188645614264e-06, "loss": 0.5338, "step": 166000 }, { "epoch": 0.9, "learning_rate": 1.9038458403299699e-06, "loss": 0.53, "step": 166500 }, { "epoch": 0.91, "learning_rate": 1.8495030350456753e-06, "loss": 0.5254, "step": 167000 }, { "epoch": 0.91, "learning_rate": 1.795160229761381e-06, "loss": 0.5382, "step": 167500 }, { "epoch": 0.91, "learning_rate": 1.7408174244770865e-06, "loss": 0.541, "step": 168000 }, { "epoch": 0.92, "learning_rate": 1.6864746191927921e-06, "loss": 0.5296, "step": 168500 }, { "epoch": 0.92, "learning_rate": 1.6321318139084976e-06, "loss": 0.5304, "step": 169000 }, { "epoch": 0.92, "learning_rate": 1.5777890086242035e-06, "loss": 0.5253, "step": 169500 }, { "epoch": 0.92, "learning_rate": 1.523446203339909e-06, "loss": 0.5189, "step": 170000 }, { "epoch": 0.93, "learning_rate": 1.4691033980556146e-06, "loss": 0.5359, "step": 170500 }, { "epoch": 0.93, "learning_rate": 1.41476059277132e-06, "loss": 0.5368, "step": 171000 }, { "epoch": 0.93, "learning_rate": 1.360417787487026e-06, "loss": 0.5265, "step": 171500 }, { "epoch": 0.93, "learning_rate": 1.3060749822027314e-06, "loss": 0.5289, "step": 172000 }, { "epoch": 0.94, "learning_rate": 1.251732176918437e-06, "loss": 0.5294, "step": 172500 }, { "epoch": 0.94, "learning_rate": 1.1973893716341425e-06, "loss": 0.5293, "step": 173000 }, { "epoch": 0.94, "learning_rate": 1.1430465663498482e-06, "loss": 0.5342, "step": 173500 }, { "epoch": 0.95, "learning_rate": 1.0887037610655538e-06, "loss": 0.5182, "step": 174000 }, { "epoch": 0.95, "learning_rate": 1.0343609557812593e-06, "loss": 0.5311, "step": 174500 }, { "epoch": 0.95, "learning_rate": 9.80018150496965e-07, "loss": 0.5386, "step": 175000 }, { "epoch": 0.95, "learning_rate": 9.256753452126706e-07, "loss": 0.5291, "step": 175500 }, { "epoch": 0.96, "learning_rate": 8.713325399283762e-07, "loss": 0.5233, "step": 176000 }, { "epoch": 0.96, "learning_rate": 8.169897346440818e-07, "loss": 0.5167, "step": 176500 }, { "epoch": 0.96, "learning_rate": 7.626469293597875e-07, "loss": 0.5339, "step": 177000 }, { "epoch": 0.96, "learning_rate": 7.083041240754932e-07, "loss": 0.5263, "step": 177500 }, { "epoch": 0.97, "learning_rate": 6.539613187911987e-07, "loss": 0.5255, "step": 178000 }, { "epoch": 0.97, "learning_rate": 5.996185135069043e-07, "loss": 0.5292, "step": 178500 }, { "epoch": 0.97, "learning_rate": 5.452757082226099e-07, "loss": 0.5247, "step": 179000 }, { "epoch": 0.98, "learning_rate": 4.909329029383155e-07, "loss": 0.5293, "step": 179500 }, { "epoch": 0.98, "learning_rate": 4.3659009765402114e-07, "loss": 0.5278, "step": 180000 }, { "epoch": 0.98, "learning_rate": 3.8224729236972675e-07, "loss": 0.5151, "step": 180500 }, { "epoch": 0.98, "learning_rate": 3.279044870854323e-07, "loss": 0.515, "step": 181000 }, { "epoch": 0.99, "learning_rate": 2.73561681801138e-07, "loss": 0.5305, "step": 181500 }, { "epoch": 0.99, "learning_rate": 2.192188765168436e-07, "loss": 0.5231, "step": 182000 }, { "epoch": 0.99, "learning_rate": 1.6487607123254918e-07, "loss": 0.526, "step": 182500 }, { "epoch": 0.99, "learning_rate": 1.1053326594825478e-07, "loss": 0.5243, "step": 183000 }, { "epoch": 1.0, "learning_rate": 5.6190460663960404e-08, "loss": 0.5192, "step": 183500 }, { "epoch": 1.0, "learning_rate": 1.8476553796660092e-09, "loss": 0.5343, "step": 184000 }, { "epoch": 1.0, "eval_accuracy": 0.7746184738955824, "eval_loss": 0.587454617023468, "eval_runtime": 72.5338, "eval_samples_per_second": 514.932, "eval_steps_per_second": 64.37, "step": 184017 } ], "max_steps": 184017, "num_train_epochs": 1, "total_flos": 3.8733781342346496e+17, "trial_name": null, "trial_params": null }