{ "best_metric": 0.5343035343035343, "best_model_checkpoint": "./final/question/text2sql-t5-base-schema-generator/checkpoint-24000", "epoch": 102.56410256410257, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.337783711615487e-08, "loss": 2.3603, "step": 1 }, { "epoch": 0.03, "learning_rate": 1.335113484646195e-07, "loss": 2.6238, "step": 4 }, { "epoch": 0.07, "learning_rate": 2.67022696929239e-07, "loss": 2.5591, "step": 8 }, { "epoch": 0.1, "learning_rate": 4.005340453938585e-07, "loss": 2.563, "step": 12 }, { "epoch": 0.14, "learning_rate": 5.34045393858478e-07, "loss": 2.7499, "step": 16 }, { "epoch": 0.17, "learning_rate": 6.675567423230975e-07, "loss": 2.4741, "step": 20 }, { "epoch": 0.21, "learning_rate": 8.01068090787717e-07, "loss": 2.3916, "step": 24 }, { "epoch": 0.24, "learning_rate": 9.345794392523364e-07, "loss": 2.568, "step": 28 }, { "epoch": 0.27, "learning_rate": 1.068090787716956e-06, "loss": 2.5685, "step": 32 }, { "epoch": 0.31, "learning_rate": 1.2016021361815755e-06, "loss": 2.4451, "step": 36 }, { "epoch": 0.34, "learning_rate": 1.335113484646195e-06, "loss": 2.6357, "step": 40 }, { "epoch": 0.38, "learning_rate": 1.4686248331108143e-06, "loss": 2.4322, "step": 44 }, { "epoch": 0.41, "learning_rate": 1.602136181575434e-06, "loss": 2.4776, "step": 48 }, { "epoch": 0.44, "learning_rate": 1.7356475300400534e-06, "loss": 2.1708, "step": 52 }, { "epoch": 0.48, "learning_rate": 1.8691588785046728e-06, "loss": 2.3881, "step": 56 }, { "epoch": 0.51, "learning_rate": 2.0026702269692925e-06, "loss": 2.181, "step": 60 }, { "epoch": 0.55, "learning_rate": 2.136181575433912e-06, "loss": 2.2601, "step": 64 }, { "epoch": 0.58, "learning_rate": 2.2696929238985316e-06, "loss": 2.234, "step": 68 }, { "epoch": 0.62, "learning_rate": 2.403204272363151e-06, "loss": 2.3423, "step": 72 }, { "epoch": 0.65, "learning_rate": 2.5367156208277703e-06, "loss": 2.2052, "step": 76 }, { "epoch": 0.68, "learning_rate": 2.67022696929239e-06, "loss": 2.3827, "step": 80 }, { "epoch": 0.72, "learning_rate": 2.8037383177570094e-06, "loss": 1.9671, "step": 84 }, { "epoch": 0.75, "learning_rate": 2.9372496662216287e-06, "loss": 1.9526, "step": 88 }, { "epoch": 0.79, "learning_rate": 3.0707610146862484e-06, "loss": 1.9553, "step": 92 }, { "epoch": 0.82, "learning_rate": 3.204272363150868e-06, "loss": 2.0414, "step": 96 }, { "epoch": 0.85, "learning_rate": 3.337783711615487e-06, "loss": 2.3561, "step": 100 }, { "epoch": 0.89, "learning_rate": 3.471295060080107e-06, "loss": 1.9045, "step": 104 }, { "epoch": 0.92, "learning_rate": 3.6048064085447266e-06, "loss": 1.841, "step": 108 }, { "epoch": 0.96, "learning_rate": 3.7383177570093455e-06, "loss": 1.7683, "step": 112 }, { "epoch": 0.99, "learning_rate": 3.871829105473966e-06, "loss": 1.8482, "step": 116 }, { "epoch": 1.03, "learning_rate": 4.005340453938585e-06, "loss": 1.7921, "step": 120 }, { "epoch": 1.06, "learning_rate": 4.138851802403204e-06, "loss": 1.7806, "step": 124 }, { "epoch": 1.09, "learning_rate": 4.272363150867824e-06, "loss": 1.6736, "step": 128 }, { "epoch": 1.13, "learning_rate": 4.405874499332444e-06, "loss": 1.8067, "step": 132 }, { "epoch": 1.16, "learning_rate": 4.539385847797063e-06, "loss": 1.7831, "step": 136 }, { "epoch": 1.2, "learning_rate": 4.6728971962616825e-06, "loss": 1.7054, "step": 140 }, { "epoch": 1.23, "learning_rate": 4.806408544726302e-06, "loss": 1.63, "step": 144 }, { "epoch": 1.26, "learning_rate": 4.939919893190922e-06, "loss": 1.5281, "step": 148 }, { "epoch": 1.3, "learning_rate": 5.0734312416555405e-06, "loss": 1.5909, "step": 152 }, { "epoch": 1.33, "learning_rate": 5.206942590120161e-06, "loss": 1.6147, "step": 156 }, { "epoch": 1.37, "learning_rate": 5.34045393858478e-06, "loss": 1.4966, "step": 160 }, { "epoch": 1.4, "learning_rate": 5.473965287049399e-06, "loss": 1.3448, "step": 164 }, { "epoch": 1.44, "learning_rate": 5.607476635514019e-06, "loss": 1.449, "step": 168 }, { "epoch": 1.47, "learning_rate": 5.740987983978639e-06, "loss": 1.388, "step": 172 }, { "epoch": 1.5, "learning_rate": 5.874499332443257e-06, "loss": 1.454, "step": 176 }, { "epoch": 1.54, "learning_rate": 6.0080106809078775e-06, "loss": 1.3239, "step": 180 }, { "epoch": 1.57, "learning_rate": 6.141522029372497e-06, "loss": 1.3004, "step": 184 }, { "epoch": 1.61, "learning_rate": 6.275033377837117e-06, "loss": 1.3924, "step": 188 }, { "epoch": 1.64, "learning_rate": 6.408544726301736e-06, "loss": 1.3636, "step": 192 }, { "epoch": 1.68, "learning_rate": 6.542056074766355e-06, "loss": 1.2384, "step": 196 }, { "epoch": 1.71, "learning_rate": 6.675567423230974e-06, "loss": 1.2257, "step": 200 }, { "epoch": 1.74, "learning_rate": 6.809078771695594e-06, "loss": 1.2446, "step": 204 }, { "epoch": 1.78, "learning_rate": 6.942590120160214e-06, "loss": 1.4018, "step": 208 }, { "epoch": 1.81, "learning_rate": 7.076101468624834e-06, "loss": 1.2668, "step": 212 }, { "epoch": 1.85, "learning_rate": 7.209612817089453e-06, "loss": 1.2525, "step": 216 }, { "epoch": 1.88, "learning_rate": 7.343124165554073e-06, "loss": 1.1977, "step": 220 }, { "epoch": 1.91, "learning_rate": 7.476635514018691e-06, "loss": 1.1998, "step": 224 }, { "epoch": 1.95, "learning_rate": 7.610146862483311e-06, "loss": 1.1087, "step": 228 }, { "epoch": 1.98, "learning_rate": 7.743658210947931e-06, "loss": 1.0731, "step": 232 }, { "epoch": 2.02, "learning_rate": 7.87716955941255e-06, "loss": 1.0327, "step": 236 }, { "epoch": 2.05, "learning_rate": 8.01068090787717e-06, "loss": 1.0915, "step": 240 }, { "epoch": 2.09, "learning_rate": 8.14419225634179e-06, "loss": 1.0304, "step": 244 }, { "epoch": 2.12, "learning_rate": 8.277703604806409e-06, "loss": 1.0082, "step": 248 }, { "epoch": 2.15, "learning_rate": 8.411214953271028e-06, "loss": 1.0408, "step": 252 }, { "epoch": 2.19, "learning_rate": 8.544726301735647e-06, "loss": 0.983, "step": 256 }, { "epoch": 2.22, "learning_rate": 8.678237650200267e-06, "loss": 0.9362, "step": 260 }, { "epoch": 2.26, "learning_rate": 8.811748998664888e-06, "loss": 0.9181, "step": 264 }, { "epoch": 2.29, "learning_rate": 8.945260347129507e-06, "loss": 0.8616, "step": 268 }, { "epoch": 2.32, "learning_rate": 9.078771695594126e-06, "loss": 0.8992, "step": 272 }, { "epoch": 2.36, "learning_rate": 9.212283044058744e-06, "loss": 0.8689, "step": 276 }, { "epoch": 2.39, "learning_rate": 9.345794392523365e-06, "loss": 0.8462, "step": 280 }, { "epoch": 2.43, "learning_rate": 9.479305740987984e-06, "loss": 0.9342, "step": 284 }, { "epoch": 2.46, "learning_rate": 9.612817089452604e-06, "loss": 0.8598, "step": 288 }, { "epoch": 2.5, "learning_rate": 9.746328437917223e-06, "loss": 0.8383, "step": 292 }, { "epoch": 2.53, "learning_rate": 9.879839786381844e-06, "loss": 0.7935, "step": 296 }, { "epoch": 2.56, "learning_rate": 1.0013351134846462e-05, "loss": 0.7795, "step": 300 }, { "epoch": 2.6, "learning_rate": 1.0146862483311081e-05, "loss": 0.8016, "step": 304 }, { "epoch": 2.63, "learning_rate": 1.02803738317757e-05, "loss": 0.743, "step": 308 }, { "epoch": 2.67, "learning_rate": 1.0413885180240321e-05, "loss": 0.7807, "step": 312 }, { "epoch": 2.7, "learning_rate": 1.054739652870494e-05, "loss": 0.7376, "step": 316 }, { "epoch": 2.74, "learning_rate": 1.068090787716956e-05, "loss": 0.7415, "step": 320 }, { "epoch": 2.77, "learning_rate": 1.081441922563418e-05, "loss": 0.7283, "step": 324 }, { "epoch": 2.8, "learning_rate": 1.0947930574098799e-05, "loss": 0.6967, "step": 328 }, { "epoch": 2.84, "learning_rate": 1.1081441922563418e-05, "loss": 0.6988, "step": 332 }, { "epoch": 2.87, "learning_rate": 1.1214953271028037e-05, "loss": 0.7329, "step": 336 }, { "epoch": 2.91, "learning_rate": 1.1348464619492657e-05, "loss": 0.6674, "step": 340 }, { "epoch": 2.94, "learning_rate": 1.1481975967957278e-05, "loss": 0.6287, "step": 344 }, { "epoch": 2.97, "learning_rate": 1.1615487316421897e-05, "loss": 0.7225, "step": 348 }, { "epoch": 3.01, "learning_rate": 1.1748998664886515e-05, "loss": 0.6345, "step": 352 }, { "epoch": 3.04, "learning_rate": 1.1882510013351136e-05, "loss": 0.6217, "step": 356 }, { "epoch": 3.08, "learning_rate": 1.2016021361815755e-05, "loss": 0.6306, "step": 360 }, { "epoch": 3.11, "learning_rate": 1.2149532710280374e-05, "loss": 0.5966, "step": 364 }, { "epoch": 3.15, "learning_rate": 1.2283044058744994e-05, "loss": 0.6166, "step": 368 }, { "epoch": 3.18, "learning_rate": 1.2416555407209613e-05, "loss": 0.588, "step": 372 }, { "epoch": 3.21, "learning_rate": 1.2550066755674234e-05, "loss": 0.6105, "step": 376 }, { "epoch": 3.25, "learning_rate": 1.2683578104138852e-05, "loss": 0.5674, "step": 380 }, { "epoch": 3.28, "learning_rate": 1.2817089452603473e-05, "loss": 0.5682, "step": 384 }, { "epoch": 3.32, "learning_rate": 1.295060080106809e-05, "loss": 0.5839, "step": 388 }, { "epoch": 3.35, "learning_rate": 1.308411214953271e-05, "loss": 0.5281, "step": 392 }, { "epoch": 3.38, "learning_rate": 1.321762349799733e-05, "loss": 0.5438, "step": 396 }, { "epoch": 3.42, "learning_rate": 1.3351134846461948e-05, "loss": 0.544, "step": 400 }, { "epoch": 3.45, "learning_rate": 1.348464619492657e-05, "loss": 0.5287, "step": 404 }, { "epoch": 3.49, "learning_rate": 1.3618157543391189e-05, "loss": 0.5177, "step": 408 }, { "epoch": 3.52, "learning_rate": 1.375166889185581e-05, "loss": 0.5275, "step": 412 }, { "epoch": 3.56, "learning_rate": 1.3885180240320427e-05, "loss": 0.5346, "step": 416 }, { "epoch": 3.59, "learning_rate": 1.4018691588785047e-05, "loss": 0.5277, "step": 420 }, { "epoch": 3.62, "learning_rate": 1.4152202937249668e-05, "loss": 0.5483, "step": 424 }, { "epoch": 3.66, "learning_rate": 1.4285714285714285e-05, "loss": 0.5117, "step": 428 }, { "epoch": 3.69, "learning_rate": 1.4419225634178906e-05, "loss": 0.4839, "step": 432 }, { "epoch": 3.73, "learning_rate": 1.4552736982643526e-05, "loss": 0.4601, "step": 436 }, { "epoch": 3.76, "learning_rate": 1.4686248331108147e-05, "loss": 0.4971, "step": 440 }, { "epoch": 3.79, "learning_rate": 1.4819759679572764e-05, "loss": 0.4743, "step": 444 }, { "epoch": 3.83, "learning_rate": 1.4953271028037382e-05, "loss": 0.4872, "step": 448 }, { "epoch": 3.86, "learning_rate": 1.5086782376502003e-05, "loss": 0.4172, "step": 452 }, { "epoch": 3.9, "learning_rate": 1.5220293724966622e-05, "loss": 0.4379, "step": 456 }, { "epoch": 3.93, "learning_rate": 1.5353805073431243e-05, "loss": 0.4265, "step": 460 }, { "epoch": 3.97, "learning_rate": 1.5487316421895863e-05, "loss": 0.4154, "step": 464 }, { "epoch": 4.0, "learning_rate": 1.5620827770360482e-05, "loss": 0.4336, "step": 468 }, { "epoch": 4.03, "learning_rate": 1.57543391188251e-05, "loss": 0.4155, "step": 472 }, { "epoch": 4.07, "learning_rate": 1.588785046728972e-05, "loss": 0.469, "step": 476 }, { "epoch": 4.1, "learning_rate": 1.602136181575434e-05, "loss": 0.443, "step": 480 }, { "epoch": 4.14, "learning_rate": 1.615487316421896e-05, "loss": 0.4061, "step": 484 }, { "epoch": 4.17, "learning_rate": 1.628838451268358e-05, "loss": 0.417, "step": 488 }, { "epoch": 4.21, "learning_rate": 1.6421895861148198e-05, "loss": 0.3833, "step": 492 }, { "epoch": 4.24, "learning_rate": 1.6555407209612817e-05, "loss": 0.4274, "step": 496 }, { "epoch": 4.27, "learning_rate": 1.6688918558077437e-05, "loss": 0.3719, "step": 500 }, { "epoch": 4.31, "learning_rate": 1.6822429906542056e-05, "loss": 0.4054, "step": 504 }, { "epoch": 4.34, "learning_rate": 1.6955941255006675e-05, "loss": 0.3914, "step": 508 }, { "epoch": 4.38, "learning_rate": 1.7089452603471295e-05, "loss": 0.4241, "step": 512 }, { "epoch": 4.41, "learning_rate": 1.7222963951935918e-05, "loss": 0.3763, "step": 516 }, { "epoch": 4.44, "learning_rate": 1.7356475300400533e-05, "loss": 0.3755, "step": 520 }, { "epoch": 4.48, "learning_rate": 1.7489986648865153e-05, "loss": 0.3663, "step": 524 }, { "epoch": 4.51, "learning_rate": 1.7623497997329775e-05, "loss": 0.3577, "step": 528 }, { "epoch": 4.55, "learning_rate": 1.775700934579439e-05, "loss": 0.3447, "step": 532 }, { "epoch": 4.58, "learning_rate": 1.7890520694259014e-05, "loss": 0.3899, "step": 536 }, { "epoch": 4.62, "learning_rate": 1.8024032042723633e-05, "loss": 0.3441, "step": 540 }, { "epoch": 4.65, "learning_rate": 1.8157543391188253e-05, "loss": 0.3935, "step": 544 }, { "epoch": 4.68, "learning_rate": 1.8291054739652872e-05, "loss": 0.3579, "step": 548 }, { "epoch": 4.72, "learning_rate": 1.8424566088117488e-05, "loss": 0.3555, "step": 552 }, { "epoch": 4.75, "learning_rate": 1.855807743658211e-05, "loss": 0.3561, "step": 556 }, { "epoch": 4.79, "learning_rate": 1.869158878504673e-05, "loss": 0.3363, "step": 560 }, { "epoch": 4.82, "learning_rate": 1.882510013351135e-05, "loss": 0.358, "step": 564 }, { "epoch": 4.85, "learning_rate": 1.895861148197597e-05, "loss": 0.3316, "step": 568 }, { "epoch": 4.89, "learning_rate": 1.9092122830440588e-05, "loss": 0.36, "step": 572 }, { "epoch": 4.92, "learning_rate": 1.9225634178905207e-05, "loss": 0.3553, "step": 576 }, { "epoch": 4.96, "learning_rate": 1.9359145527369827e-05, "loss": 0.3348, "step": 580 }, { "epoch": 4.99, "learning_rate": 1.9492656875834446e-05, "loss": 0.3186, "step": 584 }, { "epoch": 5.03, "learning_rate": 1.9626168224299065e-05, "loss": 0.2991, "step": 588 }, { "epoch": 5.06, "learning_rate": 1.9759679572763688e-05, "loss": 0.3197, "step": 592 }, { "epoch": 5.09, "learning_rate": 1.9893190921228304e-05, "loss": 0.3489, "step": 596 }, { "epoch": 5.13, "learning_rate": 2.0026702269692923e-05, "loss": 0.3344, "step": 600 }, { "epoch": 5.16, "learning_rate": 2.0160213618157546e-05, "loss": 0.3118, "step": 604 }, { "epoch": 5.2, "learning_rate": 2.0293724966622162e-05, "loss": 0.2895, "step": 608 }, { "epoch": 5.23, "learning_rate": 2.0427236315086785e-05, "loss": 0.3054, "step": 612 }, { "epoch": 5.26, "learning_rate": 2.05607476635514e-05, "loss": 0.2828, "step": 616 }, { "epoch": 5.3, "learning_rate": 2.0694259012016024e-05, "loss": 0.305, "step": 620 }, { "epoch": 5.33, "learning_rate": 2.0827770360480643e-05, "loss": 0.3166, "step": 624 }, { "epoch": 5.37, "learning_rate": 2.096128170894526e-05, "loss": 0.3252, "step": 628 }, { "epoch": 5.4, "learning_rate": 2.109479305740988e-05, "loss": 0.3304, "step": 632 }, { "epoch": 5.44, "learning_rate": 2.12283044058745e-05, "loss": 0.2743, "step": 636 }, { "epoch": 5.47, "learning_rate": 2.136181575433912e-05, "loss": 0.2986, "step": 640 }, { "epoch": 5.5, "learning_rate": 2.149532710280374e-05, "loss": 0.293, "step": 644 }, { "epoch": 5.54, "learning_rate": 2.162883845126836e-05, "loss": 0.2777, "step": 648 }, { "epoch": 5.57, "learning_rate": 2.1762349799732978e-05, "loss": 0.2572, "step": 652 }, { "epoch": 5.61, "learning_rate": 2.1895861148197598e-05, "loss": 0.2667, "step": 656 }, { "epoch": 5.64, "learning_rate": 2.2029372496662217e-05, "loss": 0.2817, "step": 660 }, { "epoch": 5.68, "learning_rate": 2.2162883845126836e-05, "loss": 0.2712, "step": 664 }, { "epoch": 5.71, "learning_rate": 2.229639519359146e-05, "loss": 0.2583, "step": 668 }, { "epoch": 5.74, "learning_rate": 2.2429906542056075e-05, "loss": 0.2684, "step": 672 }, { "epoch": 5.78, "learning_rate": 2.2563417890520698e-05, "loss": 0.2811, "step": 676 }, { "epoch": 5.81, "learning_rate": 2.2696929238985313e-05, "loss": 0.2657, "step": 680 }, { "epoch": 5.85, "learning_rate": 2.2830440587449933e-05, "loss": 0.2856, "step": 684 }, { "epoch": 5.88, "learning_rate": 2.2963951935914556e-05, "loss": 0.2647, "step": 688 }, { "epoch": 5.91, "learning_rate": 2.309746328437917e-05, "loss": 0.2373, "step": 692 }, { "epoch": 5.95, "learning_rate": 2.3230974632843794e-05, "loss": 0.2789, "step": 696 }, { "epoch": 5.98, "learning_rate": 2.3364485981308414e-05, "loss": 0.2637, "step": 700 }, { "epoch": 6.02, "learning_rate": 2.349799732977303e-05, "loss": 0.2691, "step": 704 }, { "epoch": 6.05, "learning_rate": 2.3631508678237652e-05, "loss": 0.2516, "step": 708 }, { "epoch": 6.09, "learning_rate": 2.376502002670227e-05, "loss": 0.2382, "step": 712 }, { "epoch": 6.12, "learning_rate": 2.389853137516689e-05, "loss": 0.2398, "step": 716 }, { "epoch": 6.15, "learning_rate": 2.403204272363151e-05, "loss": 0.2353, "step": 720 }, { "epoch": 6.19, "learning_rate": 2.416555407209613e-05, "loss": 0.2257, "step": 724 }, { "epoch": 6.22, "learning_rate": 2.429906542056075e-05, "loss": 0.2939, "step": 728 }, { "epoch": 6.26, "learning_rate": 2.4432576769025368e-05, "loss": 0.2483, "step": 732 }, { "epoch": 6.29, "learning_rate": 2.4566088117489988e-05, "loss": 0.2338, "step": 736 }, { "epoch": 6.32, "learning_rate": 2.4699599465954607e-05, "loss": 0.2811, "step": 740 }, { "epoch": 6.36, "learning_rate": 2.4833110814419226e-05, "loss": 0.2147, "step": 744 }, { "epoch": 6.39, "learning_rate": 2.4966622162883846e-05, "loss": 0.1995, "step": 748 }, { "epoch": 6.43, "learning_rate": 2.5100133511348468e-05, "loss": 0.231, "step": 752 }, { "epoch": 6.46, "learning_rate": 2.5233644859813084e-05, "loss": 0.2072, "step": 756 }, { "epoch": 6.5, "learning_rate": 2.5367156208277704e-05, "loss": 0.2304, "step": 760 }, { "epoch": 6.53, "learning_rate": 2.5500667556742326e-05, "loss": 0.2172, "step": 764 }, { "epoch": 6.56, "learning_rate": 2.5634178905206946e-05, "loss": 0.2318, "step": 768 }, { "epoch": 6.6, "learning_rate": 2.576769025367156e-05, "loss": 0.2285, "step": 772 }, { "epoch": 6.63, "learning_rate": 2.590120160213618e-05, "loss": 0.217, "step": 776 }, { "epoch": 6.67, "learning_rate": 2.6034712950600804e-05, "loss": 0.1973, "step": 780 }, { "epoch": 6.7, "learning_rate": 2.616822429906542e-05, "loss": 0.2131, "step": 784 }, { "epoch": 6.74, "learning_rate": 2.630173564753004e-05, "loss": 0.2343, "step": 788 }, { "epoch": 6.77, "learning_rate": 2.643524699599466e-05, "loss": 0.2307, "step": 792 }, { "epoch": 6.8, "learning_rate": 2.656875834445928e-05, "loss": 0.224, "step": 796 }, { "epoch": 6.84, "learning_rate": 2.6702269692923897e-05, "loss": 0.2008, "step": 800 }, { "epoch": 6.87, "learning_rate": 2.683578104138852e-05, "loss": 0.2024, "step": 804 }, { "epoch": 6.91, "learning_rate": 2.696929238985314e-05, "loss": 0.2134, "step": 808 }, { "epoch": 6.94, "learning_rate": 2.7102803738317755e-05, "loss": 0.1942, "step": 812 }, { "epoch": 6.97, "learning_rate": 2.7236315086782378e-05, "loss": 0.2103, "step": 816 }, { "epoch": 7.01, "learning_rate": 2.7369826435246997e-05, "loss": 0.212, "step": 820 }, { "epoch": 7.04, "learning_rate": 2.750333778371162e-05, "loss": 0.2368, "step": 824 }, { "epoch": 7.08, "learning_rate": 2.7636849132176236e-05, "loss": 0.1905, "step": 828 }, { "epoch": 7.11, "learning_rate": 2.7770360480640855e-05, "loss": 0.1892, "step": 832 }, { "epoch": 7.15, "learning_rate": 2.7903871829105478e-05, "loss": 0.2069, "step": 836 }, { "epoch": 7.18, "learning_rate": 2.8037383177570094e-05, "loss": 0.1957, "step": 840 }, { "epoch": 7.21, "learning_rate": 2.8170894526034713e-05, "loss": 0.2093, "step": 844 }, { "epoch": 7.25, "learning_rate": 2.8304405874499336e-05, "loss": 0.2003, "step": 848 }, { "epoch": 7.28, "learning_rate": 2.8437917222963955e-05, "loss": 0.2081, "step": 852 }, { "epoch": 7.32, "learning_rate": 2.857142857142857e-05, "loss": 0.1746, "step": 856 }, { "epoch": 7.35, "learning_rate": 2.8704939919893194e-05, "loss": 0.1738, "step": 860 }, { "epoch": 7.38, "learning_rate": 2.8838451268357813e-05, "loss": 0.1868, "step": 864 }, { "epoch": 7.42, "learning_rate": 2.897196261682243e-05, "loss": 0.1829, "step": 868 }, { "epoch": 7.45, "learning_rate": 2.910547396528705e-05, "loss": 0.1794, "step": 872 }, { "epoch": 7.49, "learning_rate": 2.923898531375167e-05, "loss": 0.1696, "step": 876 }, { "epoch": 7.52, "learning_rate": 2.9372496662216294e-05, "loss": 0.1907, "step": 880 }, { "epoch": 7.56, "learning_rate": 2.9506008010680906e-05, "loss": 0.1813, "step": 884 }, { "epoch": 7.59, "learning_rate": 2.963951935914553e-05, "loss": 0.1519, "step": 888 }, { "epoch": 7.62, "learning_rate": 2.977303070761015e-05, "loss": 0.1903, "step": 892 }, { "epoch": 7.66, "learning_rate": 2.9906542056074764e-05, "loss": 0.1712, "step": 896 }, { "epoch": 7.69, "learning_rate": 3.0040053404539387e-05, "loss": 0.1914, "step": 900 }, { "epoch": 7.73, "learning_rate": 3.0173564753004006e-05, "loss": 0.164, "step": 904 }, { "epoch": 7.76, "learning_rate": 3.030707610146863e-05, "loss": 0.1671, "step": 908 }, { "epoch": 7.79, "learning_rate": 3.0440587449933245e-05, "loss": 0.1688, "step": 912 }, { "epoch": 7.83, "learning_rate": 3.0574098798397864e-05, "loss": 0.1588, "step": 916 }, { "epoch": 7.86, "learning_rate": 3.070761014686249e-05, "loss": 0.1685, "step": 920 }, { "epoch": 7.9, "learning_rate": 3.08411214953271e-05, "loss": 0.1704, "step": 924 }, { "epoch": 7.93, "learning_rate": 3.0974632843791726e-05, "loss": 0.1467, "step": 928 }, { "epoch": 7.97, "learning_rate": 3.110814419225634e-05, "loss": 0.1631, "step": 932 }, { "epoch": 8.0, "learning_rate": 3.1241655540720964e-05, "loss": 0.1584, "step": 936 }, { "epoch": 8.03, "learning_rate": 3.137516688918558e-05, "loss": 0.1595, "step": 940 }, { "epoch": 8.07, "learning_rate": 3.15086782376502e-05, "loss": 0.1576, "step": 944 }, { "epoch": 8.1, "learning_rate": 3.1642189586114826e-05, "loss": 0.173, "step": 948 }, { "epoch": 8.14, "learning_rate": 3.177570093457944e-05, "loss": 0.1775, "step": 952 }, { "epoch": 8.17, "learning_rate": 3.190921228304406e-05, "loss": 0.1542, "step": 956 }, { "epoch": 8.21, "learning_rate": 3.204272363150868e-05, "loss": 0.129, "step": 960 }, { "epoch": 8.24, "learning_rate": 3.2176234979973296e-05, "loss": 0.1671, "step": 964 }, { "epoch": 8.27, "learning_rate": 3.230974632843792e-05, "loss": 0.1541, "step": 968 }, { "epoch": 8.31, "learning_rate": 3.244325767690254e-05, "loss": 0.1524, "step": 972 }, { "epoch": 8.34, "learning_rate": 3.257676902536716e-05, "loss": 0.1559, "step": 976 }, { "epoch": 8.38, "learning_rate": 3.2710280373831774e-05, "loss": 0.1447, "step": 980 }, { "epoch": 8.41, "learning_rate": 3.2843791722296396e-05, "loss": 0.1596, "step": 984 }, { "epoch": 8.44, "learning_rate": 3.297730307076102e-05, "loss": 0.1287, "step": 988 }, { "epoch": 8.48, "learning_rate": 3.3110814419225635e-05, "loss": 0.1448, "step": 992 }, { "epoch": 8.51, "learning_rate": 3.324432576769025e-05, "loss": 0.1479, "step": 996 }, { "epoch": 8.55, "learning_rate": 3.3377837116154874e-05, "loss": 0.1587, "step": 1000 }, { "epoch": 8.55, "eval_exact_match": 0.35654885654885654, "eval_loss": 0.4734349548816681, "eval_runtime": 95.1537, "eval_samples_per_second": 10.11, "step": 1000 }, { "epoch": 8.58, "learning_rate": 3.3511348464619496e-05, "loss": 0.1445, "step": 1004 }, { "epoch": 8.62, "learning_rate": 3.364485981308411e-05, "loss": 0.1414, "step": 1008 }, { "epoch": 8.65, "learning_rate": 3.3778371161548735e-05, "loss": 0.153, "step": 1012 }, { "epoch": 8.68, "learning_rate": 3.391188251001335e-05, "loss": 0.1389, "step": 1016 }, { "epoch": 8.72, "learning_rate": 3.404539385847797e-05, "loss": 0.1528, "step": 1020 }, { "epoch": 8.75, "learning_rate": 3.417890520694259e-05, "loss": 0.1281, "step": 1024 }, { "epoch": 8.79, "learning_rate": 3.431241655540721e-05, "loss": 0.1358, "step": 1028 }, { "epoch": 8.82, "learning_rate": 3.4445927903871835e-05, "loss": 0.1392, "step": 1032 }, { "epoch": 8.85, "learning_rate": 3.457943925233645e-05, "loss": 0.115, "step": 1036 }, { "epoch": 8.89, "learning_rate": 3.471295060080107e-05, "loss": 0.1376, "step": 1040 }, { "epoch": 8.92, "learning_rate": 3.484646194926569e-05, "loss": 0.1347, "step": 1044 }, { "epoch": 8.96, "learning_rate": 3.4979973297730306e-05, "loss": 0.1198, "step": 1048 }, { "epoch": 8.99, "learning_rate": 3.511348464619493e-05, "loss": 0.1278, "step": 1052 }, { "epoch": 9.03, "learning_rate": 3.524699599465955e-05, "loss": 0.141, "step": 1056 }, { "epoch": 9.06, "learning_rate": 3.538050734312417e-05, "loss": 0.1191, "step": 1060 }, { "epoch": 9.09, "learning_rate": 3.551401869158878e-05, "loss": 0.1298, "step": 1064 }, { "epoch": 9.13, "learning_rate": 3.5647530040053406e-05, "loss": 0.1373, "step": 1068 }, { "epoch": 9.16, "learning_rate": 3.578104138851803e-05, "loss": 0.1241, "step": 1072 }, { "epoch": 9.2, "learning_rate": 3.5914552736982644e-05, "loss": 0.126, "step": 1076 }, { "epoch": 9.23, "learning_rate": 3.604806408544727e-05, "loss": 0.1065, "step": 1080 }, { "epoch": 9.26, "learning_rate": 3.618157543391188e-05, "loss": 0.1143, "step": 1084 }, { "epoch": 9.3, "learning_rate": 3.6315086782376506e-05, "loss": 0.1323, "step": 1088 }, { "epoch": 9.33, "learning_rate": 3.644859813084112e-05, "loss": 0.1351, "step": 1092 }, { "epoch": 9.37, "learning_rate": 3.6582109479305744e-05, "loss": 0.1231, "step": 1096 }, { "epoch": 9.4, "learning_rate": 3.671562082777037e-05, "loss": 0.123, "step": 1100 }, { "epoch": 9.44, "learning_rate": 3.6849132176234976e-05, "loss": 0.1214, "step": 1104 }, { "epoch": 9.47, "learning_rate": 3.69826435246996e-05, "loss": 0.115, "step": 1108 }, { "epoch": 9.5, "learning_rate": 3.711615487316422e-05, "loss": 0.1416, "step": 1112 }, { "epoch": 9.54, "learning_rate": 3.7249666221628844e-05, "loss": 0.1126, "step": 1116 }, { "epoch": 9.57, "learning_rate": 3.738317757009346e-05, "loss": 0.1179, "step": 1120 }, { "epoch": 9.61, "learning_rate": 3.7516688918558076e-05, "loss": 0.1122, "step": 1124 }, { "epoch": 9.64, "learning_rate": 3.76502002670227e-05, "loss": 0.1116, "step": 1128 }, { "epoch": 9.68, "learning_rate": 3.7783711615487315e-05, "loss": 0.1148, "step": 1132 }, { "epoch": 9.71, "learning_rate": 3.791722296395194e-05, "loss": 0.1102, "step": 1136 }, { "epoch": 9.74, "learning_rate": 3.805073431241656e-05, "loss": 0.1213, "step": 1140 }, { "epoch": 9.78, "learning_rate": 3.8184245660881176e-05, "loss": 0.1105, "step": 1144 }, { "epoch": 9.81, "learning_rate": 3.831775700934579e-05, "loss": 0.1012, "step": 1148 }, { "epoch": 9.85, "learning_rate": 3.8451268357810415e-05, "loss": 0.128, "step": 1152 }, { "epoch": 9.88, "learning_rate": 3.858477970627504e-05, "loss": 0.1074, "step": 1156 }, { "epoch": 9.91, "learning_rate": 3.8718291054739654e-05, "loss": 0.1104, "step": 1160 }, { "epoch": 9.95, "learning_rate": 3.8851802403204276e-05, "loss": 0.1231, "step": 1164 }, { "epoch": 9.98, "learning_rate": 3.898531375166889e-05, "loss": 0.1078, "step": 1168 }, { "epoch": 10.02, "learning_rate": 3.9118825100133515e-05, "loss": 0.1061, "step": 1172 }, { "epoch": 10.05, "learning_rate": 3.925233644859813e-05, "loss": 0.1007, "step": 1176 }, { "epoch": 10.09, "learning_rate": 3.9385847797062754e-05, "loss": 0.0987, "step": 1180 }, { "epoch": 10.12, "learning_rate": 3.9519359145527376e-05, "loss": 0.121, "step": 1184 }, { "epoch": 10.15, "learning_rate": 3.965287049399199e-05, "loss": 0.1, "step": 1188 }, { "epoch": 10.19, "learning_rate": 3.978638184245661e-05, "loss": 0.1001, "step": 1192 }, { "epoch": 10.22, "learning_rate": 3.991989319092123e-05, "loss": 0.1033, "step": 1196 }, { "epoch": 10.26, "learning_rate": 4.005340453938585e-05, "loss": 0.0859, "step": 1200 }, { "epoch": 10.29, "learning_rate": 4.018691588785047e-05, "loss": 0.1105, "step": 1204 }, { "epoch": 10.32, "learning_rate": 4.032042723631509e-05, "loss": 0.1025, "step": 1208 }, { "epoch": 10.36, "learning_rate": 4.045393858477971e-05, "loss": 0.0994, "step": 1212 }, { "epoch": 10.39, "learning_rate": 4.0587449933244324e-05, "loss": 0.1133, "step": 1216 }, { "epoch": 10.43, "learning_rate": 4.072096128170895e-05, "loss": 0.0973, "step": 1220 }, { "epoch": 10.46, "learning_rate": 4.085447263017357e-05, "loss": 0.0917, "step": 1224 }, { "epoch": 10.5, "learning_rate": 4.0987983978638186e-05, "loss": 0.0861, "step": 1228 }, { "epoch": 10.53, "learning_rate": 4.11214953271028e-05, "loss": 0.0975, "step": 1232 }, { "epoch": 10.56, "learning_rate": 4.1255006675567424e-05, "loss": 0.1049, "step": 1236 }, { "epoch": 10.6, "learning_rate": 4.138851802403205e-05, "loss": 0.1036, "step": 1240 }, { "epoch": 10.63, "learning_rate": 4.152202937249666e-05, "loss": 0.0842, "step": 1244 }, { "epoch": 10.67, "learning_rate": 4.1655540720961286e-05, "loss": 0.1029, "step": 1248 }, { "epoch": 10.7, "learning_rate": 4.17890520694259e-05, "loss": 0.0906, "step": 1252 }, { "epoch": 10.74, "learning_rate": 4.192256341789052e-05, "loss": 0.0986, "step": 1256 }, { "epoch": 10.77, "learning_rate": 4.205607476635514e-05, "loss": 0.0958, "step": 1260 }, { "epoch": 10.8, "learning_rate": 4.218958611481976e-05, "loss": 0.0898, "step": 1264 }, { "epoch": 10.84, "learning_rate": 4.2323097463284386e-05, "loss": 0.0979, "step": 1268 }, { "epoch": 10.87, "learning_rate": 4.2456608811749e-05, "loss": 0.0911, "step": 1272 }, { "epoch": 10.91, "learning_rate": 4.259012016021362e-05, "loss": 0.0909, "step": 1276 }, { "epoch": 10.94, "learning_rate": 4.272363150867824e-05, "loss": 0.0909, "step": 1280 }, { "epoch": 10.97, "learning_rate": 4.2857142857142856e-05, "loss": 0.0849, "step": 1284 }, { "epoch": 11.01, "learning_rate": 4.299065420560748e-05, "loss": 0.095, "step": 1288 }, { "epoch": 11.04, "learning_rate": 4.31241655540721e-05, "loss": 0.0726, "step": 1292 }, { "epoch": 11.08, "learning_rate": 4.325767690253672e-05, "loss": 0.0996, "step": 1296 }, { "epoch": 11.11, "learning_rate": 4.3391188251001334e-05, "loss": 0.0783, "step": 1300 }, { "epoch": 11.15, "learning_rate": 4.3524699599465956e-05, "loss": 0.082, "step": 1304 }, { "epoch": 11.18, "learning_rate": 4.365821094793058e-05, "loss": 0.0966, "step": 1308 }, { "epoch": 11.21, "learning_rate": 4.3791722296395195e-05, "loss": 0.0815, "step": 1312 }, { "epoch": 11.25, "learning_rate": 4.392523364485982e-05, "loss": 0.0783, "step": 1316 }, { "epoch": 11.28, "learning_rate": 4.4058744993324434e-05, "loss": 0.0772, "step": 1320 }, { "epoch": 11.32, "learning_rate": 4.4192256341789056e-05, "loss": 0.0763, "step": 1324 }, { "epoch": 11.35, "learning_rate": 4.432576769025367e-05, "loss": 0.0859, "step": 1328 }, { "epoch": 11.38, "learning_rate": 4.4459279038718295e-05, "loss": 0.0764, "step": 1332 }, { "epoch": 11.42, "learning_rate": 4.459279038718292e-05, "loss": 0.0784, "step": 1336 }, { "epoch": 11.45, "learning_rate": 4.472630173564753e-05, "loss": 0.0779, "step": 1340 }, { "epoch": 11.49, "learning_rate": 4.485981308411215e-05, "loss": 0.0706, "step": 1344 }, { "epoch": 11.52, "learning_rate": 4.499332443257677e-05, "loss": 0.0837, "step": 1348 }, { "epoch": 11.56, "learning_rate": 4.5126835781041395e-05, "loss": 0.0881, "step": 1352 }, { "epoch": 11.59, "learning_rate": 4.526034712950601e-05, "loss": 0.0766, "step": 1356 }, { "epoch": 11.62, "learning_rate": 4.539385847797063e-05, "loss": 0.0777, "step": 1360 }, { "epoch": 11.66, "learning_rate": 4.552736982643525e-05, "loss": 0.0893, "step": 1364 }, { "epoch": 11.69, "learning_rate": 4.5660881174899866e-05, "loss": 0.0801, "step": 1368 }, { "epoch": 11.73, "learning_rate": 4.579439252336449e-05, "loss": 0.0758, "step": 1372 }, { "epoch": 11.76, "learning_rate": 4.592790387182911e-05, "loss": 0.0781, "step": 1376 }, { "epoch": 11.79, "learning_rate": 4.606141522029373e-05, "loss": 0.0725, "step": 1380 }, { "epoch": 11.83, "learning_rate": 4.619492656875834e-05, "loss": 0.0707, "step": 1384 }, { "epoch": 11.86, "learning_rate": 4.6328437917222966e-05, "loss": 0.073, "step": 1388 }, { "epoch": 11.9, "learning_rate": 4.646194926568759e-05, "loss": 0.0707, "step": 1392 }, { "epoch": 11.93, "learning_rate": 4.6595460614152204e-05, "loss": 0.0692, "step": 1396 }, { "epoch": 11.97, "learning_rate": 4.672897196261683e-05, "loss": 0.0793, "step": 1400 }, { "epoch": 12.0, "learning_rate": 4.686248331108144e-05, "loss": 0.076, "step": 1404 }, { "epoch": 12.03, "learning_rate": 4.699599465954606e-05, "loss": 0.073, "step": 1408 }, { "epoch": 12.07, "learning_rate": 4.712950600801068e-05, "loss": 0.0662, "step": 1412 }, { "epoch": 12.1, "learning_rate": 4.7263017356475304e-05, "loss": 0.0678, "step": 1416 }, { "epoch": 12.14, "learning_rate": 4.739652870493993e-05, "loss": 0.0677, "step": 1420 }, { "epoch": 12.17, "learning_rate": 4.753004005340454e-05, "loss": 0.0697, "step": 1424 }, { "epoch": 12.21, "learning_rate": 4.766355140186916e-05, "loss": 0.0655, "step": 1428 }, { "epoch": 12.24, "learning_rate": 4.779706275033378e-05, "loss": 0.0676, "step": 1432 }, { "epoch": 12.27, "learning_rate": 4.79305740987984e-05, "loss": 0.0618, "step": 1436 }, { "epoch": 12.31, "learning_rate": 4.806408544726302e-05, "loss": 0.0716, "step": 1440 }, { "epoch": 12.34, "learning_rate": 4.819759679572764e-05, "loss": 0.0745, "step": 1444 }, { "epoch": 12.38, "learning_rate": 4.833110814419226e-05, "loss": 0.0706, "step": 1448 }, { "epoch": 12.41, "learning_rate": 4.8464619492656875e-05, "loss": 0.0709, "step": 1452 }, { "epoch": 12.44, "learning_rate": 4.85981308411215e-05, "loss": 0.064, "step": 1456 }, { "epoch": 12.48, "learning_rate": 4.873164218958612e-05, "loss": 0.0654, "step": 1460 }, { "epoch": 12.51, "learning_rate": 4.8865153538050736e-05, "loss": 0.0687, "step": 1464 }, { "epoch": 12.55, "learning_rate": 4.899866488651535e-05, "loss": 0.0764, "step": 1468 }, { "epoch": 12.58, "learning_rate": 4.9132176234979975e-05, "loss": 0.0666, "step": 1472 }, { "epoch": 12.62, "learning_rate": 4.92656875834446e-05, "loss": 0.069, "step": 1476 }, { "epoch": 12.65, "learning_rate": 4.9399198931909214e-05, "loss": 0.0684, "step": 1480 }, { "epoch": 12.68, "learning_rate": 4.9532710280373836e-05, "loss": 0.0606, "step": 1484 }, { "epoch": 12.72, "learning_rate": 4.966622162883845e-05, "loss": 0.0658, "step": 1488 }, { "epoch": 12.75, "learning_rate": 4.979973297730307e-05, "loss": 0.0677, "step": 1492 }, { "epoch": 12.79, "learning_rate": 4.993324432576769e-05, "loss": 0.0651, "step": 1496 }, { "epoch": 12.82, "learning_rate": 5.0066755674232314e-05, "loss": 0.0679, "step": 1500 }, { "epoch": 12.85, "learning_rate": 5.0200267022696936e-05, "loss": 0.0582, "step": 1504 }, { "epoch": 12.89, "learning_rate": 5.033377837116155e-05, "loss": 0.047, "step": 1508 }, { "epoch": 12.92, "learning_rate": 5.046728971962617e-05, "loss": 0.0541, "step": 1512 }, { "epoch": 12.96, "learning_rate": 5.0600801068090784e-05, "loss": 0.0634, "step": 1516 }, { "epoch": 12.99, "learning_rate": 5.073431241655541e-05, "loss": 0.061, "step": 1520 }, { "epoch": 13.03, "learning_rate": 5.086782376502003e-05, "loss": 0.0724, "step": 1524 }, { "epoch": 13.06, "learning_rate": 5.100133511348465e-05, "loss": 0.058, "step": 1528 }, { "epoch": 13.09, "learning_rate": 5.113484646194927e-05, "loss": 0.0561, "step": 1532 }, { "epoch": 13.13, "learning_rate": 5.126835781041389e-05, "loss": 0.058, "step": 1536 }, { "epoch": 13.16, "learning_rate": 5.14018691588785e-05, "loss": 0.0563, "step": 1540 }, { "epoch": 13.2, "learning_rate": 5.153538050734312e-05, "loss": 0.06, "step": 1544 }, { "epoch": 13.23, "learning_rate": 5.1668891855807746e-05, "loss": 0.0575, "step": 1548 }, { "epoch": 13.26, "learning_rate": 5.180240320427236e-05, "loss": 0.0602, "step": 1552 }, { "epoch": 13.3, "learning_rate": 5.1935914552736984e-05, "loss": 0.0645, "step": 1556 }, { "epoch": 13.33, "learning_rate": 5.206942590120161e-05, "loss": 0.061, "step": 1560 }, { "epoch": 13.37, "learning_rate": 5.220293724966623e-05, "loss": 0.0522, "step": 1564 }, { "epoch": 13.4, "learning_rate": 5.233644859813084e-05, "loss": 0.0489, "step": 1568 }, { "epoch": 13.44, "learning_rate": 5.246995994659546e-05, "loss": 0.0543, "step": 1572 }, { "epoch": 13.47, "learning_rate": 5.260347129506008e-05, "loss": 0.0581, "step": 1576 }, { "epoch": 13.5, "learning_rate": 5.27369826435247e-05, "loss": 0.0564, "step": 1580 }, { "epoch": 13.54, "learning_rate": 5.287049399198932e-05, "loss": 0.0569, "step": 1584 }, { "epoch": 13.57, "learning_rate": 5.3004005340453946e-05, "loss": 0.0568, "step": 1588 }, { "epoch": 13.61, "learning_rate": 5.313751668891856e-05, "loss": 0.0524, "step": 1592 }, { "epoch": 13.64, "learning_rate": 5.327102803738318e-05, "loss": 0.0515, "step": 1596 }, { "epoch": 13.68, "learning_rate": 5.3404539385847794e-05, "loss": 0.0517, "step": 1600 }, { "epoch": 13.71, "learning_rate": 5.3538050734312416e-05, "loss": 0.062, "step": 1604 }, { "epoch": 13.74, "learning_rate": 5.367156208277704e-05, "loss": 0.0501, "step": 1608 }, { "epoch": 13.78, "learning_rate": 5.380507343124166e-05, "loss": 0.0555, "step": 1612 }, { "epoch": 13.81, "learning_rate": 5.393858477970628e-05, "loss": 0.0587, "step": 1616 }, { "epoch": 13.85, "learning_rate": 5.40720961281709e-05, "loss": 0.0568, "step": 1620 }, { "epoch": 13.88, "learning_rate": 5.420560747663551e-05, "loss": 0.0502, "step": 1624 }, { "epoch": 13.91, "learning_rate": 5.433911882510013e-05, "loss": 0.0512, "step": 1628 }, { "epoch": 13.95, "learning_rate": 5.4472630173564755e-05, "loss": 0.0551, "step": 1632 }, { "epoch": 13.98, "learning_rate": 5.460614152202938e-05, "loss": 0.0509, "step": 1636 }, { "epoch": 14.02, "learning_rate": 5.4739652870493994e-05, "loss": 0.0587, "step": 1640 }, { "epoch": 14.05, "learning_rate": 5.4873164218958616e-05, "loss": 0.0472, "step": 1644 }, { "epoch": 14.09, "learning_rate": 5.500667556742324e-05, "loss": 0.0514, "step": 1648 }, { "epoch": 14.12, "learning_rate": 5.514018691588785e-05, "loss": 0.0434, "step": 1652 }, { "epoch": 14.15, "learning_rate": 5.527369826435247e-05, "loss": 0.0548, "step": 1656 }, { "epoch": 14.19, "learning_rate": 5.540720961281709e-05, "loss": 0.0514, "step": 1660 }, { "epoch": 14.22, "learning_rate": 5.554072096128171e-05, "loss": 0.0511, "step": 1664 }, { "epoch": 14.26, "learning_rate": 5.567423230974633e-05, "loss": 0.0581, "step": 1668 }, { "epoch": 14.29, "learning_rate": 5.5807743658210955e-05, "loss": 0.0444, "step": 1672 }, { "epoch": 14.32, "learning_rate": 5.594125500667558e-05, "loss": 0.0597, "step": 1676 }, { "epoch": 14.36, "learning_rate": 5.607476635514019e-05, "loss": 0.0542, "step": 1680 }, { "epoch": 14.39, "learning_rate": 5.62082777036048e-05, "loss": 0.0452, "step": 1684 }, { "epoch": 14.43, "learning_rate": 5.6341789052069426e-05, "loss": 0.0511, "step": 1688 }, { "epoch": 14.46, "learning_rate": 5.647530040053405e-05, "loss": 0.0468, "step": 1692 }, { "epoch": 14.5, "learning_rate": 5.660881174899867e-05, "loss": 0.0488, "step": 1696 }, { "epoch": 14.53, "learning_rate": 5.674232309746329e-05, "loss": 0.0429, "step": 1700 }, { "epoch": 14.56, "learning_rate": 5.687583444592791e-05, "loss": 0.0441, "step": 1704 }, { "epoch": 14.6, "learning_rate": 5.700934579439252e-05, "loss": 0.0515, "step": 1708 }, { "epoch": 14.63, "learning_rate": 5.714285714285714e-05, "loss": 0.0462, "step": 1712 }, { "epoch": 14.67, "learning_rate": 5.7276368491321764e-05, "loss": 0.0416, "step": 1716 }, { "epoch": 14.7, "learning_rate": 5.740987983978639e-05, "loss": 0.0509, "step": 1720 }, { "epoch": 14.74, "learning_rate": 5.7543391188251e-05, "loss": 0.0376, "step": 1724 }, { "epoch": 14.77, "learning_rate": 5.7676902536715626e-05, "loss": 0.0408, "step": 1728 }, { "epoch": 14.8, "learning_rate": 5.781041388518025e-05, "loss": 0.047, "step": 1732 }, { "epoch": 14.84, "learning_rate": 5.794392523364486e-05, "loss": 0.05, "step": 1736 }, { "epoch": 14.87, "learning_rate": 5.807743658210948e-05, "loss": 0.0406, "step": 1740 }, { "epoch": 14.91, "learning_rate": 5.82109479305741e-05, "loss": 0.0408, "step": 1744 }, { "epoch": 14.94, "learning_rate": 5.834445927903872e-05, "loss": 0.0472, "step": 1748 }, { "epoch": 14.97, "learning_rate": 5.847797062750334e-05, "loss": 0.0468, "step": 1752 }, { "epoch": 15.01, "learning_rate": 5.8611481975967965e-05, "loss": 0.0552, "step": 1756 }, { "epoch": 15.04, "learning_rate": 5.874499332443259e-05, "loss": 0.0395, "step": 1760 }, { "epoch": 15.08, "learning_rate": 5.8878504672897196e-05, "loss": 0.0378, "step": 1764 }, { "epoch": 15.11, "learning_rate": 5.901201602136181e-05, "loss": 0.0389, "step": 1768 }, { "epoch": 15.15, "learning_rate": 5.9145527369826435e-05, "loss": 0.0375, "step": 1772 }, { "epoch": 15.18, "learning_rate": 5.927903871829106e-05, "loss": 0.0478, "step": 1776 }, { "epoch": 15.21, "learning_rate": 5.941255006675568e-05, "loss": 0.0348, "step": 1780 }, { "epoch": 15.25, "learning_rate": 5.95460614152203e-05, "loss": 0.04, "step": 1784 }, { "epoch": 15.28, "learning_rate": 5.967957276368492e-05, "loss": 0.0459, "step": 1788 }, { "epoch": 15.32, "learning_rate": 5.981308411214953e-05, "loss": 0.0428, "step": 1792 }, { "epoch": 15.35, "learning_rate": 5.994659546061415e-05, "loss": 0.0408, "step": 1796 }, { "epoch": 15.38, "learning_rate": 6.0080106809078774e-05, "loss": 0.0428, "step": 1800 }, { "epoch": 15.42, "learning_rate": 6.0213618157543397e-05, "loss": 0.0366, "step": 1804 }, { "epoch": 15.45, "learning_rate": 6.034712950600801e-05, "loss": 0.0474, "step": 1808 }, { "epoch": 15.49, "learning_rate": 6.0480640854472635e-05, "loss": 0.0432, "step": 1812 }, { "epoch": 15.52, "learning_rate": 6.061415220293726e-05, "loss": 0.0309, "step": 1816 }, { "epoch": 15.56, "learning_rate": 6.074766355140187e-05, "loss": 0.0418, "step": 1820 }, { "epoch": 15.59, "learning_rate": 6.088117489986649e-05, "loss": 0.0387, "step": 1824 }, { "epoch": 15.62, "learning_rate": 6.101468624833111e-05, "loss": 0.0433, "step": 1828 }, { "epoch": 15.66, "learning_rate": 6.114819759679573e-05, "loss": 0.0363, "step": 1832 }, { "epoch": 15.69, "learning_rate": 6.128170894526035e-05, "loss": 0.0438, "step": 1836 }, { "epoch": 15.73, "learning_rate": 6.141522029372497e-05, "loss": 0.0396, "step": 1840 }, { "epoch": 15.76, "learning_rate": 6.15487316421896e-05, "loss": 0.0319, "step": 1844 }, { "epoch": 15.79, "learning_rate": 6.16822429906542e-05, "loss": 0.0409, "step": 1848 }, { "epoch": 15.83, "learning_rate": 6.181575433911883e-05, "loss": 0.0412, "step": 1852 }, { "epoch": 15.86, "learning_rate": 6.194926568758345e-05, "loss": 0.0395, "step": 1856 }, { "epoch": 15.9, "learning_rate": 6.208277703604807e-05, "loss": 0.0372, "step": 1860 }, { "epoch": 15.93, "learning_rate": 6.221628838451268e-05, "loss": 0.0427, "step": 1864 }, { "epoch": 15.97, "learning_rate": 6.23497997329773e-05, "loss": 0.0407, "step": 1868 }, { "epoch": 16.0, "learning_rate": 6.248331108144193e-05, "loss": 0.0417, "step": 1872 }, { "epoch": 16.03, "learning_rate": 6.261682242990654e-05, "loss": 0.0361, "step": 1876 }, { "epoch": 16.07, "learning_rate": 6.275033377837116e-05, "loss": 0.0367, "step": 1880 }, { "epoch": 16.1, "learning_rate": 6.288384512683578e-05, "loss": 0.0411, "step": 1884 }, { "epoch": 16.14, "learning_rate": 6.30173564753004e-05, "loss": 0.0433, "step": 1888 }, { "epoch": 16.17, "learning_rate": 6.315086782376503e-05, "loss": 0.0355, "step": 1892 }, { "epoch": 16.21, "learning_rate": 6.328437917222965e-05, "loss": 0.0365, "step": 1896 }, { "epoch": 16.24, "learning_rate": 6.341789052069427e-05, "loss": 0.0378, "step": 1900 }, { "epoch": 16.27, "learning_rate": 6.355140186915888e-05, "loss": 0.0369, "step": 1904 }, { "epoch": 16.31, "learning_rate": 6.368491321762349e-05, "loss": 0.0341, "step": 1908 }, { "epoch": 16.34, "learning_rate": 6.381842456608812e-05, "loss": 0.0329, "step": 1912 }, { "epoch": 16.38, "learning_rate": 6.395193591455274e-05, "loss": 0.0377, "step": 1916 }, { "epoch": 16.41, "learning_rate": 6.408544726301736e-05, "loss": 0.0372, "step": 1920 }, { "epoch": 16.44, "learning_rate": 6.421895861148198e-05, "loss": 0.0326, "step": 1924 }, { "epoch": 16.48, "learning_rate": 6.435246995994659e-05, "loss": 0.0353, "step": 1928 }, { "epoch": 16.51, "learning_rate": 6.448598130841122e-05, "loss": 0.0338, "step": 1932 }, { "epoch": 16.55, "learning_rate": 6.461949265687584e-05, "loss": 0.0377, "step": 1936 }, { "epoch": 16.58, "learning_rate": 6.475300400534046e-05, "loss": 0.039, "step": 1940 }, { "epoch": 16.62, "learning_rate": 6.488651535380508e-05, "loss": 0.0354, "step": 1944 }, { "epoch": 16.65, "learning_rate": 6.502002670226969e-05, "loss": 0.0377, "step": 1948 }, { "epoch": 16.68, "learning_rate": 6.515353805073432e-05, "loss": 0.0379, "step": 1952 }, { "epoch": 16.72, "learning_rate": 6.528704939919892e-05, "loss": 0.034, "step": 1956 }, { "epoch": 16.75, "learning_rate": 6.542056074766355e-05, "loss": 0.0309, "step": 1960 }, { "epoch": 16.79, "learning_rate": 6.555407209612817e-05, "loss": 0.0367, "step": 1964 }, { "epoch": 16.82, "learning_rate": 6.568758344459279e-05, "loss": 0.0303, "step": 1968 }, { "epoch": 16.85, "learning_rate": 6.582109479305742e-05, "loss": 0.0269, "step": 1972 }, { "epoch": 16.89, "learning_rate": 6.595460614152204e-05, "loss": 0.0352, "step": 1976 }, { "epoch": 16.92, "learning_rate": 6.608811748998666e-05, "loss": 0.0372, "step": 1980 }, { "epoch": 16.96, "learning_rate": 6.622162883845127e-05, "loss": 0.0355, "step": 1984 }, { "epoch": 16.99, "learning_rate": 6.635514018691589e-05, "loss": 0.0359, "step": 1988 }, { "epoch": 17.03, "learning_rate": 6.64886515353805e-05, "loss": 0.0377, "step": 1992 }, { "epoch": 17.06, "learning_rate": 6.662216288384512e-05, "loss": 0.0301, "step": 1996 }, { "epoch": 17.09, "learning_rate": 6.675567423230975e-05, "loss": 0.0362, "step": 2000 }, { "epoch": 17.09, "eval_exact_match": 0.446985446985447, "eval_loss": 0.5254557728767395, "eval_runtime": 86.8762, "eval_samples_per_second": 11.073, "step": 2000 }, { "epoch": 17.13, "learning_rate": 6.688918558077437e-05, "loss": 0.0301, "step": 2004 }, { "epoch": 17.16, "learning_rate": 6.702269692923899e-05, "loss": 0.0302, "step": 2008 }, { "epoch": 17.2, "learning_rate": 6.71562082777036e-05, "loss": 0.0299, "step": 2012 }, { "epoch": 17.23, "learning_rate": 6.728971962616822e-05, "loss": 0.0341, "step": 2016 }, { "epoch": 17.26, "learning_rate": 6.742323097463285e-05, "loss": 0.0311, "step": 2020 }, { "epoch": 17.3, "learning_rate": 6.755674232309747e-05, "loss": 0.0277, "step": 2024 }, { "epoch": 17.33, "learning_rate": 6.769025367156209e-05, "loss": 0.0366, "step": 2028 }, { "epoch": 17.37, "learning_rate": 6.78237650200267e-05, "loss": 0.0338, "step": 2032 }, { "epoch": 17.4, "learning_rate": 6.795727636849132e-05, "loss": 0.0433, "step": 2036 }, { "epoch": 17.44, "learning_rate": 6.809078771695593e-05, "loss": 0.0266, "step": 2040 }, { "epoch": 17.47, "learning_rate": 6.822429906542056e-05, "loss": 0.0312, "step": 2044 }, { "epoch": 17.5, "learning_rate": 6.835781041388518e-05, "loss": 0.0337, "step": 2048 }, { "epoch": 17.54, "learning_rate": 6.84913217623498e-05, "loss": 0.036, "step": 2052 }, { "epoch": 17.57, "learning_rate": 6.862483311081442e-05, "loss": 0.0397, "step": 2056 }, { "epoch": 17.61, "learning_rate": 6.875834445927905e-05, "loss": 0.0328, "step": 2060 }, { "epoch": 17.64, "learning_rate": 6.889185580774367e-05, "loss": 0.0318, "step": 2064 }, { "epoch": 17.68, "learning_rate": 6.902536715620828e-05, "loss": 0.0326, "step": 2068 }, { "epoch": 17.71, "learning_rate": 6.91588785046729e-05, "loss": 0.026, "step": 2072 }, { "epoch": 17.74, "learning_rate": 6.929238985313752e-05, "loss": 0.0295, "step": 2076 }, { "epoch": 17.78, "learning_rate": 6.942590120160213e-05, "loss": 0.0328, "step": 2080 }, { "epoch": 17.81, "learning_rate": 6.955941255006676e-05, "loss": 0.0286, "step": 2084 }, { "epoch": 17.85, "learning_rate": 6.969292389853138e-05, "loss": 0.0308, "step": 2088 }, { "epoch": 17.88, "learning_rate": 6.9826435246996e-05, "loss": 0.0301, "step": 2092 }, { "epoch": 17.91, "learning_rate": 6.995994659546061e-05, "loss": 0.0271, "step": 2096 }, { "epoch": 17.95, "learning_rate": 7.009345794392523e-05, "loss": 0.0303, "step": 2100 }, { "epoch": 17.98, "learning_rate": 7.022696929238986e-05, "loss": 0.0236, "step": 2104 }, { "epoch": 18.02, "learning_rate": 7.036048064085448e-05, "loss": 0.0297, "step": 2108 }, { "epoch": 18.05, "learning_rate": 7.04939919893191e-05, "loss": 0.0216, "step": 2112 }, { "epoch": 18.09, "learning_rate": 7.062750333778372e-05, "loss": 0.026, "step": 2116 }, { "epoch": 18.12, "learning_rate": 7.076101468624833e-05, "loss": 0.0299, "step": 2120 }, { "epoch": 18.15, "learning_rate": 7.089452603471294e-05, "loss": 0.0303, "step": 2124 }, { "epoch": 18.19, "learning_rate": 7.102803738317757e-05, "loss": 0.0256, "step": 2128 }, { "epoch": 18.22, "learning_rate": 7.116154873164219e-05, "loss": 0.0304, "step": 2132 }, { "epoch": 18.26, "learning_rate": 7.129506008010681e-05, "loss": 0.0241, "step": 2136 }, { "epoch": 18.29, "learning_rate": 7.142857142857143e-05, "loss": 0.0294, "step": 2140 }, { "epoch": 18.32, "learning_rate": 7.156208277703606e-05, "loss": 0.0279, "step": 2144 }, { "epoch": 18.36, "learning_rate": 7.169559412550068e-05, "loss": 0.0332, "step": 2148 }, { "epoch": 18.39, "learning_rate": 7.182910547396529e-05, "loss": 0.0315, "step": 2152 }, { "epoch": 18.43, "learning_rate": 7.196261682242991e-05, "loss": 0.0215, "step": 2156 }, { "epoch": 18.46, "learning_rate": 7.209612817089453e-05, "loss": 0.032, "step": 2160 }, { "epoch": 18.5, "learning_rate": 7.222963951935914e-05, "loss": 0.0257, "step": 2164 }, { "epoch": 18.53, "learning_rate": 7.236315086782377e-05, "loss": 0.0287, "step": 2168 }, { "epoch": 18.56, "learning_rate": 7.249666221628839e-05, "loss": 0.0284, "step": 2172 }, { "epoch": 18.6, "learning_rate": 7.263017356475301e-05, "loss": 0.0316, "step": 2176 }, { "epoch": 18.63, "learning_rate": 7.276368491321762e-05, "loss": 0.0268, "step": 2180 }, { "epoch": 18.67, "learning_rate": 7.289719626168224e-05, "loss": 0.0291, "step": 2184 }, { "epoch": 18.7, "learning_rate": 7.303070761014687e-05, "loss": 0.0284, "step": 2188 }, { "epoch": 18.74, "learning_rate": 7.316421895861149e-05, "loss": 0.0239, "step": 2192 }, { "epoch": 18.77, "learning_rate": 7.329773030707611e-05, "loss": 0.0247, "step": 2196 }, { "epoch": 18.8, "learning_rate": 7.343124165554073e-05, "loss": 0.0192, "step": 2200 }, { "epoch": 18.84, "learning_rate": 7.356475300400534e-05, "loss": 0.0236, "step": 2204 }, { "epoch": 18.87, "learning_rate": 7.369826435246995e-05, "loss": 0.0235, "step": 2208 }, { "epoch": 18.91, "learning_rate": 7.383177570093458e-05, "loss": 0.0313, "step": 2212 }, { "epoch": 18.94, "learning_rate": 7.39652870493992e-05, "loss": 0.0292, "step": 2216 }, { "epoch": 18.97, "learning_rate": 7.409879839786382e-05, "loss": 0.0295, "step": 2220 }, { "epoch": 19.01, "learning_rate": 7.423230974632844e-05, "loss": 0.0251, "step": 2224 }, { "epoch": 19.04, "learning_rate": 7.436582109479307e-05, "loss": 0.0245, "step": 2228 }, { "epoch": 19.08, "learning_rate": 7.449933244325769e-05, "loss": 0.0205, "step": 2232 }, { "epoch": 19.11, "learning_rate": 7.46328437917223e-05, "loss": 0.0219, "step": 2236 }, { "epoch": 19.15, "learning_rate": 7.476635514018692e-05, "loss": 0.0262, "step": 2240 }, { "epoch": 19.18, "learning_rate": 7.489986648865154e-05, "loss": 0.0237, "step": 2244 }, { "epoch": 19.21, "learning_rate": 7.503337783711615e-05, "loss": 0.024, "step": 2248 }, { "epoch": 19.25, "learning_rate": 7.516688918558078e-05, "loss": 0.0244, "step": 2252 }, { "epoch": 19.28, "learning_rate": 7.53004005340454e-05, "loss": 0.0257, "step": 2256 }, { "epoch": 19.32, "learning_rate": 7.543391188251002e-05, "loss": 0.0238, "step": 2260 }, { "epoch": 19.35, "learning_rate": 7.556742323097463e-05, "loss": 0.0306, "step": 2264 }, { "epoch": 19.38, "learning_rate": 7.570093457943925e-05, "loss": 0.0274, "step": 2268 }, { "epoch": 19.42, "learning_rate": 7.583444592790388e-05, "loss": 0.0236, "step": 2272 }, { "epoch": 19.45, "learning_rate": 7.59679572763685e-05, "loss": 0.0282, "step": 2276 }, { "epoch": 19.49, "learning_rate": 7.610146862483312e-05, "loss": 0.0274, "step": 2280 }, { "epoch": 19.52, "learning_rate": 7.623497997329774e-05, "loss": 0.0228, "step": 2284 }, { "epoch": 19.56, "learning_rate": 7.636849132176235e-05, "loss": 0.0242, "step": 2288 }, { "epoch": 19.59, "learning_rate": 7.650200267022698e-05, "loss": 0.0209, "step": 2292 }, { "epoch": 19.62, "learning_rate": 7.663551401869158e-05, "loss": 0.0224, "step": 2296 }, { "epoch": 19.66, "learning_rate": 7.676902536715621e-05, "loss": 0.0221, "step": 2300 }, { "epoch": 19.69, "learning_rate": 7.690253671562083e-05, "loss": 0.0209, "step": 2304 }, { "epoch": 19.73, "learning_rate": 7.703604806408545e-05, "loss": 0.021, "step": 2308 }, { "epoch": 19.76, "learning_rate": 7.716955941255008e-05, "loss": 0.023, "step": 2312 }, { "epoch": 19.79, "learning_rate": 7.73030707610147e-05, "loss": 0.0221, "step": 2316 }, { "epoch": 19.83, "learning_rate": 7.743658210947931e-05, "loss": 0.0222, "step": 2320 }, { "epoch": 19.86, "learning_rate": 7.757009345794393e-05, "loss": 0.0233, "step": 2324 }, { "epoch": 19.9, "learning_rate": 7.770360480640855e-05, "loss": 0.0213, "step": 2328 }, { "epoch": 19.93, "learning_rate": 7.783711615487318e-05, "loss": 0.0216, "step": 2332 }, { "epoch": 19.97, "learning_rate": 7.797062750333778e-05, "loss": 0.0225, "step": 2336 }, { "epoch": 20.0, "learning_rate": 7.810413885180241e-05, "loss": 0.0263, "step": 2340 }, { "epoch": 20.03, "learning_rate": 7.823765020026703e-05, "loss": 0.019, "step": 2344 }, { "epoch": 20.07, "learning_rate": 7.837116154873164e-05, "loss": 0.0232, "step": 2348 }, { "epoch": 20.1, "learning_rate": 7.850467289719626e-05, "loss": 0.0274, "step": 2352 }, { "epoch": 20.14, "learning_rate": 7.863818424566088e-05, "loss": 0.0227, "step": 2356 }, { "epoch": 20.17, "learning_rate": 7.877169559412551e-05, "loss": 0.0229, "step": 2360 }, { "epoch": 20.21, "learning_rate": 7.890520694259013e-05, "loss": 0.0222, "step": 2364 }, { "epoch": 20.24, "learning_rate": 7.903871829105475e-05, "loss": 0.0241, "step": 2368 }, { "epoch": 20.27, "learning_rate": 7.917222963951936e-05, "loss": 0.0236, "step": 2372 }, { "epoch": 20.31, "learning_rate": 7.930574098798398e-05, "loss": 0.0204, "step": 2376 }, { "epoch": 20.34, "learning_rate": 7.94392523364486e-05, "loss": 0.0221, "step": 2380 }, { "epoch": 20.38, "learning_rate": 7.957276368491322e-05, "loss": 0.0165, "step": 2384 }, { "epoch": 20.41, "learning_rate": 7.970627503337784e-05, "loss": 0.0171, "step": 2388 }, { "epoch": 20.44, "learning_rate": 7.983978638184246e-05, "loss": 0.0206, "step": 2392 }, { "epoch": 20.48, "learning_rate": 7.997329773030708e-05, "loss": 0.0207, "step": 2396 }, { "epoch": 20.51, "learning_rate": 8.01068090787717e-05, "loss": 0.0176, "step": 2400 }, { "epoch": 20.55, "learning_rate": 8.024032042723632e-05, "loss": 0.0198, "step": 2404 }, { "epoch": 20.58, "learning_rate": 8.037383177570094e-05, "loss": 0.0249, "step": 2408 }, { "epoch": 20.62, "learning_rate": 8.050734312416556e-05, "loss": 0.02, "step": 2412 }, { "epoch": 20.65, "learning_rate": 8.064085447263018e-05, "loss": 0.0216, "step": 2416 }, { "epoch": 20.68, "learning_rate": 8.07743658210948e-05, "loss": 0.0259, "step": 2420 }, { "epoch": 20.72, "learning_rate": 8.090787716955942e-05, "loss": 0.0226, "step": 2424 }, { "epoch": 20.75, "learning_rate": 8.104138851802403e-05, "loss": 0.025, "step": 2428 }, { "epoch": 20.79, "learning_rate": 8.117489986648865e-05, "loss": 0.0201, "step": 2432 }, { "epoch": 20.82, "learning_rate": 8.130841121495327e-05, "loss": 0.0225, "step": 2436 }, { "epoch": 20.85, "learning_rate": 8.14419225634179e-05, "loss": 0.0217, "step": 2440 }, { "epoch": 20.89, "learning_rate": 8.157543391188252e-05, "loss": 0.0217, "step": 2444 }, { "epoch": 20.92, "learning_rate": 8.170894526034714e-05, "loss": 0.0199, "step": 2448 }, { "epoch": 20.96, "learning_rate": 8.184245660881176e-05, "loss": 0.0194, "step": 2452 }, { "epoch": 20.99, "learning_rate": 8.197596795727637e-05, "loss": 0.0223, "step": 2456 }, { "epoch": 21.03, "learning_rate": 8.2109479305741e-05, "loss": 0.0181, "step": 2460 }, { "epoch": 21.06, "learning_rate": 8.22429906542056e-05, "loss": 0.0187, "step": 2464 }, { "epoch": 21.09, "learning_rate": 8.237650200267023e-05, "loss": 0.0177, "step": 2468 }, { "epoch": 21.13, "learning_rate": 8.251001335113485e-05, "loss": 0.0204, "step": 2472 }, { "epoch": 21.16, "learning_rate": 8.264352469959947e-05, "loss": 0.0186, "step": 2476 }, { "epoch": 21.2, "learning_rate": 8.27770360480641e-05, "loss": 0.02, "step": 2480 }, { "epoch": 21.23, "learning_rate": 8.29105473965287e-05, "loss": 0.0203, "step": 2484 }, { "epoch": 21.26, "learning_rate": 8.304405874499333e-05, "loss": 0.0219, "step": 2488 }, { "epoch": 21.3, "learning_rate": 8.317757009345795e-05, "loss": 0.0197, "step": 2492 }, { "epoch": 21.33, "learning_rate": 8.331108144192257e-05, "loss": 0.0167, "step": 2496 }, { "epoch": 21.37, "learning_rate": 8.34445927903872e-05, "loss": 0.0163, "step": 2500 }, { "epoch": 21.4, "learning_rate": 8.35781041388518e-05, "loss": 0.0157, "step": 2504 }, { "epoch": 21.44, "learning_rate": 8.371161548731643e-05, "loss": 0.0235, "step": 2508 }, { "epoch": 21.47, "learning_rate": 8.384512683578104e-05, "loss": 0.0212, "step": 2512 }, { "epoch": 21.5, "learning_rate": 8.397863818424566e-05, "loss": 0.0187, "step": 2516 }, { "epoch": 21.54, "learning_rate": 8.411214953271028e-05, "loss": 0.015, "step": 2520 }, { "epoch": 21.57, "learning_rate": 8.42456608811749e-05, "loss": 0.0163, "step": 2524 }, { "epoch": 21.61, "learning_rate": 8.437917222963953e-05, "loss": 0.0167, "step": 2528 }, { "epoch": 21.64, "learning_rate": 8.451268357810415e-05, "loss": 0.019, "step": 2532 }, { "epoch": 21.68, "learning_rate": 8.464619492656877e-05, "loss": 0.017, "step": 2536 }, { "epoch": 21.71, "learning_rate": 8.477970627503338e-05, "loss": 0.0211, "step": 2540 }, { "epoch": 21.74, "learning_rate": 8.4913217623498e-05, "loss": 0.0154, "step": 2544 }, { "epoch": 21.78, "learning_rate": 8.504672897196261e-05, "loss": 0.0236, "step": 2548 }, { "epoch": 21.81, "learning_rate": 8.518024032042724e-05, "loss": 0.0182, "step": 2552 }, { "epoch": 21.85, "learning_rate": 8.531375166889186e-05, "loss": 0.0207, "step": 2556 }, { "epoch": 21.88, "learning_rate": 8.544726301735648e-05, "loss": 0.0171, "step": 2560 }, { "epoch": 21.91, "learning_rate": 8.55807743658211e-05, "loss": 0.0187, "step": 2564 }, { "epoch": 21.95, "learning_rate": 8.571428571428571e-05, "loss": 0.0175, "step": 2568 }, { "epoch": 21.98, "learning_rate": 8.584779706275034e-05, "loss": 0.0176, "step": 2572 }, { "epoch": 22.02, "learning_rate": 8.598130841121496e-05, "loss": 0.0159, "step": 2576 }, { "epoch": 22.05, "learning_rate": 8.611481975967958e-05, "loss": 0.0172, "step": 2580 }, { "epoch": 22.09, "learning_rate": 8.62483311081442e-05, "loss": 0.017, "step": 2584 }, { "epoch": 22.12, "learning_rate": 8.638184245660881e-05, "loss": 0.0187, "step": 2588 }, { "epoch": 22.15, "learning_rate": 8.651535380507344e-05, "loss": 0.0171, "step": 2592 }, { "epoch": 22.19, "learning_rate": 8.664886515353804e-05, "loss": 0.0175, "step": 2596 }, { "epoch": 22.22, "learning_rate": 8.678237650200267e-05, "loss": 0.0168, "step": 2600 }, { "epoch": 22.26, "learning_rate": 8.691588785046729e-05, "loss": 0.0182, "step": 2604 }, { "epoch": 22.29, "learning_rate": 8.704939919893191e-05, "loss": 0.0211, "step": 2608 }, { "epoch": 22.32, "learning_rate": 8.718291054739654e-05, "loss": 0.0226, "step": 2612 }, { "epoch": 22.36, "learning_rate": 8.731642189586116e-05, "loss": 0.0118, "step": 2616 }, { "epoch": 22.39, "learning_rate": 8.744993324432578e-05, "loss": 0.0179, "step": 2620 }, { "epoch": 22.43, "learning_rate": 8.758344459279039e-05, "loss": 0.0161, "step": 2624 }, { "epoch": 22.46, "learning_rate": 8.771695594125501e-05, "loss": 0.016, "step": 2628 }, { "epoch": 22.5, "learning_rate": 8.785046728971964e-05, "loss": 0.0194, "step": 2632 }, { "epoch": 22.53, "learning_rate": 8.798397863818424e-05, "loss": 0.0161, "step": 2636 }, { "epoch": 22.56, "learning_rate": 8.811748998664887e-05, "loss": 0.0177, "step": 2640 }, { "epoch": 22.6, "learning_rate": 8.825100133511349e-05, "loss": 0.0165, "step": 2644 }, { "epoch": 22.63, "learning_rate": 8.838451268357811e-05, "loss": 0.0147, "step": 2648 }, { "epoch": 22.67, "learning_rate": 8.851802403204272e-05, "loss": 0.0148, "step": 2652 }, { "epoch": 22.7, "learning_rate": 8.865153538050734e-05, "loss": 0.0209, "step": 2656 }, { "epoch": 22.74, "learning_rate": 8.878504672897197e-05, "loss": 0.0135, "step": 2660 }, { "epoch": 22.77, "learning_rate": 8.891855807743659e-05, "loss": 0.0192, "step": 2664 }, { "epoch": 22.8, "learning_rate": 8.905206942590121e-05, "loss": 0.0183, "step": 2668 }, { "epoch": 22.84, "learning_rate": 8.918558077436584e-05, "loss": 0.019, "step": 2672 }, { "epoch": 22.87, "learning_rate": 8.931909212283044e-05, "loss": 0.0151, "step": 2676 }, { "epoch": 22.91, "learning_rate": 8.945260347129505e-05, "loss": 0.0127, "step": 2680 }, { "epoch": 22.94, "learning_rate": 8.958611481975968e-05, "loss": 0.0169, "step": 2684 }, { "epoch": 22.97, "learning_rate": 8.97196261682243e-05, "loss": 0.0143, "step": 2688 }, { "epoch": 23.01, "learning_rate": 8.985313751668892e-05, "loss": 0.0195, "step": 2692 }, { "epoch": 23.04, "learning_rate": 8.998664886515354e-05, "loss": 0.0151, "step": 2696 }, { "epoch": 23.08, "learning_rate": 9.012016021361817e-05, "loss": 0.0124, "step": 2700 }, { "epoch": 23.11, "learning_rate": 9.025367156208279e-05, "loss": 0.0156, "step": 2704 }, { "epoch": 23.15, "learning_rate": 9.03871829105474e-05, "loss": 0.0157, "step": 2708 }, { "epoch": 23.18, "learning_rate": 9.052069425901202e-05, "loss": 0.0169, "step": 2712 }, { "epoch": 23.21, "learning_rate": 9.065420560747664e-05, "loss": 0.018, "step": 2716 }, { "epoch": 23.25, "learning_rate": 9.078771695594125e-05, "loss": 0.0156, "step": 2720 }, { "epoch": 23.28, "learning_rate": 9.092122830440588e-05, "loss": 0.0147, "step": 2724 }, { "epoch": 23.32, "learning_rate": 9.10547396528705e-05, "loss": 0.0145, "step": 2728 }, { "epoch": 23.35, "learning_rate": 9.118825100133512e-05, "loss": 0.0122, "step": 2732 }, { "epoch": 23.38, "learning_rate": 9.132176234979973e-05, "loss": 0.0126, "step": 2736 }, { "epoch": 23.42, "learning_rate": 9.145527369826435e-05, "loss": 0.0147, "step": 2740 }, { "epoch": 23.45, "learning_rate": 9.158878504672898e-05, "loss": 0.0209, "step": 2744 }, { "epoch": 23.49, "learning_rate": 9.17222963951936e-05, "loss": 0.0159, "step": 2748 }, { "epoch": 23.52, "learning_rate": 9.185580774365822e-05, "loss": 0.0116, "step": 2752 }, { "epoch": 23.56, "learning_rate": 9.198931909212284e-05, "loss": 0.0136, "step": 2756 }, { "epoch": 23.59, "learning_rate": 9.212283044058745e-05, "loss": 0.016, "step": 2760 }, { "epoch": 23.62, "learning_rate": 9.225634178905206e-05, "loss": 0.0152, "step": 2764 }, { "epoch": 23.66, "learning_rate": 9.238985313751669e-05, "loss": 0.0125, "step": 2768 }, { "epoch": 23.69, "learning_rate": 9.252336448598131e-05, "loss": 0.0166, "step": 2772 }, { "epoch": 23.73, "learning_rate": 9.265687583444593e-05, "loss": 0.013, "step": 2776 }, { "epoch": 23.76, "learning_rate": 9.279038718291055e-05, "loss": 0.0162, "step": 2780 }, { "epoch": 23.79, "learning_rate": 9.292389853137518e-05, "loss": 0.0128, "step": 2784 }, { "epoch": 23.83, "learning_rate": 9.305740987983979e-05, "loss": 0.0104, "step": 2788 }, { "epoch": 23.86, "learning_rate": 9.319092122830441e-05, "loss": 0.0143, "step": 2792 }, { "epoch": 23.9, "learning_rate": 9.332443257676903e-05, "loss": 0.0143, "step": 2796 }, { "epoch": 23.93, "learning_rate": 9.345794392523365e-05, "loss": 0.0162, "step": 2800 }, { "epoch": 23.97, "learning_rate": 9.359145527369826e-05, "loss": 0.0167, "step": 2804 }, { "epoch": 24.0, "learning_rate": 9.372496662216289e-05, "loss": 0.0143, "step": 2808 }, { "epoch": 24.03, "learning_rate": 9.385847797062751e-05, "loss": 0.015, "step": 2812 }, { "epoch": 24.07, "learning_rate": 9.399198931909212e-05, "loss": 0.012, "step": 2816 }, { "epoch": 24.1, "learning_rate": 9.412550066755674e-05, "loss": 0.0107, "step": 2820 }, { "epoch": 24.14, "learning_rate": 9.425901201602136e-05, "loss": 0.0114, "step": 2824 }, { "epoch": 24.17, "learning_rate": 9.439252336448599e-05, "loss": 0.0133, "step": 2828 }, { "epoch": 24.21, "learning_rate": 9.452603471295061e-05, "loss": 0.0135, "step": 2832 }, { "epoch": 24.24, "learning_rate": 9.465954606141523e-05, "loss": 0.0157, "step": 2836 }, { "epoch": 24.27, "learning_rate": 9.479305740987985e-05, "loss": 0.0134, "step": 2840 }, { "epoch": 24.31, "learning_rate": 9.492656875834446e-05, "loss": 0.0155, "step": 2844 }, { "epoch": 24.34, "learning_rate": 9.506008010680909e-05, "loss": 0.0098, "step": 2848 }, { "epoch": 24.38, "learning_rate": 9.51935914552737e-05, "loss": 0.0121, "step": 2852 }, { "epoch": 24.41, "learning_rate": 9.532710280373832e-05, "loss": 0.0147, "step": 2856 }, { "epoch": 24.44, "learning_rate": 9.546061415220294e-05, "loss": 0.0128, "step": 2860 }, { "epoch": 24.48, "learning_rate": 9.559412550066756e-05, "loss": 0.0153, "step": 2864 }, { "epoch": 24.51, "learning_rate": 9.572763684913219e-05, "loss": 0.0118, "step": 2868 }, { "epoch": 24.55, "learning_rate": 9.58611481975968e-05, "loss": 0.0139, "step": 2872 }, { "epoch": 24.58, "learning_rate": 9.599465954606142e-05, "loss": 0.0115, "step": 2876 }, { "epoch": 24.62, "learning_rate": 9.612817089452604e-05, "loss": 0.0127, "step": 2880 }, { "epoch": 24.65, "learning_rate": 9.626168224299066e-05, "loss": 0.0127, "step": 2884 }, { "epoch": 24.68, "learning_rate": 9.639519359145529e-05, "loss": 0.0121, "step": 2888 }, { "epoch": 24.72, "learning_rate": 9.65287049399199e-05, "loss": 0.011, "step": 2892 }, { "epoch": 24.75, "learning_rate": 9.666221628838452e-05, "loss": 0.0126, "step": 2896 }, { "epoch": 24.79, "learning_rate": 9.679572763684913e-05, "loss": 0.0112, "step": 2900 }, { "epoch": 24.82, "learning_rate": 9.692923898531375e-05, "loss": 0.0152, "step": 2904 }, { "epoch": 24.85, "learning_rate": 9.706275033377837e-05, "loss": 0.014, "step": 2908 }, { "epoch": 24.89, "learning_rate": 9.7196261682243e-05, "loss": 0.013, "step": 2912 }, { "epoch": 24.92, "learning_rate": 9.732977303070762e-05, "loss": 0.0113, "step": 2916 }, { "epoch": 24.96, "learning_rate": 9.746328437917224e-05, "loss": 0.0125, "step": 2920 }, { "epoch": 24.99, "learning_rate": 9.759679572763686e-05, "loss": 0.012, "step": 2924 }, { "epoch": 25.03, "learning_rate": 9.773030707610147e-05, "loss": 0.0127, "step": 2928 }, { "epoch": 25.06, "learning_rate": 9.78638184245661e-05, "loss": 0.0107, "step": 2932 }, { "epoch": 25.09, "learning_rate": 9.79973297730307e-05, "loss": 0.0094, "step": 2936 }, { "epoch": 25.13, "learning_rate": 9.813084112149533e-05, "loss": 0.0148, "step": 2940 }, { "epoch": 25.16, "learning_rate": 9.826435246995995e-05, "loss": 0.0133, "step": 2944 }, { "epoch": 25.2, "learning_rate": 9.839786381842457e-05, "loss": 0.0105, "step": 2948 }, { "epoch": 25.23, "learning_rate": 9.85313751668892e-05, "loss": 0.0114, "step": 2952 }, { "epoch": 25.26, "learning_rate": 9.86648865153538e-05, "loss": 0.0133, "step": 2956 }, { "epoch": 25.3, "learning_rate": 9.879839786381843e-05, "loss": 0.01, "step": 2960 }, { "epoch": 25.33, "learning_rate": 9.893190921228305e-05, "loss": 0.0107, "step": 2964 }, { "epoch": 25.37, "learning_rate": 9.906542056074767e-05, "loss": 0.0138, "step": 2968 }, { "epoch": 25.4, "learning_rate": 9.91989319092123e-05, "loss": 0.0114, "step": 2972 }, { "epoch": 25.44, "learning_rate": 9.93324432576769e-05, "loss": 0.0128, "step": 2976 }, { "epoch": 25.47, "learning_rate": 9.946595460614153e-05, "loss": 0.0095, "step": 2980 }, { "epoch": 25.5, "learning_rate": 9.959946595460614e-05, "loss": 0.0113, "step": 2984 }, { "epoch": 25.54, "learning_rate": 9.973297730307076e-05, "loss": 0.0093, "step": 2988 }, { "epoch": 25.57, "learning_rate": 9.986648865153538e-05, "loss": 0.0118, "step": 2992 }, { "epoch": 25.61, "learning_rate": 0.0001, "loss": 0.0119, "step": 2996 }, { "epoch": 25.64, "learning_rate": 9.99999945668858e-05, "loss": 0.0147, "step": 3000 }, { "epoch": 25.64, "eval_exact_match": 0.4625779625779626, "eval_loss": 0.6119692325592041, "eval_runtime": 85.6703, "eval_samples_per_second": 11.229, "step": 3000 }, { "epoch": 25.68, "learning_rate": 9.99999782675444e-05, "loss": 0.0108, "step": 3004 }, { "epoch": 25.71, "learning_rate": 9.999995110197932e-05, "loss": 0.0127, "step": 3008 }, { "epoch": 25.74, "learning_rate": 9.999991307019647e-05, "loss": 0.0102, "step": 3012 }, { "epoch": 25.78, "learning_rate": 9.999986417220411e-05, "loss": 0.0167, "step": 3016 }, { "epoch": 25.81, "learning_rate": 9.999980440801289e-05, "loss": 0.0135, "step": 3020 }, { "epoch": 25.85, "learning_rate": 9.999973377763576e-05, "loss": 0.0114, "step": 3024 }, { "epoch": 25.88, "learning_rate": 9.999965228108811e-05, "loss": 0.0118, "step": 3028 }, { "epoch": 25.91, "learning_rate": 9.999955991838763e-05, "loss": 0.0123, "step": 3032 }, { "epoch": 25.95, "learning_rate": 9.99994566895544e-05, "loss": 0.0136, "step": 3036 }, { "epoch": 25.98, "learning_rate": 9.999934259461086e-05, "loss": 0.0105, "step": 3040 }, { "epoch": 26.02, "learning_rate": 9.999921763358177e-05, "loss": 0.0088, "step": 3044 }, { "epoch": 26.05, "learning_rate": 9.999908180649433e-05, "loss": 0.0111, "step": 3048 }, { "epoch": 26.09, "learning_rate": 9.999893511337803e-05, "loss": 0.0089, "step": 3052 }, { "epoch": 26.12, "learning_rate": 9.999877755426476e-05, "loss": 0.0081, "step": 3056 }, { "epoch": 26.15, "learning_rate": 9.99986091291888e-05, "loss": 0.0105, "step": 3060 }, { "epoch": 26.19, "learning_rate": 9.999842983818668e-05, "loss": 0.0115, "step": 3064 }, { "epoch": 26.22, "learning_rate": 9.99982396812974e-05, "loss": 0.0082, "step": 3068 }, { "epoch": 26.26, "learning_rate": 9.99980386585623e-05, "loss": 0.0141, "step": 3072 }, { "epoch": 26.29, "learning_rate": 9.999782677002505e-05, "loss": 0.0092, "step": 3076 }, { "epoch": 26.32, "learning_rate": 9.999760401573169e-05, "loss": 0.0117, "step": 3080 }, { "epoch": 26.36, "learning_rate": 9.999737039573065e-05, "loss": 0.0092, "step": 3084 }, { "epoch": 26.39, "learning_rate": 9.99971259100727e-05, "loss": 0.011, "step": 3088 }, { "epoch": 26.43, "learning_rate": 9.999687055881095e-05, "loss": 0.0115, "step": 3092 }, { "epoch": 26.46, "learning_rate": 9.999660434200093e-05, "loss": 0.007, "step": 3096 }, { "epoch": 26.5, "learning_rate": 9.999632725970047e-05, "loss": 0.0127, "step": 3100 }, { "epoch": 26.53, "learning_rate": 9.999603931196979e-05, "loss": 0.0122, "step": 3104 }, { "epoch": 26.56, "learning_rate": 9.999574049887146e-05, "loss": 0.0111, "step": 3108 }, { "epoch": 26.6, "learning_rate": 9.999543082047044e-05, "loss": 0.0138, "step": 3112 }, { "epoch": 26.63, "learning_rate": 9.999511027683402e-05, "loss": 0.0079, "step": 3116 }, { "epoch": 26.67, "learning_rate": 9.999477886803186e-05, "loss": 0.0105, "step": 3120 }, { "epoch": 26.7, "learning_rate": 9.999443659413598e-05, "loss": 0.0112, "step": 3124 }, { "epoch": 26.74, "learning_rate": 9.99940834552208e-05, "loss": 0.0124, "step": 3128 }, { "epoch": 26.77, "learning_rate": 9.9993719451363e-05, "loss": 0.0089, "step": 3132 }, { "epoch": 26.8, "learning_rate": 9.999334458264173e-05, "loss": 0.0087, "step": 3136 }, { "epoch": 26.84, "learning_rate": 9.999295884913846e-05, "loss": 0.0126, "step": 3140 }, { "epoch": 26.87, "learning_rate": 9.9992562250937e-05, "loss": 0.009, "step": 3144 }, { "epoch": 26.91, "learning_rate": 9.999215478812358e-05, "loss": 0.011, "step": 3148 }, { "epoch": 26.94, "learning_rate": 9.99917364607867e-05, "loss": 0.0114, "step": 3152 }, { "epoch": 26.97, "learning_rate": 9.999130726901729e-05, "loss": 0.0098, "step": 3156 }, { "epoch": 27.01, "learning_rate": 9.999086721290864e-05, "loss": 0.0096, "step": 3160 }, { "epoch": 27.04, "learning_rate": 9.999041629255637e-05, "loss": 0.0122, "step": 3164 }, { "epoch": 27.08, "learning_rate": 9.99899545080585e-05, "loss": 0.0073, "step": 3168 }, { "epoch": 27.11, "learning_rate": 9.998948185951535e-05, "loss": 0.0094, "step": 3172 }, { "epoch": 27.15, "learning_rate": 9.998899834702964e-05, "loss": 0.008, "step": 3176 }, { "epoch": 27.18, "learning_rate": 9.99885039707065e-05, "loss": 0.0086, "step": 3180 }, { "epoch": 27.21, "learning_rate": 9.99879987306533e-05, "loss": 0.0122, "step": 3184 }, { "epoch": 27.25, "learning_rate": 9.99874826269799e-05, "loss": 0.0103, "step": 3188 }, { "epoch": 27.28, "learning_rate": 9.998695565979844e-05, "loss": 0.0085, "step": 3192 }, { "epoch": 27.32, "learning_rate": 9.998641782922342e-05, "loss": 0.0083, "step": 3196 }, { "epoch": 27.35, "learning_rate": 9.998586913537177e-05, "loss": 0.0111, "step": 3200 }, { "epoch": 27.38, "learning_rate": 9.99853095783627e-05, "loss": 0.0077, "step": 3204 }, { "epoch": 27.42, "learning_rate": 9.99847391583178e-05, "loss": 0.012, "step": 3208 }, { "epoch": 27.45, "learning_rate": 9.998415787536111e-05, "loss": 0.0117, "step": 3212 }, { "epoch": 27.49, "learning_rate": 9.998356572961887e-05, "loss": 0.0099, "step": 3216 }, { "epoch": 27.52, "learning_rate": 9.998296272121983e-05, "loss": 0.0098, "step": 3220 }, { "epoch": 27.56, "learning_rate": 9.998234885029501e-05, "loss": 0.0066, "step": 3224 }, { "epoch": 27.59, "learning_rate": 9.998172411697781e-05, "loss": 0.0109, "step": 3228 }, { "epoch": 27.62, "learning_rate": 9.998108852140402e-05, "loss": 0.0069, "step": 3232 }, { "epoch": 27.66, "learning_rate": 9.998044206371177e-05, "loss": 0.0079, "step": 3236 }, { "epoch": 27.69, "learning_rate": 9.997978474404154e-05, "loss": 0.0117, "step": 3240 }, { "epoch": 27.73, "learning_rate": 9.997911656253618e-05, "loss": 0.009, "step": 3244 }, { "epoch": 27.76, "learning_rate": 9.997843751934093e-05, "loss": 0.0104, "step": 3248 }, { "epoch": 27.79, "learning_rate": 9.997774761460332e-05, "loss": 0.0116, "step": 3252 }, { "epoch": 27.83, "learning_rate": 9.997704684847332e-05, "loss": 0.0079, "step": 3256 }, { "epoch": 27.86, "learning_rate": 9.997633522110322e-05, "loss": 0.0105, "step": 3260 }, { "epoch": 27.9, "learning_rate": 9.997561273264764e-05, "loss": 0.0091, "step": 3264 }, { "epoch": 27.93, "learning_rate": 9.997487938326362e-05, "loss": 0.0069, "step": 3268 }, { "epoch": 27.97, "learning_rate": 9.997413517311055e-05, "loss": 0.0114, "step": 3272 }, { "epoch": 28.0, "learning_rate": 9.997338010235013e-05, "loss": 0.0084, "step": 3276 }, { "epoch": 28.03, "learning_rate": 9.99726141711465e-05, "loss": 0.0074, "step": 3280 }, { "epoch": 28.07, "learning_rate": 9.997183737966606e-05, "loss": 0.0082, "step": 3284 }, { "epoch": 28.1, "learning_rate": 9.997104972807768e-05, "loss": 0.0112, "step": 3288 }, { "epoch": 28.14, "learning_rate": 9.997025121655248e-05, "loss": 0.0125, "step": 3292 }, { "epoch": 28.17, "learning_rate": 9.996944184526405e-05, "loss": 0.0116, "step": 3296 }, { "epoch": 28.21, "learning_rate": 9.996862161438825e-05, "loss": 0.0083, "step": 3300 }, { "epoch": 28.24, "learning_rate": 9.996779052410337e-05, "loss": 0.0099, "step": 3304 }, { "epoch": 28.27, "learning_rate": 9.996694857459e-05, "loss": 0.0095, "step": 3308 }, { "epoch": 28.31, "learning_rate": 9.99660957660311e-05, "loss": 0.0057, "step": 3312 }, { "epoch": 28.34, "learning_rate": 9.996523209861204e-05, "loss": 0.0047, "step": 3316 }, { "epoch": 28.38, "learning_rate": 9.996435757252052e-05, "loss": 0.0108, "step": 3320 }, { "epoch": 28.41, "learning_rate": 9.996347218794656e-05, "loss": 0.0081, "step": 3324 }, { "epoch": 28.44, "learning_rate": 9.99625759450826e-05, "loss": 0.0095, "step": 3328 }, { "epoch": 28.48, "learning_rate": 9.996166884412342e-05, "loss": 0.0069, "step": 3332 }, { "epoch": 28.51, "learning_rate": 9.996075088526615e-05, "loss": 0.0106, "step": 3336 }, { "epoch": 28.55, "learning_rate": 9.995982206871029e-05, "loss": 0.009, "step": 3340 }, { "epoch": 28.58, "learning_rate": 9.995888239465768e-05, "loss": 0.0081, "step": 3344 }, { "epoch": 28.62, "learning_rate": 9.995793186331253e-05, "loss": 0.0075, "step": 3348 }, { "epoch": 28.65, "learning_rate": 9.995697047488142e-05, "loss": 0.0068, "step": 3352 }, { "epoch": 28.68, "learning_rate": 9.99559982295733e-05, "loss": 0.0093, "step": 3356 }, { "epoch": 28.72, "learning_rate": 9.995501512759946e-05, "loss": 0.0068, "step": 3360 }, { "epoch": 28.75, "learning_rate": 9.995402116917352e-05, "loss": 0.0098, "step": 3364 }, { "epoch": 28.79, "learning_rate": 9.995301635451155e-05, "loss": 0.0096, "step": 3368 }, { "epoch": 28.82, "learning_rate": 9.995200068383186e-05, "loss": 0.0102, "step": 3372 }, { "epoch": 28.85, "learning_rate": 9.995097415735521e-05, "loss": 0.0081, "step": 3376 }, { "epoch": 28.89, "learning_rate": 9.994993677530468e-05, "loss": 0.0099, "step": 3380 }, { "epoch": 28.92, "learning_rate": 9.994888853790574e-05, "loss": 0.0122, "step": 3384 }, { "epoch": 28.96, "learning_rate": 9.994782944538618e-05, "loss": 0.0105, "step": 3388 }, { "epoch": 28.99, "learning_rate": 9.994675949797616e-05, "loss": 0.007, "step": 3392 }, { "epoch": 29.03, "learning_rate": 9.994567869590822e-05, "loss": 0.0078, "step": 3396 }, { "epoch": 29.06, "learning_rate": 9.994458703941723e-05, "loss": 0.0104, "step": 3400 }, { "epoch": 29.09, "learning_rate": 9.994348452874046e-05, "loss": 0.0082, "step": 3404 }, { "epoch": 29.13, "learning_rate": 9.99423711641175e-05, "loss": 0.006, "step": 3408 }, { "epoch": 29.16, "learning_rate": 9.994124694579028e-05, "loss": 0.0098, "step": 3412 }, { "epoch": 29.2, "learning_rate": 9.994011187400317e-05, "loss": 0.0094, "step": 3416 }, { "epoch": 29.23, "learning_rate": 9.993896594900281e-05, "loss": 0.0062, "step": 3420 }, { "epoch": 29.26, "learning_rate": 9.993780917103825e-05, "loss": 0.0067, "step": 3424 }, { "epoch": 29.3, "learning_rate": 9.993664154036091e-05, "loss": 0.0069, "step": 3428 }, { "epoch": 29.33, "learning_rate": 9.993546305722452e-05, "loss": 0.0079, "step": 3432 }, { "epoch": 29.37, "learning_rate": 9.99342737218852e-05, "loss": 0.0086, "step": 3436 }, { "epoch": 29.4, "learning_rate": 9.993307353460142e-05, "loss": 0.0068, "step": 3440 }, { "epoch": 29.44, "learning_rate": 9.993186249563401e-05, "loss": 0.0079, "step": 3444 }, { "epoch": 29.47, "learning_rate": 9.993064060524615e-05, "loss": 0.0081, "step": 3448 }, { "epoch": 29.5, "learning_rate": 9.992940786370342e-05, "loss": 0.0072, "step": 3452 }, { "epoch": 29.54, "learning_rate": 9.992816427127368e-05, "loss": 0.006, "step": 3456 }, { "epoch": 29.57, "learning_rate": 9.992690982822721e-05, "loss": 0.0094, "step": 3460 }, { "epoch": 29.61, "learning_rate": 9.992564453483667e-05, "loss": 0.0078, "step": 3464 }, { "epoch": 29.64, "learning_rate": 9.992436839137696e-05, "loss": 0.0045, "step": 3468 }, { "epoch": 29.68, "learning_rate": 9.992308139812549e-05, "loss": 0.0055, "step": 3472 }, { "epoch": 29.71, "learning_rate": 9.992178355536192e-05, "loss": 0.0059, "step": 3476 }, { "epoch": 29.74, "learning_rate": 9.99204748633683e-05, "loss": 0.0087, "step": 3480 }, { "epoch": 29.78, "learning_rate": 9.991915532242908e-05, "loss": 0.0082, "step": 3484 }, { "epoch": 29.81, "learning_rate": 9.991782493283097e-05, "loss": 0.0094, "step": 3488 }, { "epoch": 29.85, "learning_rate": 9.991648369486314e-05, "loss": 0.0077, "step": 3492 }, { "epoch": 29.88, "learning_rate": 9.991513160881706e-05, "loss": 0.008, "step": 3496 }, { "epoch": 29.91, "learning_rate": 9.991376867498658e-05, "loss": 0.0089, "step": 3500 }, { "epoch": 29.95, "learning_rate": 9.991239489366788e-05, "loss": 0.0089, "step": 3504 }, { "epoch": 29.98, "learning_rate": 9.991101026515952e-05, "loss": 0.0076, "step": 3508 }, { "epoch": 30.02, "learning_rate": 9.990961478976244e-05, "loss": 0.0082, "step": 3512 }, { "epoch": 30.05, "learning_rate": 9.990820846777988e-05, "loss": 0.0099, "step": 3516 }, { "epoch": 30.09, "learning_rate": 9.990679129951749e-05, "loss": 0.0075, "step": 3520 }, { "epoch": 30.12, "learning_rate": 9.990536328528324e-05, "loss": 0.0061, "step": 3524 }, { "epoch": 30.15, "learning_rate": 9.990392442538747e-05, "loss": 0.0067, "step": 3528 }, { "epoch": 30.19, "learning_rate": 9.990247472014291e-05, "loss": 0.0074, "step": 3532 }, { "epoch": 30.22, "learning_rate": 9.990101416986457e-05, "loss": 0.0051, "step": 3536 }, { "epoch": 30.26, "learning_rate": 9.98995427748699e-05, "loss": 0.0063, "step": 3540 }, { "epoch": 30.29, "learning_rate": 9.989806053547866e-05, "loss": 0.0062, "step": 3544 }, { "epoch": 30.32, "learning_rate": 9.989656745201298e-05, "loss": 0.0071, "step": 3548 }, { "epoch": 30.36, "learning_rate": 9.989506352479734e-05, "loss": 0.0062, "step": 3552 }, { "epoch": 30.39, "learning_rate": 9.989354875415857e-05, "loss": 0.0066, "step": 3556 }, { "epoch": 30.43, "learning_rate": 9.989202314042588e-05, "loss": 0.0071, "step": 3560 }, { "epoch": 30.46, "learning_rate": 9.989048668393082e-05, "loss": 0.0087, "step": 3564 }, { "epoch": 30.5, "learning_rate": 9.988893938500731e-05, "loss": 0.0075, "step": 3568 }, { "epoch": 30.53, "learning_rate": 9.988738124399159e-05, "loss": 0.0102, "step": 3572 }, { "epoch": 30.56, "learning_rate": 9.988581226122231e-05, "loss": 0.0075, "step": 3576 }, { "epoch": 30.6, "learning_rate": 9.988423243704042e-05, "loss": 0.0095, "step": 3580 }, { "epoch": 30.63, "learning_rate": 9.988264177178929e-05, "loss": 0.0061, "step": 3584 }, { "epoch": 30.67, "learning_rate": 9.988104026581458e-05, "loss": 0.0063, "step": 3588 }, { "epoch": 30.7, "learning_rate": 9.987942791946436e-05, "loss": 0.0057, "step": 3592 }, { "epoch": 30.74, "learning_rate": 9.987780473308901e-05, "loss": 0.0054, "step": 3596 }, { "epoch": 30.77, "learning_rate": 9.98761707070413e-05, "loss": 0.0053, "step": 3600 }, { "epoch": 30.8, "learning_rate": 9.987452584167635e-05, "loss": 0.0096, "step": 3604 }, { "epoch": 30.84, "learning_rate": 9.987287013735161e-05, "loss": 0.0066, "step": 3608 }, { "epoch": 30.87, "learning_rate": 9.987120359442693e-05, "loss": 0.0088, "step": 3612 }, { "epoch": 30.91, "learning_rate": 9.986952621326448e-05, "loss": 0.0064, "step": 3616 }, { "epoch": 30.94, "learning_rate": 9.986783799422879e-05, "loss": 0.0091, "step": 3620 }, { "epoch": 30.97, "learning_rate": 9.986613893768675e-05, "loss": 0.0064, "step": 3624 }, { "epoch": 31.01, "learning_rate": 9.986442904400763e-05, "loss": 0.0065, "step": 3628 }, { "epoch": 31.04, "learning_rate": 9.9862708313563e-05, "loss": 0.0055, "step": 3632 }, { "epoch": 31.08, "learning_rate": 9.986097674672683e-05, "loss": 0.0041, "step": 3636 }, { "epoch": 31.11, "learning_rate": 9.985923434387545e-05, "loss": 0.0072, "step": 3640 }, { "epoch": 31.15, "learning_rate": 9.98574811053875e-05, "loss": 0.0084, "step": 3644 }, { "epoch": 31.18, "learning_rate": 9.985571703164402e-05, "loss": 0.0074, "step": 3648 }, { "epoch": 31.21, "learning_rate": 9.985394212302836e-05, "loss": 0.0073, "step": 3652 }, { "epoch": 31.25, "learning_rate": 9.98521563799263e-05, "loss": 0.0059, "step": 3656 }, { "epoch": 31.28, "learning_rate": 9.985035980272588e-05, "loss": 0.0065, "step": 3660 }, { "epoch": 31.32, "learning_rate": 9.984855239181755e-05, "loss": 0.0058, "step": 3664 }, { "epoch": 31.35, "learning_rate": 9.984673414759411e-05, "loss": 0.0069, "step": 3668 }, { "epoch": 31.38, "learning_rate": 9.984490507045073e-05, "loss": 0.0068, "step": 3672 }, { "epoch": 31.42, "learning_rate": 9.984306516078488e-05, "loss": 0.006, "step": 3676 }, { "epoch": 31.45, "learning_rate": 9.984121441899644e-05, "loss": 0.0071, "step": 3680 }, { "epoch": 31.49, "learning_rate": 9.983935284548761e-05, "loss": 0.0052, "step": 3684 }, { "epoch": 31.52, "learning_rate": 9.983748044066296e-05, "loss": 0.0055, "step": 3688 }, { "epoch": 31.56, "learning_rate": 9.983559720492941e-05, "loss": 0.0065, "step": 3692 }, { "epoch": 31.59, "learning_rate": 9.983370313869622e-05, "loss": 0.0063, "step": 3696 }, { "epoch": 31.62, "learning_rate": 9.983179824237505e-05, "loss": 0.0045, "step": 3700 }, { "epoch": 31.66, "learning_rate": 9.982988251637983e-05, "loss": 0.0054, "step": 3704 }, { "epoch": 31.69, "learning_rate": 9.982795596112695e-05, "loss": 0.0062, "step": 3708 }, { "epoch": 31.73, "learning_rate": 9.982601857703507e-05, "loss": 0.009, "step": 3712 }, { "epoch": 31.76, "learning_rate": 9.982407036452523e-05, "loss": 0.0089, "step": 3716 }, { "epoch": 31.79, "learning_rate": 9.982211132402082e-05, "loss": 0.0065, "step": 3720 }, { "epoch": 31.83, "learning_rate": 9.98201414559476e-05, "loss": 0.0053, "step": 3724 }, { "epoch": 31.86, "learning_rate": 9.981816076073368e-05, "loss": 0.0073, "step": 3728 }, { "epoch": 31.9, "learning_rate": 9.981616923880947e-05, "loss": 0.0063, "step": 3732 }, { "epoch": 31.93, "learning_rate": 9.981416689060784e-05, "loss": 0.0052, "step": 3736 }, { "epoch": 31.97, "learning_rate": 9.981215371656388e-05, "loss": 0.0062, "step": 3740 }, { "epoch": 32.0, "learning_rate": 9.981012971711516e-05, "loss": 0.0058, "step": 3744 }, { "epoch": 32.03, "learning_rate": 9.980809489270152e-05, "loss": 0.0065, "step": 3748 }, { "epoch": 32.07, "learning_rate": 9.980604924376518e-05, "loss": 0.0044, "step": 3752 }, { "epoch": 32.1, "learning_rate": 9.98039927707507e-05, "loss": 0.0059, "step": 3756 }, { "epoch": 32.14, "learning_rate": 9.980192547410503e-05, "loss": 0.0069, "step": 3760 }, { "epoch": 32.17, "learning_rate": 9.979984735427741e-05, "loss": 0.006, "step": 3764 }, { "epoch": 32.21, "learning_rate": 9.979775841171949e-05, "loss": 0.0053, "step": 3768 }, { "epoch": 32.24, "learning_rate": 9.979565864688524e-05, "loss": 0.005, "step": 3772 }, { "epoch": 32.27, "learning_rate": 9.9793548060231e-05, "loss": 0.0054, "step": 3776 }, { "epoch": 32.31, "learning_rate": 9.979142665221544e-05, "loss": 0.0042, "step": 3780 }, { "epoch": 32.34, "learning_rate": 9.978929442329959e-05, "loss": 0.0063, "step": 3784 }, { "epoch": 32.38, "learning_rate": 9.978715137394685e-05, "loss": 0.0053, "step": 3788 }, { "epoch": 32.41, "learning_rate": 9.978499750462295e-05, "loss": 0.0052, "step": 3792 }, { "epoch": 32.44, "learning_rate": 9.978283281579598e-05, "loss": 0.005, "step": 3796 }, { "epoch": 32.48, "learning_rate": 9.978065730793638e-05, "loss": 0.0068, "step": 3800 }, { "epoch": 32.51, "learning_rate": 9.977847098151694e-05, "loss": 0.0057, "step": 3804 }, { "epoch": 32.55, "learning_rate": 9.97762738370128e-05, "loss": 0.0074, "step": 3808 }, { "epoch": 32.58, "learning_rate": 9.977406587490146e-05, "loss": 0.005, "step": 3812 }, { "epoch": 32.62, "learning_rate": 9.977184709566277e-05, "loss": 0.0055, "step": 3816 }, { "epoch": 32.65, "learning_rate": 9.976961749977892e-05, "loss": 0.0075, "step": 3820 }, { "epoch": 32.68, "learning_rate": 9.976737708773445e-05, "loss": 0.0045, "step": 3824 }, { "epoch": 32.72, "learning_rate": 9.976512586001625e-05, "loss": 0.0066, "step": 3828 }, { "epoch": 32.75, "learning_rate": 9.976286381711357e-05, "loss": 0.0064, "step": 3832 }, { "epoch": 32.79, "learning_rate": 9.976059095951804e-05, "loss": 0.0086, "step": 3836 }, { "epoch": 32.82, "learning_rate": 9.975830728772355e-05, "loss": 0.0069, "step": 3840 }, { "epoch": 32.85, "learning_rate": 9.975601280222647e-05, "loss": 0.006, "step": 3844 }, { "epoch": 32.89, "learning_rate": 9.975370750352538e-05, "loss": 0.0046, "step": 3848 }, { "epoch": 32.92, "learning_rate": 9.975139139212131e-05, "loss": 0.0065, "step": 3852 }, { "epoch": 32.96, "learning_rate": 9.97490644685176e-05, "loss": 0.007, "step": 3856 }, { "epoch": 32.99, "learning_rate": 9.974672673321995e-05, "loss": 0.0056, "step": 3860 }, { "epoch": 33.03, "learning_rate": 9.974437818673642e-05, "loss": 0.0051, "step": 3864 }, { "epoch": 33.06, "learning_rate": 9.974201882957739e-05, "loss": 0.0041, "step": 3868 }, { "epoch": 33.09, "learning_rate": 9.97396486622556e-05, "loss": 0.0049, "step": 3872 }, { "epoch": 33.13, "learning_rate": 9.973726768528618e-05, "loss": 0.006, "step": 3876 }, { "epoch": 33.16, "learning_rate": 9.973487589918655e-05, "loss": 0.0065, "step": 3880 }, { "epoch": 33.2, "learning_rate": 9.973247330447649e-05, "loss": 0.0057, "step": 3884 }, { "epoch": 33.23, "learning_rate": 9.973005990167816e-05, "loss": 0.0059, "step": 3888 }, { "epoch": 33.26, "learning_rate": 9.972763569131606e-05, "loss": 0.0059, "step": 3892 }, { "epoch": 33.3, "learning_rate": 9.972520067391702e-05, "loss": 0.0051, "step": 3896 }, { "epoch": 33.33, "learning_rate": 9.972275485001024e-05, "loss": 0.0061, "step": 3900 }, { "epoch": 33.37, "learning_rate": 9.972029822012722e-05, "loss": 0.006, "step": 3904 }, { "epoch": 33.4, "learning_rate": 9.971783078480191e-05, "loss": 0.0049, "step": 3908 }, { "epoch": 33.44, "learning_rate": 9.97153525445705e-05, "loss": 0.004, "step": 3912 }, { "epoch": 33.47, "learning_rate": 9.971286349997156e-05, "loss": 0.0072, "step": 3916 }, { "epoch": 33.5, "learning_rate": 9.971036365154604e-05, "loss": 0.0062, "step": 3920 }, { "epoch": 33.54, "learning_rate": 9.970785299983725e-05, "loss": 0.0049, "step": 3924 }, { "epoch": 33.57, "learning_rate": 9.970533154539077e-05, "loss": 0.0048, "step": 3928 }, { "epoch": 33.61, "learning_rate": 9.970279928875458e-05, "loss": 0.0048, "step": 3932 }, { "epoch": 33.64, "learning_rate": 9.970025623047902e-05, "loss": 0.0066, "step": 3936 }, { "epoch": 33.68, "learning_rate": 9.969770237111676e-05, "loss": 0.0068, "step": 3940 }, { "epoch": 33.71, "learning_rate": 9.969513771122278e-05, "loss": 0.0052, "step": 3944 }, { "epoch": 33.74, "learning_rate": 9.96925622513545e-05, "loss": 0.0049, "step": 3948 }, { "epoch": 33.78, "learning_rate": 9.96899759920716e-05, "loss": 0.0068, "step": 3952 }, { "epoch": 33.81, "learning_rate": 9.968737893393612e-05, "loss": 0.0072, "step": 3956 }, { "epoch": 33.85, "learning_rate": 9.96847710775125e-05, "loss": 0.0118, "step": 3960 }, { "epoch": 33.88, "learning_rate": 9.968215242336746e-05, "loss": 0.0051, "step": 3964 }, { "epoch": 33.91, "learning_rate": 9.967952297207013e-05, "loss": 0.0039, "step": 3968 }, { "epoch": 33.95, "learning_rate": 9.967688272419193e-05, "loss": 0.0049, "step": 3972 }, { "epoch": 33.98, "learning_rate": 9.967423168030665e-05, "loss": 0.0058, "step": 3976 }, { "epoch": 34.02, "learning_rate": 9.967156984099044e-05, "loss": 0.0049, "step": 3980 }, { "epoch": 34.05, "learning_rate": 9.966889720682177e-05, "loss": 0.005, "step": 3984 }, { "epoch": 34.09, "learning_rate": 9.96662137783815e-05, "loss": 0.0055, "step": 3988 }, { "epoch": 34.12, "learning_rate": 9.966351955625277e-05, "loss": 0.0038, "step": 3992 }, { "epoch": 34.15, "learning_rate": 9.96608145410211e-05, "loss": 0.0038, "step": 3996 }, { "epoch": 34.19, "learning_rate": 9.965809873327439e-05, "loss": 0.0054, "step": 4000 }, { "epoch": 34.19, "eval_exact_match": 0.4896049896049896, "eval_loss": 0.668160617351532, "eval_runtime": 89.1426, "eval_samples_per_second": 10.792, "step": 4000 }, { "epoch": 34.22, "learning_rate": 9.965537213360282e-05, "loss": 0.0065, "step": 4004 }, { "epoch": 34.26, "learning_rate": 9.965263474259896e-05, "loss": 0.006, "step": 4008 }, { "epoch": 34.29, "learning_rate": 9.964988656085771e-05, "loss": 0.0042, "step": 4012 }, { "epoch": 34.32, "learning_rate": 9.964712758897632e-05, "loss": 0.0054, "step": 4016 }, { "epoch": 34.36, "learning_rate": 9.964435782755437e-05, "loss": 0.0047, "step": 4020 }, { "epoch": 34.39, "learning_rate": 9.964157727719381e-05, "loss": 0.0032, "step": 4024 }, { "epoch": 34.43, "learning_rate": 9.963878593849893e-05, "loss": 0.0052, "step": 4028 }, { "epoch": 34.46, "learning_rate": 9.963598381207632e-05, "loss": 0.0039, "step": 4032 }, { "epoch": 34.5, "learning_rate": 9.963317089853502e-05, "loss": 0.0038, "step": 4036 }, { "epoch": 34.53, "learning_rate": 9.963034719848626e-05, "loss": 0.0049, "step": 4040 }, { "epoch": 34.56, "learning_rate": 9.962751271254375e-05, "loss": 0.0105, "step": 4044 }, { "epoch": 34.6, "learning_rate": 9.96246674413235e-05, "loss": 0.0042, "step": 4048 }, { "epoch": 34.63, "learning_rate": 9.962181138544383e-05, "loss": 0.0053, "step": 4052 }, { "epoch": 34.67, "learning_rate": 9.961894454552545e-05, "loss": 0.0056, "step": 4056 }, { "epoch": 34.7, "learning_rate": 9.961606692219137e-05, "loss": 0.0054, "step": 4060 }, { "epoch": 34.74, "learning_rate": 9.961317851606701e-05, "loss": 0.0051, "step": 4064 }, { "epoch": 34.77, "learning_rate": 9.961027932778005e-05, "loss": 0.0037, "step": 4068 }, { "epoch": 34.8, "learning_rate": 9.960736935796058e-05, "loss": 0.004, "step": 4072 }, { "epoch": 34.84, "learning_rate": 9.9604448607241e-05, "loss": 0.0058, "step": 4076 }, { "epoch": 34.87, "learning_rate": 9.960151707625605e-05, "loss": 0.0044, "step": 4080 }, { "epoch": 34.91, "learning_rate": 9.959857476564285e-05, "loss": 0.0056, "step": 4084 }, { "epoch": 34.94, "learning_rate": 9.959562167604082e-05, "loss": 0.0066, "step": 4088 }, { "epoch": 34.97, "learning_rate": 9.959265780809172e-05, "loss": 0.0058, "step": 4092 }, { "epoch": 35.01, "learning_rate": 9.958968316243972e-05, "loss": 0.0067, "step": 4096 }, { "epoch": 35.04, "learning_rate": 9.958669773973123e-05, "loss": 0.0067, "step": 4100 }, { "epoch": 35.08, "learning_rate": 9.958370154061511e-05, "loss": 0.0054, "step": 4104 }, { "epoch": 35.11, "learning_rate": 9.958069456574246e-05, "loss": 0.0042, "step": 4108 }, { "epoch": 35.15, "learning_rate": 9.957767681576679e-05, "loss": 0.0037, "step": 4112 }, { "epoch": 35.18, "learning_rate": 9.957464829134391e-05, "loss": 0.0043, "step": 4116 }, { "epoch": 35.21, "learning_rate": 9.957160899313205e-05, "loss": 0.0041, "step": 4120 }, { "epoch": 35.25, "learning_rate": 9.956855892179167e-05, "loss": 0.0047, "step": 4124 }, { "epoch": 35.28, "learning_rate": 9.956549807798563e-05, "loss": 0.0033, "step": 4128 }, { "epoch": 35.32, "learning_rate": 9.956242646237914e-05, "loss": 0.0049, "step": 4132 }, { "epoch": 35.35, "learning_rate": 9.955934407563974e-05, "loss": 0.0048, "step": 4136 }, { "epoch": 35.38, "learning_rate": 9.95562509184373e-05, "loss": 0.005, "step": 4140 }, { "epoch": 35.42, "learning_rate": 9.955314699144406e-05, "loss": 0.0036, "step": 4144 }, { "epoch": 35.45, "learning_rate": 9.955003229533455e-05, "loss": 0.0044, "step": 4148 }, { "epoch": 35.49, "learning_rate": 9.954690683078569e-05, "loss": 0.0039, "step": 4152 }, { "epoch": 35.52, "learning_rate": 9.954377059847669e-05, "loss": 0.004, "step": 4156 }, { "epoch": 35.56, "learning_rate": 9.954062359908918e-05, "loss": 0.0055, "step": 4160 }, { "epoch": 35.59, "learning_rate": 9.953746583330703e-05, "loss": 0.0043, "step": 4164 }, { "epoch": 35.62, "learning_rate": 9.953429730181653e-05, "loss": 0.0056, "step": 4168 }, { "epoch": 35.66, "learning_rate": 9.953111800530628e-05, "loss": 0.0053, "step": 4172 }, { "epoch": 35.69, "learning_rate": 9.952792794446722e-05, "loss": 0.003, "step": 4176 }, { "epoch": 35.73, "learning_rate": 9.952472711999261e-05, "loss": 0.0057, "step": 4180 }, { "epoch": 35.76, "learning_rate": 9.952151553257809e-05, "loss": 0.0052, "step": 4184 }, { "epoch": 35.79, "learning_rate": 9.951829318292159e-05, "loss": 0.0046, "step": 4188 }, { "epoch": 35.83, "learning_rate": 9.951506007172343e-05, "loss": 0.0028, "step": 4192 }, { "epoch": 35.86, "learning_rate": 9.951181619968624e-05, "loss": 0.0056, "step": 4196 }, { "epoch": 35.9, "learning_rate": 9.950856156751498e-05, "loss": 0.0029, "step": 4200 }, { "epoch": 35.93, "learning_rate": 9.950529617591698e-05, "loss": 0.0046, "step": 4204 }, { "epoch": 35.97, "learning_rate": 9.950202002560187e-05, "loss": 0.0031, "step": 4208 }, { "epoch": 36.0, "learning_rate": 9.949873311728166e-05, "loss": 0.0036, "step": 4212 }, { "epoch": 36.03, "learning_rate": 9.949543545167066e-05, "loss": 0.0046, "step": 4216 }, { "epoch": 36.07, "learning_rate": 9.949212702948552e-05, "loss": 0.0026, "step": 4220 }, { "epoch": 36.1, "learning_rate": 9.948880785144528e-05, "loss": 0.0036, "step": 4224 }, { "epoch": 36.14, "learning_rate": 9.948547791827126e-05, "loss": 0.0046, "step": 4228 }, { "epoch": 36.17, "learning_rate": 9.948213723068712e-05, "loss": 0.0025, "step": 4232 }, { "epoch": 36.21, "learning_rate": 9.94787857894189e-05, "loss": 0.0043, "step": 4236 }, { "epoch": 36.24, "learning_rate": 9.947542359519492e-05, "loss": 0.0045, "step": 4240 }, { "epoch": 36.27, "learning_rate": 9.947205064874591e-05, "loss": 0.0057, "step": 4244 }, { "epoch": 36.31, "learning_rate": 9.946866695080486e-05, "loss": 0.0023, "step": 4248 }, { "epoch": 36.34, "learning_rate": 9.946527250210715e-05, "loss": 0.0033, "step": 4252 }, { "epoch": 36.38, "learning_rate": 9.946186730339047e-05, "loss": 0.0053, "step": 4256 }, { "epoch": 36.41, "learning_rate": 9.945845135539483e-05, "loss": 0.0028, "step": 4260 }, { "epoch": 36.44, "learning_rate": 9.945502465886263e-05, "loss": 0.0037, "step": 4264 }, { "epoch": 36.48, "learning_rate": 9.945158721453857e-05, "loss": 0.007, "step": 4268 }, { "epoch": 36.51, "learning_rate": 9.94481390231697e-05, "loss": 0.0027, "step": 4272 }, { "epoch": 36.55, "learning_rate": 9.944468008550536e-05, "loss": 0.0021, "step": 4276 }, { "epoch": 36.58, "learning_rate": 9.94412104022973e-05, "loss": 0.0047, "step": 4280 }, { "epoch": 36.62, "learning_rate": 9.943772997429954e-05, "loss": 0.0056, "step": 4284 }, { "epoch": 36.65, "learning_rate": 9.943423880226848e-05, "loss": 0.0046, "step": 4288 }, { "epoch": 36.68, "learning_rate": 9.943073688696285e-05, "loss": 0.0053, "step": 4292 }, { "epoch": 36.72, "learning_rate": 9.942722422914367e-05, "loss": 0.0032, "step": 4296 }, { "epoch": 36.75, "learning_rate": 9.942370082957435e-05, "loss": 0.0034, "step": 4300 }, { "epoch": 36.79, "learning_rate": 9.942016668902058e-05, "loss": 0.0041, "step": 4304 }, { "epoch": 36.82, "learning_rate": 9.941662180825048e-05, "loss": 0.004, "step": 4308 }, { "epoch": 36.85, "learning_rate": 9.941306618803436e-05, "loss": 0.0058, "step": 4312 }, { "epoch": 36.89, "learning_rate": 9.940949982914502e-05, "loss": 0.0031, "step": 4316 }, { "epoch": 36.92, "learning_rate": 9.940592273235744e-05, "loss": 0.0065, "step": 4320 }, { "epoch": 36.96, "learning_rate": 9.94023348984491e-05, "loss": 0.0041, "step": 4324 }, { "epoch": 36.99, "learning_rate": 9.939873632819964e-05, "loss": 0.0054, "step": 4328 }, { "epoch": 37.03, "learning_rate": 9.939512702239116e-05, "loss": 0.0051, "step": 4332 }, { "epoch": 37.06, "learning_rate": 9.939150698180804e-05, "loss": 0.0032, "step": 4336 }, { "epoch": 37.09, "learning_rate": 9.938787620723702e-05, "loss": 0.0047, "step": 4340 }, { "epoch": 37.13, "learning_rate": 9.938423469946713e-05, "loss": 0.006, "step": 4344 }, { "epoch": 37.16, "learning_rate": 9.938058245928978e-05, "loss": 0.0044, "step": 4348 }, { "epoch": 37.2, "learning_rate": 9.937691948749869e-05, "loss": 0.0051, "step": 4352 }, { "epoch": 37.23, "learning_rate": 9.93732457848899e-05, "loss": 0.0065, "step": 4356 }, { "epoch": 37.26, "learning_rate": 9.936956135226181e-05, "loss": 0.0022, "step": 4360 }, { "epoch": 37.3, "learning_rate": 9.936586619041514e-05, "loss": 0.0027, "step": 4364 }, { "epoch": 37.33, "learning_rate": 9.936216030015291e-05, "loss": 0.0039, "step": 4368 }, { "epoch": 37.37, "learning_rate": 9.935844368228054e-05, "loss": 0.0032, "step": 4372 }, { "epoch": 37.4, "learning_rate": 9.935471633760573e-05, "loss": 0.0056, "step": 4376 }, { "epoch": 37.44, "learning_rate": 9.935097826693851e-05, "loss": 0.0026, "step": 4380 }, { "epoch": 37.47, "learning_rate": 9.934722947109125e-05, "loss": 0.003, "step": 4384 }, { "epoch": 37.5, "learning_rate": 9.934346995087868e-05, "loss": 0.0032, "step": 4388 }, { "epoch": 37.54, "learning_rate": 9.933969970711783e-05, "loss": 0.0028, "step": 4392 }, { "epoch": 37.57, "learning_rate": 9.933591874062806e-05, "loss": 0.004, "step": 4396 }, { "epoch": 37.61, "learning_rate": 9.933212705223107e-05, "loss": 0.0041, "step": 4400 }, { "epoch": 37.64, "learning_rate": 9.932832464275088e-05, "loss": 0.004, "step": 4404 }, { "epoch": 37.68, "learning_rate": 9.932451151301386e-05, "loss": 0.0051, "step": 4408 }, { "epoch": 37.71, "learning_rate": 9.932068766384868e-05, "loss": 0.0033, "step": 4412 }, { "epoch": 37.74, "learning_rate": 9.931685309608636e-05, "loss": 0.003, "step": 4416 }, { "epoch": 37.78, "learning_rate": 9.931300781056027e-05, "loss": 0.0048, "step": 4420 }, { "epoch": 37.81, "learning_rate": 9.930915180810605e-05, "loss": 0.0047, "step": 4424 }, { "epoch": 37.85, "learning_rate": 9.930528508956172e-05, "loss": 0.0037, "step": 4428 }, { "epoch": 37.88, "learning_rate": 9.930140765576763e-05, "loss": 0.0055, "step": 4432 }, { "epoch": 37.91, "learning_rate": 9.929751950756641e-05, "loss": 0.0041, "step": 4436 }, { "epoch": 37.95, "learning_rate": 9.929362064580307e-05, "loss": 0.004, "step": 4440 }, { "epoch": 37.98, "learning_rate": 9.928971107132493e-05, "loss": 0.0047, "step": 4444 }, { "epoch": 38.02, "learning_rate": 9.928579078498161e-05, "loss": 0.0033, "step": 4448 }, { "epoch": 38.05, "learning_rate": 9.928185978762512e-05, "loss": 0.003, "step": 4452 }, { "epoch": 38.09, "learning_rate": 9.927791808010976e-05, "loss": 0.004, "step": 4456 }, { "epoch": 38.12, "learning_rate": 9.927396566329212e-05, "loss": 0.0041, "step": 4460 }, { "epoch": 38.15, "learning_rate": 9.92700025380312e-05, "loss": 0.0055, "step": 4464 }, { "epoch": 38.19, "learning_rate": 9.926602870518826e-05, "loss": 0.0038, "step": 4468 }, { "epoch": 38.22, "learning_rate": 9.926204416562692e-05, "loss": 0.005, "step": 4472 }, { "epoch": 38.26, "learning_rate": 9.925804892021313e-05, "loss": 0.0057, "step": 4476 }, { "epoch": 38.29, "learning_rate": 9.925404296981513e-05, "loss": 0.0034, "step": 4480 }, { "epoch": 38.32, "learning_rate": 9.925002631530353e-05, "loss": 0.0039, "step": 4484 }, { "epoch": 38.36, "learning_rate": 9.924599895755126e-05, "loss": 0.0037, "step": 4488 }, { "epoch": 38.39, "learning_rate": 9.924196089743352e-05, "loss": 0.0038, "step": 4492 }, { "epoch": 38.43, "learning_rate": 9.923791213582791e-05, "loss": 0.0027, "step": 4496 }, { "epoch": 38.46, "learning_rate": 9.923385267361434e-05, "loss": 0.0042, "step": 4500 }, { "epoch": 38.5, "learning_rate": 9.9229782511675e-05, "loss": 0.0054, "step": 4504 }, { "epoch": 38.53, "learning_rate": 9.922570165089445e-05, "loss": 0.0038, "step": 4508 }, { "epoch": 38.56, "learning_rate": 9.922161009215956e-05, "loss": 0.004, "step": 4512 }, { "epoch": 38.6, "learning_rate": 9.921750783635952e-05, "loss": 0.003, "step": 4516 }, { "epoch": 38.63, "learning_rate": 9.921339488438585e-05, "loss": 0.0033, "step": 4520 }, { "epoch": 38.67, "learning_rate": 9.920927123713242e-05, "loss": 0.0029, "step": 4524 }, { "epoch": 38.7, "learning_rate": 9.920513689549537e-05, "loss": 0.0035, "step": 4528 }, { "epoch": 38.74, "learning_rate": 9.920099186037321e-05, "loss": 0.0051, "step": 4532 }, { "epoch": 38.77, "learning_rate": 9.919683613266677e-05, "loss": 0.0037, "step": 4536 }, { "epoch": 38.8, "learning_rate": 9.919266971327916e-05, "loss": 0.0032, "step": 4540 }, { "epoch": 38.84, "learning_rate": 9.918849260311586e-05, "loss": 0.0044, "step": 4544 }, { "epoch": 38.87, "learning_rate": 9.918430480308466e-05, "loss": 0.0022, "step": 4548 }, { "epoch": 38.91, "learning_rate": 9.918010631409568e-05, "loss": 0.0035, "step": 4552 }, { "epoch": 38.94, "learning_rate": 9.917589713706135e-05, "loss": 0.0048, "step": 4556 }, { "epoch": 38.97, "learning_rate": 9.91716772728964e-05, "loss": 0.0029, "step": 4560 }, { "epoch": 39.01, "learning_rate": 9.916744672251795e-05, "loss": 0.0055, "step": 4564 }, { "epoch": 39.04, "learning_rate": 9.91632054868454e-05, "loss": 0.0036, "step": 4568 }, { "epoch": 39.08, "learning_rate": 9.915895356680043e-05, "loss": 0.0035, "step": 4572 }, { "epoch": 39.11, "learning_rate": 9.915469096330714e-05, "loss": 0.0036, "step": 4576 }, { "epoch": 39.15, "learning_rate": 9.915041767729188e-05, "loss": 0.002, "step": 4580 }, { "epoch": 39.18, "learning_rate": 9.914613370968333e-05, "loss": 0.0031, "step": 4584 }, { "epoch": 39.21, "learning_rate": 9.914183906141252e-05, "loss": 0.0043, "step": 4588 }, { "epoch": 39.25, "learning_rate": 9.913753373341274e-05, "loss": 0.0044, "step": 4592 }, { "epoch": 39.28, "learning_rate": 9.91332177266197e-05, "loss": 0.0039, "step": 4596 }, { "epoch": 39.32, "learning_rate": 9.912889104197134e-05, "loss": 0.004, "step": 4600 }, { "epoch": 39.35, "learning_rate": 9.912455368040797e-05, "loss": 0.0034, "step": 4604 }, { "epoch": 39.38, "learning_rate": 9.912020564287218e-05, "loss": 0.0032, "step": 4608 }, { "epoch": 39.42, "learning_rate": 9.911584693030895e-05, "loss": 0.004, "step": 4612 }, { "epoch": 39.45, "learning_rate": 9.91114775436655e-05, "loss": 0.0057, "step": 4616 }, { "epoch": 39.49, "learning_rate": 9.910709748389141e-05, "loss": 0.0039, "step": 4620 }, { "epoch": 39.52, "learning_rate": 9.910270675193858e-05, "loss": 0.0025, "step": 4624 }, { "epoch": 39.56, "learning_rate": 9.909830534876123e-05, "loss": 0.0036, "step": 4628 }, { "epoch": 39.59, "learning_rate": 9.909389327531588e-05, "loss": 0.0035, "step": 4632 }, { "epoch": 39.62, "learning_rate": 9.908947053256139e-05, "loss": 0.0049, "step": 4636 }, { "epoch": 39.66, "learning_rate": 9.908503712145892e-05, "loss": 0.0037, "step": 4640 }, { "epoch": 39.69, "learning_rate": 9.908059304297198e-05, "loss": 0.0029, "step": 4644 }, { "epoch": 39.73, "learning_rate": 9.907613829806637e-05, "loss": 0.0036, "step": 4648 }, { "epoch": 39.76, "learning_rate": 9.907167288771019e-05, "loss": 0.0044, "step": 4652 }, { "epoch": 39.79, "learning_rate": 9.90671968128739e-05, "loss": 0.0043, "step": 4656 }, { "epoch": 39.83, "learning_rate": 9.90627100745303e-05, "loss": 0.0073, "step": 4660 }, { "epoch": 39.86, "learning_rate": 9.90582126736544e-05, "loss": 0.003, "step": 4664 }, { "epoch": 39.9, "learning_rate": 9.905370461122366e-05, "loss": 0.0029, "step": 4668 }, { "epoch": 39.93, "learning_rate": 9.904918588821775e-05, "loss": 0.0045, "step": 4672 }, { "epoch": 39.97, "learning_rate": 9.904465650561869e-05, "loss": 0.0047, "step": 4676 }, { "epoch": 40.0, "learning_rate": 9.904011646441087e-05, "loss": 0.0046, "step": 4680 }, { "epoch": 40.03, "learning_rate": 9.903556576558093e-05, "loss": 0.0058, "step": 4684 }, { "epoch": 40.07, "learning_rate": 9.903100441011783e-05, "loss": 0.0035, "step": 4688 }, { "epoch": 40.1, "learning_rate": 9.90264323990129e-05, "loss": 0.0069, "step": 4692 }, { "epoch": 40.14, "learning_rate": 9.902184973325975e-05, "loss": 0.0032, "step": 4696 }, { "epoch": 40.17, "learning_rate": 9.901725641385425e-05, "loss": 0.0029, "step": 4700 }, { "epoch": 40.21, "learning_rate": 9.90126524417947e-05, "loss": 0.0033, "step": 4704 }, { "epoch": 40.24, "learning_rate": 9.900803781808164e-05, "loss": 0.0023, "step": 4708 }, { "epoch": 40.27, "learning_rate": 9.900341254371794e-05, "loss": 0.0029, "step": 4712 }, { "epoch": 40.31, "learning_rate": 9.899877661970877e-05, "loss": 0.0033, "step": 4716 }, { "epoch": 40.34, "learning_rate": 9.899413004706166e-05, "loss": 0.0025, "step": 4720 }, { "epoch": 40.38, "learning_rate": 9.898947282678642e-05, "loss": 0.0042, "step": 4724 }, { "epoch": 40.41, "learning_rate": 9.898480495989514e-05, "loss": 0.0042, "step": 4728 }, { "epoch": 40.44, "learning_rate": 9.898012644740229e-05, "loss": 0.0021, "step": 4732 }, { "epoch": 40.48, "learning_rate": 9.897543729032463e-05, "loss": 0.0033, "step": 4736 }, { "epoch": 40.51, "learning_rate": 9.897073748968124e-05, "loss": 0.005, "step": 4740 }, { "epoch": 40.55, "learning_rate": 9.896602704649346e-05, "loss": 0.0048, "step": 4744 }, { "epoch": 40.58, "learning_rate": 9.896130596178503e-05, "loss": 0.0023, "step": 4748 }, { "epoch": 40.62, "learning_rate": 9.895657423658193e-05, "loss": 0.0027, "step": 4752 }, { "epoch": 40.65, "learning_rate": 9.895183187191251e-05, "loss": 0.0038, "step": 4756 }, { "epoch": 40.68, "learning_rate": 9.894707886880735e-05, "loss": 0.0034, "step": 4760 }, { "epoch": 40.72, "learning_rate": 9.894231522829944e-05, "loss": 0.0034, "step": 4764 }, { "epoch": 40.75, "learning_rate": 9.893754095142403e-05, "loss": 0.0039, "step": 4768 }, { "epoch": 40.79, "learning_rate": 9.893275603921865e-05, "loss": 0.0024, "step": 4772 }, { "epoch": 40.82, "learning_rate": 9.892796049272324e-05, "loss": 0.002, "step": 4776 }, { "epoch": 40.85, "learning_rate": 9.892315431297994e-05, "loss": 0.0036, "step": 4780 }, { "epoch": 40.89, "learning_rate": 9.891833750103328e-05, "loss": 0.0037, "step": 4784 }, { "epoch": 40.92, "learning_rate": 9.891351005793005e-05, "loss": 0.004, "step": 4788 }, { "epoch": 40.96, "learning_rate": 9.890867198471938e-05, "loss": 0.0036, "step": 4792 }, { "epoch": 40.99, "learning_rate": 9.890382328245271e-05, "loss": 0.0057, "step": 4796 }, { "epoch": 41.03, "learning_rate": 9.889896395218378e-05, "loss": 0.0028, "step": 4800 }, { "epoch": 41.06, "learning_rate": 9.889409399496865e-05, "loss": 0.0018, "step": 4804 }, { "epoch": 41.09, "learning_rate": 9.888921341186566e-05, "loss": 0.0027, "step": 4808 }, { "epoch": 41.13, "learning_rate": 9.888432220393549e-05, "loss": 0.0047, "step": 4812 }, { "epoch": 41.16, "learning_rate": 9.887942037224111e-05, "loss": 0.0037, "step": 4816 }, { "epoch": 41.2, "learning_rate": 9.887450791784782e-05, "loss": 0.0034, "step": 4820 }, { "epoch": 41.23, "learning_rate": 9.886958484182324e-05, "loss": 0.0037, "step": 4824 }, { "epoch": 41.26, "learning_rate": 9.886465114523724e-05, "loss": 0.0048, "step": 4828 }, { "epoch": 41.3, "learning_rate": 9.885970682916204e-05, "loss": 0.0028, "step": 4832 }, { "epoch": 41.33, "learning_rate": 9.885475189467217e-05, "loss": 0.0036, "step": 4836 }, { "epoch": 41.37, "learning_rate": 9.884978634284445e-05, "loss": 0.0055, "step": 4840 }, { "epoch": 41.4, "learning_rate": 9.884481017475801e-05, "loss": 0.0049, "step": 4844 }, { "epoch": 41.44, "learning_rate": 9.883982339149433e-05, "loss": 0.0031, "step": 4848 }, { "epoch": 41.47, "learning_rate": 9.883482599413712e-05, "loss": 0.0038, "step": 4852 }, { "epoch": 41.5, "learning_rate": 9.882981798377247e-05, "loss": 0.0017, "step": 4856 }, { "epoch": 41.54, "learning_rate": 9.882479936148872e-05, "loss": 0.004, "step": 4860 }, { "epoch": 41.57, "learning_rate": 9.881977012837653e-05, "loss": 0.0024, "step": 4864 }, { "epoch": 41.61, "learning_rate": 9.88147302855289e-05, "loss": 0.003, "step": 4868 }, { "epoch": 41.64, "learning_rate": 9.880967983404111e-05, "loss": 0.0016, "step": 4872 }, { "epoch": 41.68, "learning_rate": 9.880461877501075e-05, "loss": 0.0047, "step": 4876 }, { "epoch": 41.71, "learning_rate": 9.879954710953769e-05, "loss": 0.0038, "step": 4880 }, { "epoch": 41.74, "learning_rate": 9.879446483872414e-05, "loss": 0.0022, "step": 4884 }, { "epoch": 41.78, "learning_rate": 9.878937196367462e-05, "loss": 0.004, "step": 4888 }, { "epoch": 41.81, "learning_rate": 9.878426848549589e-05, "loss": 0.0031, "step": 4892 }, { "epoch": 41.85, "learning_rate": 9.877915440529711e-05, "loss": 0.0027, "step": 4896 }, { "epoch": 41.88, "learning_rate": 9.877402972418968e-05, "loss": 0.003, "step": 4900 }, { "epoch": 41.91, "learning_rate": 9.876889444328731e-05, "loss": 0.0024, "step": 4904 }, { "epoch": 41.95, "learning_rate": 9.876374856370603e-05, "loss": 0.0033, "step": 4908 }, { "epoch": 41.98, "learning_rate": 9.875859208656418e-05, "loss": 0.003, "step": 4912 }, { "epoch": 42.02, "learning_rate": 9.875342501298235e-05, "loss": 0.003, "step": 4916 }, { "epoch": 42.05, "learning_rate": 9.874824734408351e-05, "loss": 0.0038, "step": 4920 }, { "epoch": 42.09, "learning_rate": 9.874305908099288e-05, "loss": 0.0034, "step": 4924 }, { "epoch": 42.12, "learning_rate": 9.8737860224838e-05, "loss": 0.0038, "step": 4928 }, { "epoch": 42.15, "learning_rate": 9.87326507767487e-05, "loss": 0.0023, "step": 4932 }, { "epoch": 42.19, "learning_rate": 9.872743073785713e-05, "loss": 0.0036, "step": 4936 }, { "epoch": 42.22, "learning_rate": 9.872220010929772e-05, "loss": 0.0052, "step": 4940 }, { "epoch": 42.26, "learning_rate": 9.871695889220725e-05, "loss": 0.0019, "step": 4944 }, { "epoch": 42.29, "learning_rate": 9.871170708772471e-05, "loss": 0.0019, "step": 4948 }, { "epoch": 42.32, "learning_rate": 9.87064446969915e-05, "loss": 0.0055, "step": 4952 }, { "epoch": 42.36, "learning_rate": 9.870117172115123e-05, "loss": 0.0031, "step": 4956 }, { "epoch": 42.39, "learning_rate": 9.869588816134987e-05, "loss": 0.0028, "step": 4960 }, { "epoch": 42.43, "learning_rate": 9.869059401873564e-05, "loss": 0.0028, "step": 4964 }, { "epoch": 42.46, "learning_rate": 9.868528929445912e-05, "loss": 0.0019, "step": 4968 }, { "epoch": 42.5, "learning_rate": 9.867997398967313e-05, "loss": 0.002, "step": 4972 }, { "epoch": 42.53, "learning_rate": 9.867464810553283e-05, "loss": 0.0035, "step": 4976 }, { "epoch": 42.56, "learning_rate": 9.866931164319568e-05, "loss": 0.0044, "step": 4980 }, { "epoch": 42.6, "learning_rate": 9.866396460382139e-05, "loss": 0.0069, "step": 4984 }, { "epoch": 42.63, "learning_rate": 9.865860698857204e-05, "loss": 0.0025, "step": 4988 }, { "epoch": 42.67, "learning_rate": 9.865323879861194e-05, "loss": 0.0032, "step": 4992 }, { "epoch": 42.7, "learning_rate": 9.864786003510773e-05, "loss": 0.0047, "step": 4996 }, { "epoch": 42.74, "learning_rate": 9.864247069922838e-05, "loss": 0.0027, "step": 5000 }, { "epoch": 42.74, "eval_exact_match": 0.4875259875259875, "eval_loss": 0.7059731483459473, "eval_runtime": 86.6231, "eval_samples_per_second": 11.106, "step": 5000 }, { "epoch": 21.38, "learning_rate": 8.352528793189786e-05, "loss": 0.0021, "step": 5004 }, { "epoch": 21.4, "learning_rate": 8.359205474878986e-05, "loss": 0.002, "step": 5008 }, { "epoch": 21.42, "learning_rate": 8.365882156568185e-05, "loss": 0.0031, "step": 5012 }, { "epoch": 21.44, "learning_rate": 8.372558838257387e-05, "loss": 0.0031, "step": 5016 }, { "epoch": 21.45, "learning_rate": 8.379235519946587e-05, "loss": 0.0036, "step": 5020 }, { "epoch": 21.47, "learning_rate": 8.385912201635787e-05, "loss": 0.0018, "step": 5024 }, { "epoch": 21.49, "learning_rate": 8.392588883324987e-05, "loss": 0.0016, "step": 5028 }, { "epoch": 21.5, "learning_rate": 8.399265565014188e-05, "loss": 0.0026, "step": 5032 }, { "epoch": 21.52, "learning_rate": 8.405942246703389e-05, "loss": 0.0018, "step": 5036 }, { "epoch": 21.54, "learning_rate": 8.41261892839259e-05, "loss": 0.0022, "step": 5040 }, { "epoch": 21.56, "learning_rate": 8.41929561008179e-05, "loss": 0.0008, "step": 5044 }, { "epoch": 21.57, "learning_rate": 8.42597229177099e-05, "loss": 0.0035, "step": 5048 }, { "epoch": 21.59, "learning_rate": 8.43264897346019e-05, "loss": 0.0017, "step": 5052 }, { "epoch": 21.61, "learning_rate": 8.439325655149392e-05, "loss": 0.004, "step": 5056 }, { "epoch": 21.62, "learning_rate": 8.446002336838592e-05, "loss": 0.0023, "step": 5060 }, { "epoch": 21.64, "learning_rate": 8.452679018527791e-05, "loss": 0.0032, "step": 5064 }, { "epoch": 21.66, "learning_rate": 8.459355700216993e-05, "loss": 0.003, "step": 5068 }, { "epoch": 21.68, "learning_rate": 8.466032381906193e-05, "loss": 0.0038, "step": 5072 }, { "epoch": 21.69, "learning_rate": 8.472709063595393e-05, "loss": 0.0053, "step": 5076 }, { "epoch": 21.71, "learning_rate": 8.479385745284594e-05, "loss": 0.0018, "step": 5080 }, { "epoch": 21.73, "learning_rate": 8.486062426973794e-05, "loss": 0.0018, "step": 5084 }, { "epoch": 21.74, "learning_rate": 8.492739108662995e-05, "loss": 0.0059, "step": 5088 }, { "epoch": 21.76, "learning_rate": 8.499415790352196e-05, "loss": 0.002, "step": 5092 }, { "epoch": 21.78, "learning_rate": 8.506092472041396e-05, "loss": 0.0042, "step": 5096 }, { "epoch": 21.79, "learning_rate": 8.512769153730596e-05, "loss": 0.0048, "step": 5100 }, { "epoch": 21.81, "learning_rate": 8.519445835419796e-05, "loss": 0.002, "step": 5104 }, { "epoch": 21.83, "learning_rate": 8.526122517108998e-05, "loss": 0.0034, "step": 5108 }, { "epoch": 21.85, "learning_rate": 8.532799198798198e-05, "loss": 0.0044, "step": 5112 }, { "epoch": 21.86, "learning_rate": 8.539475880487399e-05, "loss": 0.0045, "step": 5116 }, { "epoch": 21.88, "learning_rate": 8.546152562176597e-05, "loss": 0.002, "step": 5120 }, { "epoch": 21.9, "learning_rate": 8.552829243865799e-05, "loss": 0.0023, "step": 5124 }, { "epoch": 21.91, "learning_rate": 8.559505925555e-05, "loss": 0.0027, "step": 5128 }, { "epoch": 21.93, "learning_rate": 8.5661826072442e-05, "loss": 0.0055, "step": 5132 }, { "epoch": 21.95, "learning_rate": 8.5728592889334e-05, "loss": 0.0035, "step": 5136 }, { "epoch": 21.97, "learning_rate": 8.579535970622602e-05, "loss": 0.0038, "step": 5140 }, { "epoch": 21.98, "learning_rate": 8.586212652311802e-05, "loss": 0.0031, "step": 5144 }, { "epoch": 22.0, "learning_rate": 8.592889334001002e-05, "loss": 0.0043, "step": 5148 }, { "epoch": 22.02, "learning_rate": 8.599566015690202e-05, "loss": 0.0034, "step": 5152 }, { "epoch": 22.03, "learning_rate": 8.606242697379403e-05, "loss": 0.0042, "step": 5156 }, { "epoch": 22.05, "learning_rate": 8.612919379068604e-05, "loss": 0.0022, "step": 5160 }, { "epoch": 22.07, "learning_rate": 8.619596060757804e-05, "loss": 0.0039, "step": 5164 }, { "epoch": 22.09, "learning_rate": 8.626272742447005e-05, "loss": 0.0026, "step": 5168 }, { "epoch": 22.1, "learning_rate": 8.632949424136205e-05, "loss": 0.0022, "step": 5172 }, { "epoch": 22.12, "learning_rate": 8.639626105825405e-05, "loss": 0.0055, "step": 5176 }, { "epoch": 22.14, "learning_rate": 8.646302787514605e-05, "loss": 0.0034, "step": 5180 }, { "epoch": 22.15, "learning_rate": 8.652979469203806e-05, "loss": 0.0043, "step": 5184 }, { "epoch": 22.17, "learning_rate": 8.659656150893006e-05, "loss": 0.0033, "step": 5188 }, { "epoch": 22.19, "learning_rate": 8.666332832582206e-05, "loss": 0.0072, "step": 5192 }, { "epoch": 22.21, "learning_rate": 8.673009514271408e-05, "loss": 0.0042, "step": 5196 }, { "epoch": 22.22, "learning_rate": 8.679686195960608e-05, "loss": 0.0062, "step": 5200 }, { "epoch": 22.24, "learning_rate": 8.686362877649808e-05, "loss": 0.0025, "step": 5204 }, { "epoch": 22.26, "learning_rate": 8.693039559339009e-05, "loss": 0.0044, "step": 5208 }, { "epoch": 22.27, "learning_rate": 8.699716241028209e-05, "loss": 0.0037, "step": 5212 }, { "epoch": 22.29, "learning_rate": 8.70639292271741e-05, "loss": 0.002, "step": 5216 }, { "epoch": 22.31, "learning_rate": 8.713069604406611e-05, "loss": 0.0024, "step": 5220 }, { "epoch": 22.32, "learning_rate": 8.719746286095811e-05, "loss": 0.0018, "step": 5224 }, { "epoch": 22.34, "learning_rate": 8.726422967785011e-05, "loss": 0.0025, "step": 5228 }, { "epoch": 22.36, "learning_rate": 8.733099649474212e-05, "loss": 0.0051, "step": 5232 }, { "epoch": 22.38, "learning_rate": 8.739776331163412e-05, "loss": 0.0027, "step": 5236 }, { "epoch": 22.39, "learning_rate": 8.746453012852612e-05, "loss": 0.0043, "step": 5240 }, { "epoch": 22.41, "learning_rate": 8.753129694541812e-05, "loss": 0.0048, "step": 5244 }, { "epoch": 22.43, "learning_rate": 8.759806376231014e-05, "loss": 0.0014, "step": 5248 }, { "epoch": 22.44, "learning_rate": 8.766483057920214e-05, "loss": 0.0019, "step": 5252 }, { "epoch": 22.46, "learning_rate": 8.773159739609415e-05, "loss": 0.0038, "step": 5256 }, { "epoch": 22.48, "learning_rate": 8.779836421298615e-05, "loss": 0.0026, "step": 5260 }, { "epoch": 22.5, "learning_rate": 8.786513102987815e-05, "loss": 0.0027, "step": 5264 }, { "epoch": 22.51, "learning_rate": 8.793189784677017e-05, "loss": 0.0042, "step": 5268 }, { "epoch": 22.53, "learning_rate": 8.799866466366217e-05, "loss": 0.0026, "step": 5272 }, { "epoch": 22.55, "learning_rate": 8.806543148055417e-05, "loss": 0.0037, "step": 5276 }, { "epoch": 22.56, "learning_rate": 8.813219829744617e-05, "loss": 0.0058, "step": 5280 }, { "epoch": 22.58, "learning_rate": 8.819896511433818e-05, "loss": 0.004, "step": 5284 }, { "epoch": 22.6, "learning_rate": 8.826573193123018e-05, "loss": 0.0026, "step": 5288 }, { "epoch": 22.62, "learning_rate": 8.833249874812218e-05, "loss": 0.0037, "step": 5292 }, { "epoch": 22.63, "learning_rate": 8.839926556501418e-05, "loss": 0.0024, "step": 5296 }, { "epoch": 22.65, "learning_rate": 8.84660323819062e-05, "loss": 0.0026, "step": 5300 }, { "epoch": 22.67, "learning_rate": 8.85327991987982e-05, "loss": 0.0032, "step": 5304 }, { "epoch": 22.68, "learning_rate": 8.85995660156902e-05, "loss": 0.0031, "step": 5308 }, { "epoch": 22.7, "learning_rate": 8.866633283258221e-05, "loss": 0.0068, "step": 5312 }, { "epoch": 22.72, "learning_rate": 8.873309964947421e-05, "loss": 0.0037, "step": 5316 }, { "epoch": 22.74, "learning_rate": 8.879986646636623e-05, "loss": 0.0068, "step": 5320 }, { "epoch": 22.75, "learning_rate": 8.886663328325823e-05, "loss": 0.0025, "step": 5324 }, { "epoch": 22.77, "learning_rate": 8.893340010015023e-05, "loss": 0.0056, "step": 5328 }, { "epoch": 22.79, "learning_rate": 8.900016691704224e-05, "loss": 0.0053, "step": 5332 }, { "epoch": 22.8, "learning_rate": 8.906693373393424e-05, "loss": 0.0053, "step": 5336 }, { "epoch": 22.82, "learning_rate": 8.913370055082625e-05, "loss": 0.003, "step": 5340 }, { "epoch": 22.84, "learning_rate": 8.920046736771824e-05, "loss": 0.0052, "step": 5344 }, { "epoch": 22.85, "learning_rate": 8.926723418461025e-05, "loss": 0.0072, "step": 5348 }, { "epoch": 22.87, "learning_rate": 8.933400100150225e-05, "loss": 0.0034, "step": 5352 }, { "epoch": 22.89, "learning_rate": 8.940076781839426e-05, "loss": 0.0027, "step": 5356 }, { "epoch": 22.91, "learning_rate": 8.946753463528627e-05, "loss": 0.0032, "step": 5360 }, { "epoch": 22.92, "learning_rate": 8.953430145217827e-05, "loss": 0.0038, "step": 5364 }, { "epoch": 22.94, "learning_rate": 8.960106826907027e-05, "loss": 0.0036, "step": 5368 }, { "epoch": 22.96, "learning_rate": 8.966783508596228e-05, "loss": 0.003, "step": 5372 }, { "epoch": 22.97, "learning_rate": 8.973460190285429e-05, "loss": 0.0026, "step": 5376 }, { "epoch": 22.99, "learning_rate": 8.98013687197463e-05, "loss": 0.0054, "step": 5380 }, { "epoch": 23.01, "learning_rate": 8.98681355366383e-05, "loss": 0.0027, "step": 5384 }, { "epoch": 23.03, "learning_rate": 8.99349023535303e-05, "loss": 0.0022, "step": 5388 }, { "epoch": 23.04, "learning_rate": 9.000166917042232e-05, "loss": 0.0048, "step": 5392 }, { "epoch": 23.06, "learning_rate": 9.006843598731432e-05, "loss": 0.0034, "step": 5396 }, { "epoch": 23.08, "learning_rate": 9.013520280420631e-05, "loss": 0.0033, "step": 5400 }, { "epoch": 23.09, "learning_rate": 9.020196962109831e-05, "loss": 0.0023, "step": 5404 }, { "epoch": 23.11, "learning_rate": 9.026873643799033e-05, "loss": 0.0026, "step": 5408 }, { "epoch": 23.13, "learning_rate": 9.033550325488233e-05, "loss": 0.002, "step": 5412 }, { "epoch": 23.15, "learning_rate": 9.040227007177433e-05, "loss": 0.0022, "step": 5416 }, { "epoch": 23.16, "learning_rate": 9.046903688866633e-05, "loss": 0.0075, "step": 5420 }, { "epoch": 23.18, "learning_rate": 9.053580370555834e-05, "loss": 0.0057, "step": 5424 }, { "epoch": 23.2, "learning_rate": 9.060257052245035e-05, "loss": 0.0016, "step": 5428 }, { "epoch": 23.21, "learning_rate": 9.066933733934235e-05, "loss": 0.0063, "step": 5432 }, { "epoch": 23.23, "learning_rate": 9.073610415623436e-05, "loss": 0.003, "step": 5436 }, { "epoch": 23.25, "learning_rate": 9.080287097312636e-05, "loss": 0.004, "step": 5440 }, { "epoch": 23.26, "learning_rate": 9.086963779001836e-05, "loss": 0.0035, "step": 5444 }, { "epoch": 23.28, "learning_rate": 9.093640460691038e-05, "loss": 0.0033, "step": 5448 }, { "epoch": 23.3, "learning_rate": 9.100317142380237e-05, "loss": 0.0062, "step": 5452 }, { "epoch": 23.32, "learning_rate": 9.106993824069437e-05, "loss": 0.0029, "step": 5456 }, { "epoch": 23.33, "learning_rate": 9.113670505758639e-05, "loss": 0.0055, "step": 5460 }, { "epoch": 23.35, "learning_rate": 9.120347187447839e-05, "loss": 0.0037, "step": 5464 }, { "epoch": 23.37, "learning_rate": 9.127023869137039e-05, "loss": 0.0021, "step": 5468 }, { "epoch": 23.38, "learning_rate": 9.13370055082624e-05, "loss": 0.0035, "step": 5472 }, { "epoch": 23.4, "learning_rate": 9.14037723251544e-05, "loss": 0.0022, "step": 5476 }, { "epoch": 23.42, "learning_rate": 9.147053914204641e-05, "loss": 0.0019, "step": 5480 }, { "epoch": 23.44, "learning_rate": 9.153730595893842e-05, "loss": 0.004, "step": 5484 }, { "epoch": 23.45, "learning_rate": 9.160407277583042e-05, "loss": 0.0026, "step": 5488 }, { "epoch": 23.47, "learning_rate": 9.167083959272242e-05, "loss": 0.0039, "step": 5492 }, { "epoch": 23.49, "learning_rate": 9.173760640961442e-05, "loss": 0.0048, "step": 5496 }, { "epoch": 23.5, "learning_rate": 9.180437322650644e-05, "loss": 0.0026, "step": 5500 }, { "epoch": 23.52, "learning_rate": 9.187114004339844e-05, "loss": 0.005, "step": 5504 }, { "epoch": 23.54, "learning_rate": 9.193790686029043e-05, "loss": 0.0035, "step": 5508 }, { "epoch": 23.56, "learning_rate": 9.200467367718243e-05, "loss": 0.0015, "step": 5512 }, { "epoch": 23.57, "learning_rate": 9.207144049407445e-05, "loss": 0.005, "step": 5516 }, { "epoch": 23.59, "learning_rate": 9.213820731096645e-05, "loss": 0.0016, "step": 5520 }, { "epoch": 23.61, "learning_rate": 9.220497412785846e-05, "loss": 0.0032, "step": 5524 }, { "epoch": 23.62, "learning_rate": 9.227174094475046e-05, "loss": 0.0027, "step": 5528 }, { "epoch": 23.64, "learning_rate": 9.233850776164246e-05, "loss": 0.0033, "step": 5532 }, { "epoch": 23.66, "learning_rate": 9.240527457853448e-05, "loss": 0.0021, "step": 5536 }, { "epoch": 23.68, "learning_rate": 9.247204139542648e-05, "loss": 0.0024, "step": 5540 }, { "epoch": 23.69, "learning_rate": 9.253880821231848e-05, "loss": 0.0036, "step": 5544 }, { "epoch": 23.71, "learning_rate": 9.260557502921048e-05, "loss": 0.0051, "step": 5548 }, { "epoch": 23.73, "learning_rate": 9.26723418461025e-05, "loss": 0.0022, "step": 5552 }, { "epoch": 23.74, "learning_rate": 9.27391086629945e-05, "loss": 0.0042, "step": 5556 }, { "epoch": 23.76, "learning_rate": 9.28058754798865e-05, "loss": 0.003, "step": 5560 }, { "epoch": 23.78, "learning_rate": 9.28726422967785e-05, "loss": 0.0024, "step": 5564 }, { "epoch": 23.79, "learning_rate": 9.293940911367051e-05, "loss": 0.0043, "step": 5568 }, { "epoch": 23.81, "learning_rate": 9.300617593056251e-05, "loss": 0.0047, "step": 5572 }, { "epoch": 23.83, "learning_rate": 9.307294274745452e-05, "loss": 0.0029, "step": 5576 }, { "epoch": 23.85, "learning_rate": 9.313970956434652e-05, "loss": 0.0028, "step": 5580 }, { "epoch": 23.86, "learning_rate": 9.320647638123852e-05, "loss": 0.0057, "step": 5584 }, { "epoch": 23.88, "learning_rate": 9.327324319813054e-05, "loss": 0.0042, "step": 5588 }, { "epoch": 23.9, "learning_rate": 9.334001001502254e-05, "loss": 0.0024, "step": 5592 }, { "epoch": 23.91, "learning_rate": 9.340677683191454e-05, "loss": 0.0037, "step": 5596 }, { "epoch": 23.93, "learning_rate": 9.347354364880655e-05, "loss": 0.005, "step": 5600 }, { "epoch": 23.95, "learning_rate": 9.354031046569855e-05, "loss": 0.006, "step": 5604 }, { "epoch": 23.97, "learning_rate": 9.360707728259056e-05, "loss": 0.0077, "step": 5608 }, { "epoch": 23.98, "learning_rate": 9.367384409948257e-05, "loss": 0.0032, "step": 5612 }, { "epoch": 24.0, "learning_rate": 9.374061091637457e-05, "loss": 0.0043, "step": 5616 }, { "epoch": 24.02, "learning_rate": 9.380737773326657e-05, "loss": 0.0035, "step": 5620 }, { "epoch": 24.03, "learning_rate": 9.387414455015858e-05, "loss": 0.0032, "step": 5624 }, { "epoch": 24.05, "learning_rate": 9.394091136705058e-05, "loss": 0.0015, "step": 5628 }, { "epoch": 24.07, "learning_rate": 9.400767818394258e-05, "loss": 0.0036, "step": 5632 }, { "epoch": 24.09, "learning_rate": 9.407444500083458e-05, "loss": 0.0022, "step": 5636 }, { "epoch": 24.1, "learning_rate": 9.41412118177266e-05, "loss": 0.0055, "step": 5640 }, { "epoch": 24.12, "learning_rate": 9.42079786346186e-05, "loss": 0.0033, "step": 5644 }, { "epoch": 24.14, "learning_rate": 9.42747454515106e-05, "loss": 0.0061, "step": 5648 }, { "epoch": 24.15, "learning_rate": 9.434151226840261e-05, "loss": 0.0037, "step": 5652 }, { "epoch": 24.17, "learning_rate": 9.440827908529461e-05, "loss": 0.004, "step": 5656 }, { "epoch": 24.19, "learning_rate": 9.447504590218663e-05, "loss": 0.0035, "step": 5660 }, { "epoch": 24.21, "learning_rate": 9.454181271907863e-05, "loss": 0.005, "step": 5664 }, { "epoch": 24.22, "learning_rate": 9.460857953597063e-05, "loss": 0.002, "step": 5668 }, { "epoch": 24.24, "learning_rate": 9.467534635286263e-05, "loss": 0.0067, "step": 5672 }, { "epoch": 24.26, "learning_rate": 9.474211316975464e-05, "loss": 0.005, "step": 5676 }, { "epoch": 24.27, "learning_rate": 9.480887998664664e-05, "loss": 0.0032, "step": 5680 }, { "epoch": 24.29, "learning_rate": 9.487564680353864e-05, "loss": 0.0039, "step": 5684 }, { "epoch": 24.31, "learning_rate": 9.494241362043064e-05, "loss": 0.0032, "step": 5688 }, { "epoch": 24.32, "learning_rate": 9.500918043732265e-05, "loss": 0.0042, "step": 5692 }, { "epoch": 24.34, "learning_rate": 9.507594725421466e-05, "loss": 0.0044, "step": 5696 }, { "epoch": 24.36, "learning_rate": 9.514271407110667e-05, "loss": 0.0024, "step": 5700 }, { "epoch": 24.38, "learning_rate": 9.520948088799867e-05, "loss": 0.005, "step": 5704 }, { "epoch": 24.39, "learning_rate": 9.527624770489067e-05, "loss": 0.0025, "step": 5708 }, { "epoch": 24.41, "learning_rate": 9.534301452178269e-05, "loss": 0.0047, "step": 5712 }, { "epoch": 24.43, "learning_rate": 9.540978133867469e-05, "loss": 0.003, "step": 5716 }, { "epoch": 24.44, "learning_rate": 9.547654815556669e-05, "loss": 0.0025, "step": 5720 }, { "epoch": 24.46, "learning_rate": 9.55433149724587e-05, "loss": 0.0034, "step": 5724 }, { "epoch": 24.48, "learning_rate": 9.56100817893507e-05, "loss": 0.004, "step": 5728 }, { "epoch": 24.5, "learning_rate": 9.56768486062427e-05, "loss": 0.0024, "step": 5732 }, { "epoch": 24.51, "learning_rate": 9.57436154231347e-05, "loss": 0.0027, "step": 5736 }, { "epoch": 24.53, "learning_rate": 9.58103822400267e-05, "loss": 0.0026, "step": 5740 }, { "epoch": 24.55, "learning_rate": 9.587714905691871e-05, "loss": 0.0017, "step": 5744 }, { "epoch": 24.56, "learning_rate": 9.594391587381072e-05, "loss": 0.0067, "step": 5748 }, { "epoch": 24.58, "learning_rate": 9.601068269070273e-05, "loss": 0.0032, "step": 5752 }, { "epoch": 24.6, "learning_rate": 9.607744950759473e-05, "loss": 0.0078, "step": 5756 }, { "epoch": 24.62, "learning_rate": 9.614421632448673e-05, "loss": 0.0082, "step": 5760 }, { "epoch": 24.63, "learning_rate": 9.621098314137873e-05, "loss": 0.0049, "step": 5764 }, { "epoch": 24.65, "learning_rate": 9.627774995827075e-05, "loss": 0.0036, "step": 5768 }, { "epoch": 24.67, "learning_rate": 9.634451677516275e-05, "loss": 0.0045, "step": 5772 }, { "epoch": 24.68, "learning_rate": 9.641128359205476e-05, "loss": 0.0068, "step": 5776 }, { "epoch": 24.7, "learning_rate": 9.647805040894676e-05, "loss": 0.007, "step": 5780 }, { "epoch": 24.72, "learning_rate": 9.654481722583876e-05, "loss": 0.0026, "step": 5784 }, { "epoch": 24.74, "learning_rate": 9.661158404273076e-05, "loss": 0.0066, "step": 5788 }, { "epoch": 24.75, "learning_rate": 9.667835085962277e-05, "loss": 0.002, "step": 5792 }, { "epoch": 24.77, "learning_rate": 9.674511767651477e-05, "loss": 0.0017, "step": 5796 }, { "epoch": 24.79, "learning_rate": 9.681188449340679e-05, "loss": 0.0021, "step": 5800 }, { "epoch": 24.8, "learning_rate": 9.687865131029879e-05, "loss": 0.0039, "step": 5804 }, { "epoch": 24.82, "learning_rate": 9.694541812719079e-05, "loss": 0.0055, "step": 5808 }, { "epoch": 24.84, "learning_rate": 9.701218494408279e-05, "loss": 0.0042, "step": 5812 }, { "epoch": 24.85, "learning_rate": 9.70789517609748e-05, "loss": 0.0037, "step": 5816 }, { "epoch": 24.87, "learning_rate": 9.714571857786681e-05, "loss": 0.0039, "step": 5820 }, { "epoch": 24.89, "learning_rate": 9.721248539475881e-05, "loss": 0.0026, "step": 5824 }, { "epoch": 24.91, "learning_rate": 9.727925221165082e-05, "loss": 0.0043, "step": 5828 }, { "epoch": 24.92, "learning_rate": 9.734601902854282e-05, "loss": 0.0046, "step": 5832 }, { "epoch": 24.94, "learning_rate": 9.741278584543482e-05, "loss": 0.0033, "step": 5836 }, { "epoch": 24.96, "learning_rate": 9.747955266232682e-05, "loss": 0.004, "step": 5840 }, { "epoch": 24.97, "learning_rate": 9.754631947921883e-05, "loss": 0.0051, "step": 5844 }, { "epoch": 24.99, "learning_rate": 9.761308629611083e-05, "loss": 0.0041, "step": 5848 }, { "epoch": 25.01, "learning_rate": 9.767985311300283e-05, "loss": 0.0039, "step": 5852 }, { "epoch": 25.03, "learning_rate": 9.774661992989485e-05, "loss": 0.0039, "step": 5856 }, { "epoch": 25.04, "learning_rate": 9.781338674678685e-05, "loss": 0.0026, "step": 5860 }, { "epoch": 25.06, "learning_rate": 9.788015356367885e-05, "loss": 0.0023, "step": 5864 }, { "epoch": 25.08, "learning_rate": 9.794692038057086e-05, "loss": 0.0033, "step": 5868 }, { "epoch": 25.09, "learning_rate": 9.801368719746287e-05, "loss": 0.0032, "step": 5872 }, { "epoch": 25.11, "learning_rate": 9.808045401435488e-05, "loss": 0.0048, "step": 5876 }, { "epoch": 25.13, "learning_rate": 9.814722083124688e-05, "loss": 0.0074, "step": 5880 }, { "epoch": 25.15, "learning_rate": 9.821398764813888e-05, "loss": 0.0021, "step": 5884 }, { "epoch": 25.16, "learning_rate": 9.828075446503088e-05, "loss": 0.0019, "step": 5888 }, { "epoch": 25.18, "learning_rate": 9.83475212819229e-05, "loss": 0.0014, "step": 5892 }, { "epoch": 25.2, "learning_rate": 9.841428809881489e-05, "loss": 0.0038, "step": 5896 }, { "epoch": 25.21, "learning_rate": 9.848105491570689e-05, "loss": 0.0046, "step": 5900 }, { "epoch": 25.23, "learning_rate": 9.85478217325989e-05, "loss": 0.0021, "step": 5904 }, { "epoch": 25.25, "learning_rate": 9.861458854949091e-05, "loss": 0.0037, "step": 5908 }, { "epoch": 25.26, "learning_rate": 9.868135536638291e-05, "loss": 0.0019, "step": 5912 }, { "epoch": 25.28, "learning_rate": 9.874812218327492e-05, "loss": 0.0013, "step": 5916 }, { "epoch": 25.3, "learning_rate": 9.881488900016692e-05, "loss": 0.0026, "step": 5920 }, { "epoch": 25.32, "learning_rate": 9.888165581705892e-05, "loss": 0.0034, "step": 5924 }, { "epoch": 25.33, "learning_rate": 9.894842263395094e-05, "loss": 0.0028, "step": 5928 }, { "epoch": 25.35, "learning_rate": 9.901518945084294e-05, "loss": 0.0069, "step": 5932 }, { "epoch": 25.37, "learning_rate": 9.908195626773494e-05, "loss": 0.0031, "step": 5936 }, { "epoch": 25.38, "learning_rate": 9.914872308462694e-05, "loss": 0.0047, "step": 5940 }, { "epoch": 25.4, "learning_rate": 9.921548990151896e-05, "loss": 0.0029, "step": 5944 }, { "epoch": 25.42, "learning_rate": 9.928225671841096e-05, "loss": 0.0026, "step": 5948 }, { "epoch": 25.44, "learning_rate": 9.934902353530295e-05, "loss": 0.002, "step": 5952 }, { "epoch": 25.45, "learning_rate": 9.941579035219495e-05, "loss": 0.0019, "step": 5956 }, { "epoch": 25.47, "learning_rate": 9.948255716908697e-05, "loss": 0.0021, "step": 5960 }, { "epoch": 25.49, "learning_rate": 9.954932398597897e-05, "loss": 0.0015, "step": 5964 }, { "epoch": 25.5, "learning_rate": 9.961609080287098e-05, "loss": 0.0042, "step": 5968 }, { "epoch": 25.52, "learning_rate": 9.968285761976298e-05, "loss": 0.0036, "step": 5972 }, { "epoch": 25.54, "learning_rate": 9.974962443665498e-05, "loss": 0.005, "step": 5976 }, { "epoch": 25.56, "learning_rate": 9.9816391253547e-05, "loss": 0.0019, "step": 5980 }, { "epoch": 25.57, "learning_rate": 9.9883158070439e-05, "loss": 0.0046, "step": 5984 }, { "epoch": 25.59, "learning_rate": 9.9949924887331e-05, "loss": 0.0044, "step": 5988 }, { "epoch": 25.61, "learning_rate": 9.999999991511074e-05, "loss": 0.002, "step": 5992 }, { "epoch": 25.62, "learning_rate": 9.999999787776848e-05, "loss": 0.004, "step": 5996 }, { "epoch": 25.64, "learning_rate": 9.999999312396997e-05, "loss": 0.0056, "step": 6000 }, { "epoch": 25.64, "eval_exact_match": 0.5, "eval_loss": 0.6754465699195862, "eval_runtime": 141.2031, "eval_samples_per_second": 6.813, "step": 6000 }, { "epoch": 25.66, "learning_rate": 9.999998565371547e-05, "loss": 0.0071, "step": 6004 }, { "epoch": 25.68, "learning_rate": 9.999997546700539e-05, "loss": 0.0036, "step": 6008 }, { "epoch": 25.69, "learning_rate": 9.999996256384028e-05, "loss": 0.0021, "step": 6012 }, { "epoch": 25.71, "learning_rate": 9.999994694422086e-05, "loss": 0.0037, "step": 6016 }, { "epoch": 25.73, "learning_rate": 9.999992860814793e-05, "loss": 0.0028, "step": 6020 }, { "epoch": 25.74, "learning_rate": 9.999990755562253e-05, "loss": 0.0073, "step": 6024 }, { "epoch": 25.76, "learning_rate": 9.999988378664581e-05, "loss": 0.0037, "step": 6028 }, { "epoch": 25.78, "learning_rate": 9.999985730121903e-05, "loss": 0.0035, "step": 6032 }, { "epoch": 25.79, "learning_rate": 9.999982809934364e-05, "loss": 0.004, "step": 6036 }, { "epoch": 25.81, "learning_rate": 9.999979618102123e-05, "loss": 0.0018, "step": 6040 }, { "epoch": 25.83, "learning_rate": 9.999976154625354e-05, "loss": 0.0028, "step": 6044 }, { "epoch": 25.85, "learning_rate": 9.999972419504244e-05, "loss": 0.003, "step": 6048 }, { "epoch": 25.86, "learning_rate": 9.999968412738995e-05, "loss": 0.0025, "step": 6052 }, { "epoch": 25.88, "learning_rate": 9.999964134329827e-05, "loss": 0.0024, "step": 6056 }, { "epoch": 25.9, "learning_rate": 9.999959584276973e-05, "loss": 0.0038, "step": 6060 }, { "epoch": 25.91, "learning_rate": 9.999954762580675e-05, "loss": 0.0027, "step": 6064 }, { "epoch": 25.93, "learning_rate": 9.999949669241202e-05, "loss": 0.0019, "step": 6068 }, { "epoch": 25.95, "learning_rate": 9.999944304258826e-05, "loss": 0.0034, "step": 6072 }, { "epoch": 25.97, "learning_rate": 9.999938667633839e-05, "loss": 0.0026, "step": 6076 }, { "epoch": 25.98, "learning_rate": 9.999932759366551e-05, "loss": 0.0041, "step": 6080 }, { "epoch": 26.0, "learning_rate": 9.999926579457277e-05, "loss": 0.0026, "step": 6084 }, { "epoch": 26.02, "learning_rate": 9.999920127906357e-05, "loss": 0.0028, "step": 6088 }, { "epoch": 26.03, "learning_rate": 9.999913404714139e-05, "loss": 0.0047, "step": 6092 }, { "epoch": 26.05, "learning_rate": 9.999906409880991e-05, "loss": 0.0028, "step": 6096 }, { "epoch": 26.07, "learning_rate": 9.999899143407291e-05, "loss": 0.0046, "step": 6100 }, { "epoch": 26.09, "learning_rate": 9.999891605293434e-05, "loss": 0.0027, "step": 6104 }, { "epoch": 26.1, "learning_rate": 9.999883795539832e-05, "loss": 0.0024, "step": 6108 }, { "epoch": 26.12, "learning_rate": 9.999875714146904e-05, "loss": 0.0074, "step": 6112 }, { "epoch": 26.14, "learning_rate": 9.999867361115096e-05, "loss": 0.003, "step": 6116 }, { "epoch": 26.15, "learning_rate": 9.999858736444856e-05, "loss": 0.0034, "step": 6120 }, { "epoch": 26.17, "learning_rate": 9.999849840136655e-05, "loss": 0.0035, "step": 6124 }, { "epoch": 26.19, "learning_rate": 9.999840672190975e-05, "loss": 0.0023, "step": 6128 }, { "epoch": 26.21, "learning_rate": 9.999831232608316e-05, "loss": 0.0037, "step": 6132 }, { "epoch": 26.22, "learning_rate": 9.99982152138919e-05, "loss": 0.0031, "step": 6136 }, { "epoch": 26.24, "learning_rate": 9.999811538534123e-05, "loss": 0.0053, "step": 6140 }, { "epoch": 26.26, "learning_rate": 9.99980128404366e-05, "loss": 0.0042, "step": 6144 }, { "epoch": 26.27, "learning_rate": 9.999790757918357e-05, "loss": 0.0031, "step": 6148 }, { "epoch": 26.29, "learning_rate": 9.999779960158786e-05, "loss": 0.0015, "step": 6152 }, { "epoch": 26.31, "learning_rate": 9.999768890765533e-05, "loss": 0.0042, "step": 6156 }, { "epoch": 26.32, "learning_rate": 9.999757549739201e-05, "loss": 0.0029, "step": 6160 }, { "epoch": 26.34, "learning_rate": 9.999745937080403e-05, "loss": 0.0037, "step": 6164 }, { "epoch": 26.36, "learning_rate": 9.999734052789775e-05, "loss": 0.0027, "step": 6168 }, { "epoch": 26.38, "learning_rate": 9.999721896867957e-05, "loss": 0.0019, "step": 6172 }, { "epoch": 26.39, "learning_rate": 9.999709469315612e-05, "loss": 0.0031, "step": 6176 }, { "epoch": 26.41, "learning_rate": 9.999696770133417e-05, "loss": 0.0037, "step": 6180 }, { "epoch": 26.43, "learning_rate": 9.999683799322059e-05, "loss": 0.0018, "step": 6184 }, { "epoch": 26.44, "learning_rate": 9.999670556882244e-05, "loss": 0.0025, "step": 6188 }, { "epoch": 26.46, "learning_rate": 9.99965704281469e-05, "loss": 0.0022, "step": 6192 }, { "epoch": 26.48, "learning_rate": 9.999643257120135e-05, "loss": 0.0022, "step": 6196 }, { "epoch": 26.5, "learning_rate": 9.999629199799323e-05, "loss": 0.0032, "step": 6200 }, { "epoch": 26.51, "learning_rate": 9.999614870853023e-05, "loss": 0.0041, "step": 6204 }, { "epoch": 26.53, "learning_rate": 9.999600270282007e-05, "loss": 0.005, "step": 6208 }, { "epoch": 26.55, "learning_rate": 9.999585398087074e-05, "loss": 0.0037, "step": 6212 }, { "epoch": 26.56, "learning_rate": 9.99957025426903e-05, "loss": 0.005, "step": 6216 }, { "epoch": 26.58, "learning_rate": 9.999554838828698e-05, "loss": 0.0039, "step": 6220 }, { "epoch": 26.6, "learning_rate": 9.999539151766915e-05, "loss": 0.003, "step": 6224 }, { "epoch": 26.62, "learning_rate": 9.999523193084532e-05, "loss": 0.002, "step": 6228 }, { "epoch": 26.63, "learning_rate": 9.999506962782418e-05, "loss": 0.0018, "step": 6232 }, { "epoch": 26.65, "learning_rate": 9.999490460861454e-05, "loss": 0.0031, "step": 6236 }, { "epoch": 26.67, "learning_rate": 9.999473687322538e-05, "loss": 0.0029, "step": 6240 }, { "epoch": 26.68, "learning_rate": 9.999456642166577e-05, "loss": 0.0034, "step": 6244 }, { "epoch": 26.7, "learning_rate": 9.999439325394504e-05, "loss": 0.002, "step": 6248 }, { "epoch": 26.72, "learning_rate": 9.999421737007254e-05, "loss": 0.0023, "step": 6252 }, { "epoch": 26.74, "learning_rate": 9.999403877005783e-05, "loss": 0.003, "step": 6256 }, { "epoch": 26.75, "learning_rate": 9.999385745391065e-05, "loss": 0.0034, "step": 6260 }, { "epoch": 26.77, "learning_rate": 9.999367342164083e-05, "loss": 0.0033, "step": 6264 }, { "epoch": 26.79, "learning_rate": 9.999348667325836e-05, "loss": 0.0037, "step": 6268 }, { "epoch": 26.8, "learning_rate": 9.99932972087734e-05, "loss": 0.0022, "step": 6272 }, { "epoch": 26.82, "learning_rate": 9.999310502819623e-05, "loss": 0.0036, "step": 6276 }, { "epoch": 26.84, "learning_rate": 9.999291013153729e-05, "loss": 0.0065, "step": 6280 }, { "epoch": 26.85, "learning_rate": 9.999271251880718e-05, "loss": 0.0047, "step": 6284 }, { "epoch": 26.87, "learning_rate": 9.999251219001662e-05, "loss": 0.0015, "step": 6288 }, { "epoch": 26.89, "learning_rate": 9.999230914517653e-05, "loss": 0.0022, "step": 6292 }, { "epoch": 26.91, "learning_rate": 9.99921033842979e-05, "loss": 0.0016, "step": 6296 }, { "epoch": 26.92, "learning_rate": 9.999189490739191e-05, "loss": 0.0035, "step": 6300 }, { "epoch": 26.94, "learning_rate": 9.999168371446993e-05, "loss": 0.0022, "step": 6304 }, { "epoch": 26.96, "learning_rate": 9.99914698055434e-05, "loss": 0.0029, "step": 6308 }, { "epoch": 26.97, "learning_rate": 9.999125318062394e-05, "loss": 0.004, "step": 6312 }, { "epoch": 26.99, "learning_rate": 9.999103383972333e-05, "loss": 0.0027, "step": 6316 }, { "epoch": 27.01, "learning_rate": 9.999081178285347e-05, "loss": 0.0031, "step": 6320 }, { "epoch": 27.03, "learning_rate": 9.999058701002645e-05, "loss": 0.0044, "step": 6324 }, { "epoch": 27.04, "learning_rate": 9.999035952125448e-05, "loss": 0.0014, "step": 6328 }, { "epoch": 27.06, "learning_rate": 9.99901293165499e-05, "loss": 0.0071, "step": 6332 }, { "epoch": 27.08, "learning_rate": 9.998989639592521e-05, "loss": 0.0028, "step": 6336 }, { "epoch": 27.09, "learning_rate": 9.998966075939309e-05, "loss": 0.0018, "step": 6340 }, { "epoch": 27.11, "learning_rate": 9.998942240696633e-05, "loss": 0.0015, "step": 6344 }, { "epoch": 27.13, "learning_rate": 9.998918133865788e-05, "loss": 0.0018, "step": 6348 }, { "epoch": 27.15, "learning_rate": 9.998893755448085e-05, "loss": 0.0031, "step": 6352 }, { "epoch": 27.16, "learning_rate": 9.998869105444845e-05, "loss": 0.0053, "step": 6356 }, { "epoch": 27.18, "learning_rate": 9.998844183857412e-05, "loss": 0.0033, "step": 6360 }, { "epoch": 27.2, "learning_rate": 9.998818990687135e-05, "loss": 0.0021, "step": 6364 }, { "epoch": 27.21, "learning_rate": 9.998793525935387e-05, "loss": 0.002, "step": 6368 }, { "epoch": 27.23, "learning_rate": 9.998767789603547e-05, "loss": 0.0011, "step": 6372 }, { "epoch": 27.25, "learning_rate": 9.998741781693017e-05, "loss": 0.0034, "step": 6376 }, { "epoch": 27.26, "learning_rate": 9.99871550220521e-05, "loss": 0.0034, "step": 6380 }, { "epoch": 27.28, "learning_rate": 9.998688951141552e-05, "loss": 0.0018, "step": 6384 }, { "epoch": 27.3, "learning_rate": 9.998662128503487e-05, "loss": 0.0038, "step": 6388 }, { "epoch": 27.32, "learning_rate": 9.998635034292469e-05, "loss": 0.0009, "step": 6392 }, { "epoch": 27.33, "learning_rate": 9.998607668509975e-05, "loss": 0.0019, "step": 6396 }, { "epoch": 27.35, "learning_rate": 9.998580031157486e-05, "loss": 0.0036, "step": 6400 }, { "epoch": 27.37, "learning_rate": 9.998552122236509e-05, "loss": 0.0019, "step": 6404 }, { "epoch": 27.38, "learning_rate": 9.998523941748556e-05, "loss": 0.0038, "step": 6408 }, { "epoch": 27.4, "learning_rate": 9.998495489695161e-05, "loss": 0.0042, "step": 6412 }, { "epoch": 27.42, "learning_rate": 9.998466766077868e-05, "loss": 0.0027, "step": 6416 }, { "epoch": 27.44, "learning_rate": 9.998437770898239e-05, "loss": 0.0026, "step": 6420 }, { "epoch": 27.45, "learning_rate": 9.998408504157849e-05, "loss": 0.0027, "step": 6424 }, { "epoch": 27.47, "learning_rate": 9.998378965858286e-05, "loss": 0.0043, "step": 6428 }, { "epoch": 27.49, "learning_rate": 9.998349156001156e-05, "loss": 0.0031, "step": 6432 }, { "epoch": 27.5, "learning_rate": 9.998319074588081e-05, "loss": 0.0018, "step": 6436 }, { "epoch": 27.52, "learning_rate": 9.998288721620692e-05, "loss": 0.0033, "step": 6440 }, { "epoch": 27.54, "learning_rate": 9.998258097100639e-05, "loss": 0.0009, "step": 6444 }, { "epoch": 27.56, "learning_rate": 9.998227201029586e-05, "loss": 0.0035, "step": 6448 }, { "epoch": 27.57, "learning_rate": 9.998196033409212e-05, "loss": 0.0034, "step": 6452 }, { "epoch": 27.59, "learning_rate": 9.998164594241208e-05, "loss": 0.0023, "step": 6456 }, { "epoch": 27.61, "learning_rate": 9.998132883527287e-05, "loss": 0.0042, "step": 6460 }, { "epoch": 27.62, "learning_rate": 9.998100901269167e-05, "loss": 0.0011, "step": 6464 }, { "epoch": 27.64, "learning_rate": 9.998068647468587e-05, "loss": 0.0047, "step": 6468 }, { "epoch": 27.66, "learning_rate": 9.9980361221273e-05, "loss": 0.0023, "step": 6472 }, { "epoch": 27.68, "learning_rate": 9.998003325247071e-05, "loss": 0.0025, "step": 6476 }, { "epoch": 27.69, "learning_rate": 9.997970256829687e-05, "loss": 0.003, "step": 6480 }, { "epoch": 27.71, "learning_rate": 9.997936916876939e-05, "loss": 0.0016, "step": 6484 }, { "epoch": 27.73, "learning_rate": 9.997903305390641e-05, "loss": 0.0027, "step": 6488 }, { "epoch": 27.74, "learning_rate": 9.997869422372618e-05, "loss": 0.0038, "step": 6492 }, { "epoch": 27.76, "learning_rate": 9.997835267824712e-05, "loss": 0.0047, "step": 6496 }, { "epoch": 27.78, "learning_rate": 9.997800841748777e-05, "loss": 0.002, "step": 6500 }, { "epoch": 27.79, "learning_rate": 9.997766144146685e-05, "loss": 0.0065, "step": 6504 }, { "epoch": 27.81, "learning_rate": 9.997731175020319e-05, "loss": 0.0062, "step": 6508 }, { "epoch": 27.83, "learning_rate": 9.997695934371583e-05, "loss": 0.0043, "step": 6512 }, { "epoch": 27.85, "learning_rate": 9.997660422202386e-05, "loss": 0.006, "step": 6516 }, { "epoch": 27.86, "learning_rate": 9.997624638514661e-05, "loss": 0.0015, "step": 6520 }, { "epoch": 27.88, "learning_rate": 9.997588583310352e-05, "loss": 0.0029, "step": 6524 }, { "epoch": 27.9, "learning_rate": 9.997552256591416e-05, "loss": 0.0052, "step": 6528 }, { "epoch": 27.91, "learning_rate": 9.997515658359828e-05, "loss": 0.0014, "step": 6532 }, { "epoch": 27.93, "learning_rate": 9.997478788617576e-05, "loss": 0.0018, "step": 6536 }, { "epoch": 27.95, "learning_rate": 9.997441647366663e-05, "loss": 0.0025, "step": 6540 }, { "epoch": 27.97, "learning_rate": 9.997404234609107e-05, "loss": 0.0022, "step": 6544 }, { "epoch": 27.98, "learning_rate": 9.997366550346943e-05, "loss": 0.0025, "step": 6548 }, { "epoch": 28.0, "learning_rate": 9.997328594582213e-05, "loss": 0.0033, "step": 6552 }, { "epoch": 28.02, "learning_rate": 9.997290367316982e-05, "loss": 0.0018, "step": 6556 }, { "epoch": 28.03, "learning_rate": 9.997251868553328e-05, "loss": 0.0025, "step": 6560 }, { "epoch": 28.05, "learning_rate": 9.997213098293341e-05, "loss": 0.0038, "step": 6564 }, { "epoch": 28.07, "learning_rate": 9.997174056539129e-05, "loss": 0.0038, "step": 6568 }, { "epoch": 28.09, "learning_rate": 9.997134743292811e-05, "loss": 0.0027, "step": 6572 }, { "epoch": 28.1, "learning_rate": 9.997095158556525e-05, "loss": 0.0028, "step": 6576 }, { "epoch": 28.12, "learning_rate": 9.997055302332419e-05, "loss": 0.003, "step": 6580 }, { "epoch": 28.14, "learning_rate": 9.99701517462266e-05, "loss": 0.0031, "step": 6584 }, { "epoch": 28.15, "learning_rate": 9.996974775429427e-05, "loss": 0.0022, "step": 6588 }, { "epoch": 28.17, "learning_rate": 9.996934104754916e-05, "loss": 0.0034, "step": 6592 }, { "epoch": 28.19, "learning_rate": 9.996893162601337e-05, "loss": 0.0033, "step": 6596 }, { "epoch": 28.21, "learning_rate": 9.996851948970915e-05, "loss": 0.0024, "step": 6600 }, { "epoch": 28.22, "learning_rate": 9.996810463865886e-05, "loss": 0.0016, "step": 6604 }, { "epoch": 28.24, "learning_rate": 9.996768707288506e-05, "loss": 0.0049, "step": 6608 }, { "epoch": 28.26, "learning_rate": 9.996726679241043e-05, "loss": 0.0062, "step": 6612 }, { "epoch": 28.27, "learning_rate": 9.99668437972578e-05, "loss": 0.0058, "step": 6616 }, { "epoch": 28.29, "learning_rate": 9.996641808745017e-05, "loss": 0.0044, "step": 6620 }, { "epoch": 28.31, "learning_rate": 9.996598966301065e-05, "loss": 0.0026, "step": 6624 }, { "epoch": 28.32, "learning_rate": 9.996555852396252e-05, "loss": 0.002, "step": 6628 }, { "epoch": 28.34, "learning_rate": 9.996512467032922e-05, "loss": 0.0022, "step": 6632 }, { "epoch": 28.36, "learning_rate": 9.996468810213428e-05, "loss": 0.0025, "step": 6636 }, { "epoch": 28.38, "learning_rate": 9.996424881940146e-05, "loss": 0.0037, "step": 6640 }, { "epoch": 28.39, "learning_rate": 9.996380682215461e-05, "loss": 0.0019, "step": 6644 }, { "epoch": 28.41, "learning_rate": 9.996336211041773e-05, "loss": 0.0028, "step": 6648 }, { "epoch": 28.43, "learning_rate": 9.996291468421501e-05, "loss": 0.0028, "step": 6652 }, { "epoch": 28.44, "learning_rate": 9.996246454357073e-05, "loss": 0.0025, "step": 6656 }, { "epoch": 28.46, "learning_rate": 9.996201168850936e-05, "loss": 0.0007, "step": 6660 }, { "epoch": 28.48, "learning_rate": 9.996155611905551e-05, "loss": 0.002, "step": 6664 }, { "epoch": 28.5, "learning_rate": 9.99610978352339e-05, "loss": 0.0035, "step": 6668 }, { "epoch": 28.51, "learning_rate": 9.996063683706946e-05, "loss": 0.0032, "step": 6672 }, { "epoch": 28.53, "learning_rate": 9.996017312458724e-05, "loss": 0.0017, "step": 6676 }, { "epoch": 28.55, "learning_rate": 9.995970669781241e-05, "loss": 0.0016, "step": 6680 }, { "epoch": 28.56, "learning_rate": 9.995923755677033e-05, "loss": 0.0049, "step": 6684 }, { "epoch": 28.58, "learning_rate": 9.995876570148646e-05, "loss": 0.0024, "step": 6688 }, { "epoch": 28.6, "learning_rate": 9.995829113198645e-05, "loss": 0.0041, "step": 6692 }, { "epoch": 28.62, "learning_rate": 9.995781384829609e-05, "loss": 0.0042, "step": 6696 }, { "epoch": 28.63, "learning_rate": 9.99573338504413e-05, "loss": 0.0036, "step": 6700 }, { "epoch": 28.65, "learning_rate": 9.995685113844818e-05, "loss": 0.0018, "step": 6704 }, { "epoch": 28.67, "learning_rate": 9.995636571234293e-05, "loss": 0.0019, "step": 6708 }, { "epoch": 28.68, "learning_rate": 9.995587757215194e-05, "loss": 0.0012, "step": 6712 }, { "epoch": 28.7, "learning_rate": 9.995538671790171e-05, "loss": 0.0021, "step": 6716 }, { "epoch": 28.72, "learning_rate": 9.995489314961892e-05, "loss": 0.0041, "step": 6720 }, { "epoch": 28.74, "learning_rate": 9.995439686733039e-05, "loss": 0.0026, "step": 6724 }, { "epoch": 28.75, "learning_rate": 9.995389787106309e-05, "loss": 0.0012, "step": 6728 }, { "epoch": 28.77, "learning_rate": 9.995339616084409e-05, "loss": 0.0023, "step": 6732 }, { "epoch": 28.79, "learning_rate": 9.995289173670071e-05, "loss": 0.002, "step": 6736 }, { "epoch": 28.8, "learning_rate": 9.995238459866029e-05, "loss": 0.0035, "step": 6740 }, { "epoch": 28.82, "learning_rate": 9.995187474675042e-05, "loss": 0.0012, "step": 6744 }, { "epoch": 28.84, "learning_rate": 9.995136218099878e-05, "loss": 0.0016, "step": 6748 }, { "epoch": 28.85, "learning_rate": 9.995084690143326e-05, "loss": 0.0047, "step": 6752 }, { "epoch": 28.87, "learning_rate": 9.99503289080818e-05, "loss": 0.0015, "step": 6756 }, { "epoch": 28.89, "learning_rate": 9.994980820097258e-05, "loss": 0.0055, "step": 6760 }, { "epoch": 28.91, "learning_rate": 9.994928478013387e-05, "loss": 0.0013, "step": 6764 }, { "epoch": 28.92, "learning_rate": 9.994875864559413e-05, "loss": 0.0028, "step": 6768 }, { "epoch": 28.94, "learning_rate": 9.99482297973819e-05, "loss": 0.0032, "step": 6772 }, { "epoch": 28.96, "learning_rate": 9.994769823552594e-05, "loss": 0.0039, "step": 6776 }, { "epoch": 28.97, "learning_rate": 9.994716396005515e-05, "loss": 0.002, "step": 6780 }, { "epoch": 28.99, "learning_rate": 9.994662697099851e-05, "loss": 0.0041, "step": 6784 }, { "epoch": 29.01, "learning_rate": 9.994608726838523e-05, "loss": 0.0013, "step": 6788 }, { "epoch": 29.03, "learning_rate": 9.994554485224463e-05, "loss": 0.001, "step": 6792 }, { "epoch": 29.04, "learning_rate": 9.994499972260616e-05, "loss": 0.0017, "step": 6796 }, { "epoch": 29.06, "learning_rate": 9.994445187949944e-05, "loss": 0.0041, "step": 6800 }, { "epoch": 29.08, "learning_rate": 9.994390132295426e-05, "loss": 0.0044, "step": 6804 }, { "epoch": 29.09, "learning_rate": 9.99433480530005e-05, "loss": 0.0037, "step": 6808 }, { "epoch": 29.11, "learning_rate": 9.994279206966824e-05, "loss": 0.0013, "step": 6812 }, { "epoch": 29.13, "learning_rate": 9.994223337298764e-05, "loss": 0.0019, "step": 6816 }, { "epoch": 29.15, "learning_rate": 9.994167196298914e-05, "loss": 0.0017, "step": 6820 }, { "epoch": 29.16, "learning_rate": 9.994110783970316e-05, "loss": 0.0024, "step": 6824 }, { "epoch": 29.18, "learning_rate": 9.994054100316038e-05, "loss": 0.0036, "step": 6828 }, { "epoch": 29.2, "learning_rate": 9.99399714533916e-05, "loss": 0.0047, "step": 6832 }, { "epoch": 29.21, "learning_rate": 9.993939919042776e-05, "loss": 0.0036, "step": 6836 }, { "epoch": 29.23, "learning_rate": 9.993882421429994e-05, "loss": 0.0024, "step": 6840 }, { "epoch": 29.25, "learning_rate": 9.993824652503939e-05, "loss": 0.0018, "step": 6844 }, { "epoch": 29.26, "learning_rate": 9.99376661226775e-05, "loss": 0.0035, "step": 6848 }, { "epoch": 29.28, "learning_rate": 9.993708300724577e-05, "loss": 0.0021, "step": 6852 }, { "epoch": 29.3, "learning_rate": 9.993649717877593e-05, "loss": 0.0027, "step": 6856 }, { "epoch": 29.32, "learning_rate": 9.993590863729978e-05, "loss": 0.0031, "step": 6860 }, { "epoch": 29.33, "learning_rate": 9.993531738284929e-05, "loss": 0.0026, "step": 6864 }, { "epoch": 29.35, "learning_rate": 9.99347234154566e-05, "loss": 0.0019, "step": 6868 }, { "epoch": 29.37, "learning_rate": 9.993412673515396e-05, "loss": 0.0034, "step": 6872 }, { "epoch": 29.38, "learning_rate": 9.993352734197379e-05, "loss": 0.0022, "step": 6876 }, { "epoch": 29.4, "learning_rate": 9.993292523594867e-05, "loss": 0.0054, "step": 6880 }, { "epoch": 29.42, "learning_rate": 9.993232041711129e-05, "loss": 0.0025, "step": 6884 }, { "epoch": 29.44, "learning_rate": 9.993171288549454e-05, "loss": 0.004, "step": 6888 }, { "epoch": 29.45, "learning_rate": 9.99311026411314e-05, "loss": 0.003, "step": 6892 }, { "epoch": 29.47, "learning_rate": 9.993048968405502e-05, "loss": 0.0017, "step": 6896 }, { "epoch": 29.49, "learning_rate": 9.992987401429873e-05, "loss": 0.0017, "step": 6900 }, { "epoch": 29.5, "learning_rate": 9.992925563189595e-05, "loss": 0.0028, "step": 6904 }, { "epoch": 29.52, "learning_rate": 9.992863453688031e-05, "loss": 0.0049, "step": 6908 }, { "epoch": 29.54, "learning_rate": 9.992801072928551e-05, "loss": 0.0042, "step": 6912 }, { "epoch": 29.56, "learning_rate": 9.992738420914546e-05, "loss": 0.0019, "step": 6916 }, { "epoch": 29.57, "learning_rate": 9.992675497649424e-05, "loss": 0.0044, "step": 6920 }, { "epoch": 29.59, "learning_rate": 9.992612303136597e-05, "loss": 0.0049, "step": 6924 }, { "epoch": 29.61, "learning_rate": 9.992548837379501e-05, "loss": 0.0028, "step": 6928 }, { "epoch": 29.62, "learning_rate": 9.992485100381584e-05, "loss": 0.0036, "step": 6932 }, { "epoch": 29.64, "learning_rate": 9.992421092146309e-05, "loss": 0.003, "step": 6936 }, { "epoch": 29.66, "learning_rate": 9.992356812677152e-05, "loss": 0.0064, "step": 6940 }, { "epoch": 29.68, "learning_rate": 9.992292261977609e-05, "loss": 0.0013, "step": 6944 }, { "epoch": 29.69, "learning_rate": 9.992227440051184e-05, "loss": 0.0043, "step": 6948 }, { "epoch": 29.71, "learning_rate": 9.992162346901399e-05, "loss": 0.0018, "step": 6952 }, { "epoch": 29.73, "learning_rate": 9.99209698253179e-05, "loss": 0.0048, "step": 6956 }, { "epoch": 29.74, "learning_rate": 9.992031346945911e-05, "loss": 0.0025, "step": 6960 }, { "epoch": 29.76, "learning_rate": 9.991965440147324e-05, "loss": 0.0017, "step": 6964 }, { "epoch": 29.78, "learning_rate": 9.991899262139612e-05, "loss": 0.0022, "step": 6968 }, { "epoch": 29.79, "learning_rate": 9.991832812926371e-05, "loss": 0.0033, "step": 6972 }, { "epoch": 29.81, "learning_rate": 9.99176609251121e-05, "loss": 0.0036, "step": 6976 }, { "epoch": 29.83, "learning_rate": 9.991699100897753e-05, "loss": 0.0047, "step": 6980 }, { "epoch": 29.85, "learning_rate": 9.99163183808964e-05, "loss": 0.0021, "step": 6984 }, { "epoch": 29.86, "learning_rate": 9.991564304090525e-05, "loss": 0.002, "step": 6988 }, { "epoch": 29.88, "learning_rate": 9.99149649890408e-05, "loss": 0.0044, "step": 6992 }, { "epoch": 29.9, "learning_rate": 9.991428422533987e-05, "loss": 0.0025, "step": 6996 }, { "epoch": 29.91, "learning_rate": 9.991360074983943e-05, "loss": 0.006, "step": 7000 }, { "epoch": 29.91, "eval_exact_match": 0.502079002079002, "eval_loss": 0.7090530395507812, "eval_runtime": 143.1626, "eval_samples_per_second": 6.72, "step": 7000 }, { "epoch": 29.93, "learning_rate": 9.991291456257663e-05, "loss": 0.0016, "step": 7004 }, { "epoch": 29.95, "learning_rate": 9.991222566358874e-05, "loss": 0.0021, "step": 7008 }, { "epoch": 29.97, "learning_rate": 9.99115340529132e-05, "loss": 0.0012, "step": 7012 }, { "epoch": 29.98, "learning_rate": 9.991083973058757e-05, "loss": 0.0042, "step": 7016 }, { "epoch": 30.0, "learning_rate": 9.991014269664958e-05, "loss": 0.0028, "step": 7020 }, { "epoch": 30.02, "learning_rate": 9.990944295113711e-05, "loss": 0.0038, "step": 7024 }, { "epoch": 30.03, "learning_rate": 9.990874049408815e-05, "loss": 0.0027, "step": 7028 }, { "epoch": 30.05, "learning_rate": 9.990803532554087e-05, "loss": 0.0031, "step": 7032 }, { "epoch": 30.07, "learning_rate": 9.990732744553361e-05, "loss": 0.0041, "step": 7036 }, { "epoch": 30.09, "learning_rate": 9.990661685410481e-05, "loss": 0.002, "step": 7040 }, { "epoch": 30.1, "learning_rate": 9.990590355129306e-05, "loss": 0.0017, "step": 7044 }, { "epoch": 30.12, "learning_rate": 9.990518753713712e-05, "loss": 0.0021, "step": 7048 }, { "epoch": 30.14, "learning_rate": 9.990446881167592e-05, "loss": 0.0021, "step": 7052 }, { "epoch": 30.15, "learning_rate": 9.990374737494848e-05, "loss": 0.0017, "step": 7056 }, { "epoch": 30.17, "learning_rate": 9.990302322699399e-05, "loss": 0.0041, "step": 7060 }, { "epoch": 30.19, "learning_rate": 9.99022963678518e-05, "loss": 0.0013, "step": 7064 }, { "epoch": 30.21, "learning_rate": 9.99015667975614e-05, "loss": 0.0035, "step": 7068 }, { "epoch": 30.22, "learning_rate": 9.990083451616245e-05, "loss": 0.0028, "step": 7072 }, { "epoch": 30.24, "learning_rate": 9.990009952369471e-05, "loss": 0.0013, "step": 7076 }, { "epoch": 30.26, "learning_rate": 9.98993618201981e-05, "loss": 0.0031, "step": 7080 }, { "epoch": 30.27, "learning_rate": 9.98986214057127e-05, "loss": 0.0015, "step": 7084 }, { "epoch": 30.29, "learning_rate": 9.98978782802788e-05, "loss": 0.004, "step": 7088 }, { "epoch": 30.31, "learning_rate": 9.989713244393669e-05, "loss": 0.002, "step": 7092 }, { "epoch": 30.32, "learning_rate": 9.989638389672691e-05, "loss": 0.0028, "step": 7096 }, { "epoch": 30.34, "learning_rate": 9.989563263869016e-05, "loss": 0.0037, "step": 7100 }, { "epoch": 30.36, "learning_rate": 9.989487866986724e-05, "loss": 0.0011, "step": 7104 }, { "epoch": 30.38, "learning_rate": 9.98941219902991e-05, "loss": 0.0018, "step": 7108 }, { "epoch": 30.39, "learning_rate": 9.989336260002687e-05, "loss": 0.0015, "step": 7112 }, { "epoch": 30.41, "learning_rate": 9.989260049909179e-05, "loss": 0.0031, "step": 7116 }, { "epoch": 30.43, "learning_rate": 9.989183568753527e-05, "loss": 0.0039, "step": 7120 }, { "epoch": 30.44, "learning_rate": 9.989106816539885e-05, "loss": 0.0032, "step": 7124 }, { "epoch": 30.46, "learning_rate": 9.989029793272425e-05, "loss": 0.0017, "step": 7128 }, { "epoch": 30.48, "learning_rate": 9.988952498955331e-05, "loss": 0.0011, "step": 7132 }, { "epoch": 30.5, "learning_rate": 9.988874933592803e-05, "loss": 0.0035, "step": 7136 }, { "epoch": 30.51, "learning_rate": 9.988797097189052e-05, "loss": 0.0041, "step": 7140 }, { "epoch": 30.53, "learning_rate": 9.988718989748309e-05, "loss": 0.0044, "step": 7144 }, { "epoch": 30.55, "learning_rate": 9.988640611274819e-05, "loss": 0.0022, "step": 7148 }, { "epoch": 30.56, "learning_rate": 9.988561961772838e-05, "loss": 0.0027, "step": 7152 }, { "epoch": 30.58, "learning_rate": 9.988483041246639e-05, "loss": 0.0018, "step": 7156 }, { "epoch": 30.6, "learning_rate": 9.988403849700511e-05, "loss": 0.0016, "step": 7160 }, { "epoch": 30.62, "learning_rate": 9.988324387138754e-05, "loss": 0.0048, "step": 7164 }, { "epoch": 30.63, "learning_rate": 9.988244653565688e-05, "loss": 0.001, "step": 7168 }, { "epoch": 30.65, "learning_rate": 9.988164648985644e-05, "loss": 0.004, "step": 7172 }, { "epoch": 30.67, "learning_rate": 9.988084373402968e-05, "loss": 0.002, "step": 7176 }, { "epoch": 30.68, "learning_rate": 9.988003826822022e-05, "loss": 0.0025, "step": 7180 }, { "epoch": 30.7, "learning_rate": 9.987923009247179e-05, "loss": 0.0029, "step": 7184 }, { "epoch": 30.72, "learning_rate": 9.987841920682834e-05, "loss": 0.0022, "step": 7188 }, { "epoch": 30.74, "learning_rate": 9.987760561133391e-05, "loss": 0.0023, "step": 7192 }, { "epoch": 30.75, "learning_rate": 9.98767893060327e-05, "loss": 0.0011, "step": 7196 }, { "epoch": 30.77, "learning_rate": 9.987597029096907e-05, "loss": 0.002, "step": 7200 }, { "epoch": 30.79, "learning_rate": 9.987514856618748e-05, "loss": 0.0014, "step": 7204 }, { "epoch": 30.8, "learning_rate": 9.987432413173262e-05, "loss": 0.0025, "step": 7208 }, { "epoch": 30.82, "learning_rate": 9.987349698764923e-05, "loss": 0.0033, "step": 7212 }, { "epoch": 30.84, "learning_rate": 9.987266713398232e-05, "loss": 0.0023, "step": 7216 }, { "epoch": 30.85, "learning_rate": 9.987183457077689e-05, "loss": 0.0026, "step": 7220 }, { "epoch": 30.87, "learning_rate": 9.987099929807824e-05, "loss": 0.0027, "step": 7224 }, { "epoch": 30.89, "learning_rate": 9.987016131593173e-05, "loss": 0.0017, "step": 7228 }, { "epoch": 30.91, "learning_rate": 9.986932062438285e-05, "loss": 0.0015, "step": 7232 }, { "epoch": 30.92, "learning_rate": 9.986847722347734e-05, "loss": 0.0025, "step": 7236 }, { "epoch": 30.94, "learning_rate": 9.986763111326096e-05, "loss": 0.0048, "step": 7240 }, { "epoch": 30.96, "learning_rate": 9.986678229377973e-05, "loss": 0.0034, "step": 7244 }, { "epoch": 30.97, "learning_rate": 9.986593076507974e-05, "loss": 0.004, "step": 7248 }, { "epoch": 30.99, "learning_rate": 9.986507652720723e-05, "loss": 0.0027, "step": 7252 }, { "epoch": 31.01, "learning_rate": 9.986421958020863e-05, "loss": 0.004, "step": 7256 }, { "epoch": 31.03, "learning_rate": 9.986335992413054e-05, "loss": 0.0015, "step": 7260 }, { "epoch": 31.04, "learning_rate": 9.98624975590196e-05, "loss": 0.0023, "step": 7264 }, { "epoch": 31.06, "learning_rate": 9.986163248492268e-05, "loss": 0.0063, "step": 7268 }, { "epoch": 31.08, "learning_rate": 9.98607647018868e-05, "loss": 0.0012, "step": 7272 }, { "epoch": 31.09, "learning_rate": 9.98598942099591e-05, "loss": 0.0052, "step": 7276 }, { "epoch": 31.11, "learning_rate": 9.985902100918684e-05, "loss": 0.0011, "step": 7280 }, { "epoch": 31.13, "learning_rate": 9.985814509961748e-05, "loss": 0.0039, "step": 7284 }, { "epoch": 31.15, "learning_rate": 9.985726648129863e-05, "loss": 0.0085, "step": 7288 }, { "epoch": 31.16, "learning_rate": 9.9856385154278e-05, "loss": 0.0036, "step": 7292 }, { "epoch": 31.18, "learning_rate": 9.98555011186035e-05, "loss": 0.0015, "step": 7296 }, { "epoch": 31.2, "learning_rate": 9.985461437432311e-05, "loss": 0.0014, "step": 7300 }, { "epoch": 31.21, "learning_rate": 9.985372492148504e-05, "loss": 0.003, "step": 7304 }, { "epoch": 31.23, "learning_rate": 9.985283276013761e-05, "loss": 0.0022, "step": 7308 }, { "epoch": 31.25, "learning_rate": 9.98519378903293e-05, "loss": 0.0007, "step": 7312 }, { "epoch": 31.26, "learning_rate": 9.985104031210869e-05, "loss": 0.0033, "step": 7316 }, { "epoch": 31.28, "learning_rate": 9.985014002552457e-05, "loss": 0.0025, "step": 7320 }, { "epoch": 31.3, "learning_rate": 9.984923703062588e-05, "loss": 0.004, "step": 7324 }, { "epoch": 31.32, "learning_rate": 9.984833132746163e-05, "loss": 0.0019, "step": 7328 }, { "epoch": 31.33, "learning_rate": 9.984742291608104e-05, "loss": 0.002, "step": 7332 }, { "epoch": 31.35, "learning_rate": 9.984651179653348e-05, "loss": 0.0011, "step": 7336 }, { "epoch": 31.37, "learning_rate": 9.984559796886844e-05, "loss": 0.0012, "step": 7340 }, { "epoch": 31.38, "learning_rate": 9.984468143313555e-05, "loss": 0.0013, "step": 7344 }, { "epoch": 31.4, "learning_rate": 9.984376218938463e-05, "loss": 0.0024, "step": 7348 }, { "epoch": 31.42, "learning_rate": 9.984284023766562e-05, "loss": 0.0057, "step": 7352 }, { "epoch": 31.44, "learning_rate": 9.98419155780286e-05, "loss": 0.0017, "step": 7356 }, { "epoch": 31.45, "learning_rate": 9.98409882105238e-05, "loss": 0.0012, "step": 7360 }, { "epoch": 31.47, "learning_rate": 9.984005813520162e-05, "loss": 0.0014, "step": 7364 }, { "epoch": 31.49, "learning_rate": 9.983912535211258e-05, "loss": 0.0015, "step": 7368 }, { "epoch": 31.5, "learning_rate": 9.983818986130736e-05, "loss": 0.0021, "step": 7372 }, { "epoch": 31.52, "learning_rate": 9.983725166283676e-05, "loss": 0.001, "step": 7376 }, { "epoch": 31.54, "learning_rate": 9.983631075675179e-05, "loss": 0.0026, "step": 7380 }, { "epoch": 31.56, "learning_rate": 9.983536714310355e-05, "loss": 0.0015, "step": 7384 }, { "epoch": 31.57, "learning_rate": 9.983442082194333e-05, "loss": 0.0016, "step": 7388 }, { "epoch": 31.59, "learning_rate": 9.98334717933225e-05, "loss": 0.0013, "step": 7392 }, { "epoch": 31.61, "learning_rate": 9.983252005729264e-05, "loss": 0.0035, "step": 7396 }, { "epoch": 31.62, "learning_rate": 9.983156561390547e-05, "loss": 0.0008, "step": 7400 }, { "epoch": 31.64, "learning_rate": 9.983060846321282e-05, "loss": 0.0021, "step": 7404 }, { "epoch": 31.66, "learning_rate": 9.982964860526671e-05, "loss": 0.0017, "step": 7408 }, { "epoch": 31.68, "learning_rate": 9.982868604011929e-05, "loss": 0.0025, "step": 7412 }, { "epoch": 31.69, "learning_rate": 9.982772076782283e-05, "loss": 0.0009, "step": 7416 }, { "epoch": 31.71, "learning_rate": 9.982675278842981e-05, "loss": 0.0026, "step": 7420 }, { "epoch": 31.73, "learning_rate": 9.982578210199279e-05, "loss": 0.0022, "step": 7424 }, { "epoch": 31.74, "learning_rate": 9.98248087085645e-05, "loss": 0.002, "step": 7428 }, { "epoch": 31.76, "learning_rate": 9.982383260819785e-05, "loss": 0.002, "step": 7432 }, { "epoch": 31.78, "learning_rate": 9.982285380094587e-05, "loss": 0.0018, "step": 7436 }, { "epoch": 31.79, "learning_rate": 9.982187228686172e-05, "loss": 0.0017, "step": 7440 }, { "epoch": 31.81, "learning_rate": 9.982088806599874e-05, "loss": 0.0035, "step": 7444 }, { "epoch": 31.83, "learning_rate": 9.981990113841038e-05, "loss": 0.0027, "step": 7448 }, { "epoch": 31.85, "learning_rate": 9.981891150415029e-05, "loss": 0.0017, "step": 7452 }, { "epoch": 31.86, "learning_rate": 9.981791916327218e-05, "loss": 0.0029, "step": 7456 }, { "epoch": 31.88, "learning_rate": 9.981692411583005e-05, "loss": 0.0012, "step": 7460 }, { "epoch": 31.9, "learning_rate": 9.981592636187789e-05, "loss": 0.0023, "step": 7464 }, { "epoch": 31.91, "learning_rate": 9.981492590146992e-05, "loss": 0.0023, "step": 7468 }, { "epoch": 31.93, "learning_rate": 9.981392273466053e-05, "loss": 0.0024, "step": 7472 }, { "epoch": 31.95, "learning_rate": 9.981291686150418e-05, "loss": 0.0035, "step": 7476 }, { "epoch": 31.97, "learning_rate": 9.981190828205553e-05, "loss": 0.0029, "step": 7480 }, { "epoch": 31.98, "learning_rate": 9.981089699636939e-05, "loss": 0.0013, "step": 7484 }, { "epoch": 32.0, "learning_rate": 9.980988300450067e-05, "loss": 0.0011, "step": 7488 }, { "epoch": 32.02, "learning_rate": 9.98088663065045e-05, "loss": 0.0022, "step": 7492 }, { "epoch": 32.03, "learning_rate": 9.980784690243608e-05, "loss": 0.0028, "step": 7496 }, { "epoch": 32.05, "learning_rate": 9.980682479235082e-05, "loss": 0.002, "step": 7500 }, { "epoch": 32.07, "learning_rate": 9.980579997630423e-05, "loss": 0.0013, "step": 7504 }, { "epoch": 32.09, "learning_rate": 9.9804772454352e-05, "loss": 0.0044, "step": 7508 }, { "epoch": 32.1, "learning_rate": 9.980374222654996e-05, "loss": 0.002, "step": 7512 }, { "epoch": 32.12, "learning_rate": 9.980270929295406e-05, "loss": 0.0019, "step": 7516 }, { "epoch": 32.14, "learning_rate": 9.980167365362044e-05, "loss": 0.0014, "step": 7520 }, { "epoch": 32.15, "learning_rate": 9.980063530860535e-05, "loss": 0.0061, "step": 7524 }, { "epoch": 32.17, "learning_rate": 9.979959425796521e-05, "loss": 0.0006, "step": 7528 }, { "epoch": 32.19, "learning_rate": 9.979855050175659e-05, "loss": 0.0022, "step": 7532 }, { "epoch": 32.21, "learning_rate": 9.979750404003617e-05, "loss": 0.004, "step": 7536 }, { "epoch": 32.22, "learning_rate": 9.979645487286082e-05, "loss": 0.0024, "step": 7540 }, { "epoch": 32.24, "learning_rate": 9.979540300028755e-05, "loss": 0.001, "step": 7544 }, { "epoch": 32.26, "learning_rate": 9.979434842237347e-05, "loss": 0.0016, "step": 7548 }, { "epoch": 32.27, "learning_rate": 9.979329113917593e-05, "loss": 0.0019, "step": 7552 }, { "epoch": 32.29, "learning_rate": 9.979223115075233e-05, "loss": 0.008, "step": 7556 }, { "epoch": 32.31, "learning_rate": 9.979116845716026e-05, "loss": 0.0041, "step": 7560 }, { "epoch": 32.32, "learning_rate": 9.979010305845747e-05, "loss": 0.0019, "step": 7564 }, { "epoch": 32.34, "learning_rate": 9.978903495470184e-05, "loss": 0.0021, "step": 7568 }, { "epoch": 32.36, "learning_rate": 9.97879641459514e-05, "loss": 0.0012, "step": 7572 }, { "epoch": 32.38, "learning_rate": 9.978689063226432e-05, "loss": 0.0005, "step": 7576 }, { "epoch": 32.39, "learning_rate": 9.978581441369892e-05, "loss": 0.0009, "step": 7580 }, { "epoch": 32.41, "learning_rate": 9.978473549031368e-05, "loss": 0.0023, "step": 7584 }, { "epoch": 32.43, "learning_rate": 9.97836538621672e-05, "loss": 0.0013, "step": 7588 }, { "epoch": 32.44, "learning_rate": 9.978256952931827e-05, "loss": 0.0018, "step": 7592 }, { "epoch": 32.46, "learning_rate": 9.978148249182578e-05, "loss": 0.0034, "step": 7596 }, { "epoch": 32.48, "learning_rate": 9.97803927497488e-05, "loss": 0.0006, "step": 7600 }, { "epoch": 32.5, "learning_rate": 9.977930030314653e-05, "loss": 0.0017, "step": 7604 }, { "epoch": 32.51, "learning_rate": 9.977820515207831e-05, "loss": 0.0005, "step": 7608 }, { "epoch": 32.53, "learning_rate": 9.977710729660365e-05, "loss": 0.0008, "step": 7612 }, { "epoch": 32.55, "learning_rate": 9.97760067367822e-05, "loss": 0.0013, "step": 7616 }, { "epoch": 32.56, "learning_rate": 9.977490347267375e-05, "loss": 0.0021, "step": 7620 }, { "epoch": 32.58, "learning_rate": 9.977379750433824e-05, "loss": 0.0037, "step": 7624 }, { "epoch": 32.6, "learning_rate": 9.977268883183575e-05, "loss": 0.0007, "step": 7628 }, { "epoch": 32.62, "learning_rate": 9.977157745522652e-05, "loss": 0.0025, "step": 7632 }, { "epoch": 32.63, "learning_rate": 9.977046337457092e-05, "loss": 0.0029, "step": 7636 }, { "epoch": 32.65, "learning_rate": 9.976934658992949e-05, "loss": 0.0022, "step": 7640 }, { "epoch": 32.67, "learning_rate": 9.97682271013629e-05, "loss": 0.002, "step": 7644 }, { "epoch": 32.68, "learning_rate": 9.976710490893195e-05, "loss": 0.0019, "step": 7648 }, { "epoch": 32.7, "learning_rate": 9.976598001269765e-05, "loss": 0.002, "step": 7652 }, { "epoch": 32.72, "learning_rate": 9.976485241272107e-05, "loss": 0.0009, "step": 7656 }, { "epoch": 32.74, "learning_rate": 9.97637221090635e-05, "loss": 0.0046, "step": 7660 }, { "epoch": 32.75, "learning_rate": 9.976258910178635e-05, "loss": 0.002, "step": 7664 }, { "epoch": 32.77, "learning_rate": 9.976145339095115e-05, "loss": 0.001, "step": 7668 }, { "epoch": 32.79, "learning_rate": 9.976031497661964e-05, "loss": 0.0008, "step": 7672 }, { "epoch": 32.8, "learning_rate": 9.975917385885363e-05, "loss": 0.0016, "step": 7676 }, { "epoch": 32.82, "learning_rate": 9.975803003771513e-05, "loss": 0.0009, "step": 7680 }, { "epoch": 32.84, "learning_rate": 9.97568835132663e-05, "loss": 0.0007, "step": 7684 }, { "epoch": 32.85, "learning_rate": 9.975573428556941e-05, "loss": 0.0008, "step": 7688 }, { "epoch": 32.87, "learning_rate": 9.975458235468691e-05, "loss": 0.0007, "step": 7692 }, { "epoch": 32.89, "learning_rate": 9.975342772068137e-05, "loss": 0.0004, "step": 7696 }, { "epoch": 32.91, "learning_rate": 9.975227038361552e-05, "loss": 0.0009, "step": 7700 }, { "epoch": 32.92, "learning_rate": 9.975111034355225e-05, "loss": 0.0006, "step": 7704 }, { "epoch": 32.94, "learning_rate": 9.974994760055457e-05, "loss": 0.0029, "step": 7708 }, { "epoch": 32.96, "learning_rate": 9.974878215468566e-05, "loss": 0.0026, "step": 7712 }, { "epoch": 32.97, "learning_rate": 9.974761400600883e-05, "loss": 0.0011, "step": 7716 }, { "epoch": 32.99, "learning_rate": 9.974644315458756e-05, "loss": 0.001, "step": 7720 }, { "epoch": 33.01, "learning_rate": 9.974526960048545e-05, "loss": 0.0004, "step": 7724 }, { "epoch": 33.03, "learning_rate": 9.974409334376626e-05, "loss": 0.0034, "step": 7728 }, { "epoch": 33.04, "learning_rate": 9.974291438449388e-05, "loss": 0.0015, "step": 7732 }, { "epoch": 33.06, "learning_rate": 9.974173272273239e-05, "loss": 0.002, "step": 7736 }, { "epoch": 33.08, "learning_rate": 9.974054835854598e-05, "loss": 0.0028, "step": 7740 }, { "epoch": 33.09, "learning_rate": 9.973936129199898e-05, "loss": 0.003, "step": 7744 }, { "epoch": 33.11, "learning_rate": 9.97381715231559e-05, "loss": 0.0017, "step": 7748 }, { "epoch": 33.13, "learning_rate": 9.973697905208136e-05, "loss": 0.0019, "step": 7752 }, { "epoch": 33.15, "learning_rate": 9.973578387884017e-05, "loss": 0.0009, "step": 7756 }, { "epoch": 33.16, "learning_rate": 9.973458600349724e-05, "loss": 0.001, "step": 7760 }, { "epoch": 33.18, "learning_rate": 9.973338542611766e-05, "loss": 0.0003, "step": 7764 }, { "epoch": 33.2, "learning_rate": 9.973218214676668e-05, "loss": 0.0007, "step": 7768 }, { "epoch": 33.21, "learning_rate": 9.973097616550961e-05, "loss": 0.0006, "step": 7772 }, { "epoch": 33.23, "learning_rate": 9.972976748241203e-05, "loss": 0.0048, "step": 7776 }, { "epoch": 33.25, "learning_rate": 9.972855609753959e-05, "loss": 0.0006, "step": 7780 }, { "epoch": 33.26, "learning_rate": 9.97273420109581e-05, "loss": 0.0005, "step": 7784 }, { "epoch": 33.28, "learning_rate": 9.972612522273352e-05, "loss": 0.0021, "step": 7788 }, { "epoch": 33.3, "learning_rate": 9.972490573293196e-05, "loss": 0.0008, "step": 7792 }, { "epoch": 33.32, "learning_rate": 9.972368354161965e-05, "loss": 0.0013, "step": 7796 }, { "epoch": 33.33, "learning_rate": 9.972245864886302e-05, "loss": 0.0017, "step": 7800 }, { "epoch": 33.35, "learning_rate": 9.972123105472862e-05, "loss": 0.0012, "step": 7804 }, { "epoch": 33.37, "learning_rate": 9.972000075928311e-05, "loss": 0.001, "step": 7808 }, { "epoch": 33.38, "learning_rate": 9.971876776259339e-05, "loss": 0.0016, "step": 7812 }, { "epoch": 33.4, "learning_rate": 9.971753206472638e-05, "loss": 0.002, "step": 7816 }, { "epoch": 33.42, "learning_rate": 9.971629366574926e-05, "loss": 0.0023, "step": 7820 }, { "epoch": 33.44, "learning_rate": 9.971505256572928e-05, "loss": 0.0011, "step": 7824 }, { "epoch": 33.45, "learning_rate": 9.971380876473391e-05, "loss": 0.0006, "step": 7828 }, { "epoch": 33.47, "learning_rate": 9.971256226283068e-05, "loss": 0.0011, "step": 7832 }, { "epoch": 33.49, "learning_rate": 9.971131306008734e-05, "loss": 0.0006, "step": 7836 }, { "epoch": 33.5, "learning_rate": 9.971006115657174e-05, "loss": 0.0006, "step": 7840 }, { "epoch": 33.52, "learning_rate": 9.970880655235191e-05, "loss": 0.001, "step": 7844 }, { "epoch": 33.54, "learning_rate": 9.970754924749601e-05, "loss": 0.0006, "step": 7848 }, { "epoch": 33.56, "learning_rate": 9.970628924207232e-05, "loss": 0.0049, "step": 7852 }, { "epoch": 33.57, "learning_rate": 9.970502653614934e-05, "loss": 0.0012, "step": 7856 }, { "epoch": 33.59, "learning_rate": 9.970376112979564e-05, "loss": 0.001, "step": 7860 }, { "epoch": 33.61, "learning_rate": 9.970249302307999e-05, "loss": 0.0016, "step": 7864 }, { "epoch": 33.62, "learning_rate": 9.970122221607128e-05, "loss": 0.0008, "step": 7868 }, { "epoch": 33.64, "learning_rate": 9.969994870883852e-05, "loss": 0.0011, "step": 7872 }, { "epoch": 33.66, "learning_rate": 9.969867250145094e-05, "loss": 0.0013, "step": 7876 }, { "epoch": 33.68, "learning_rate": 9.969739359397785e-05, "loss": 0.002, "step": 7880 }, { "epoch": 33.69, "learning_rate": 9.969611198648876e-05, "loss": 0.002, "step": 7884 }, { "epoch": 33.71, "learning_rate": 9.969482767905326e-05, "loss": 0.0014, "step": 7888 }, { "epoch": 33.73, "learning_rate": 9.969354067174115e-05, "loss": 0.0008, "step": 7892 }, { "epoch": 33.74, "learning_rate": 9.969225096462234e-05, "loss": 0.0013, "step": 7896 }, { "epoch": 33.76, "learning_rate": 9.969095855776692e-05, "loss": 0.0043, "step": 7900 }, { "epoch": 33.78, "learning_rate": 9.968966345124509e-05, "loss": 0.0015, "step": 7904 }, { "epoch": 33.79, "learning_rate": 9.96883656451272e-05, "loss": 0.0008, "step": 7908 }, { "epoch": 33.81, "learning_rate": 9.968706513948378e-05, "loss": 0.0025, "step": 7912 }, { "epoch": 33.83, "learning_rate": 9.968576193438548e-05, "loss": 0.0021, "step": 7916 }, { "epoch": 33.85, "learning_rate": 9.968445602990309e-05, "loss": 0.0016, "step": 7920 }, { "epoch": 33.86, "learning_rate": 9.968314742610758e-05, "loss": 0.0035, "step": 7924 }, { "epoch": 33.88, "learning_rate": 9.968183612307003e-05, "loss": 0.0033, "step": 7928 }, { "epoch": 33.9, "learning_rate": 9.968052212086168e-05, "loss": 0.0039, "step": 7932 }, { "epoch": 33.91, "learning_rate": 9.967920541955392e-05, "loss": 0.0021, "step": 7936 }, { "epoch": 33.93, "learning_rate": 9.96778860192183e-05, "loss": 0.0035, "step": 7940 }, { "epoch": 33.95, "learning_rate": 9.967656391992647e-05, "loss": 0.0005, "step": 7944 }, { "epoch": 33.97, "learning_rate": 9.96752391217503e-05, "loss": 0.0016, "step": 7948 }, { "epoch": 33.98, "learning_rate": 9.967391162476174e-05, "loss": 0.0014, "step": 7952 }, { "epoch": 34.0, "learning_rate": 9.967258142903291e-05, "loss": 0.0031, "step": 7956 }, { "epoch": 34.02, "learning_rate": 9.967124853463608e-05, "loss": 0.0027, "step": 7960 }, { "epoch": 34.03, "learning_rate": 9.966991294164368e-05, "loss": 0.0011, "step": 7964 }, { "epoch": 34.05, "learning_rate": 9.966857465012827e-05, "loss": 0.0019, "step": 7968 }, { "epoch": 34.07, "learning_rate": 9.966723366016253e-05, "loss": 0.003, "step": 7972 }, { "epoch": 34.09, "learning_rate": 9.966588997181933e-05, "loss": 0.0013, "step": 7976 }, { "epoch": 34.1, "learning_rate": 9.96645435851717e-05, "loss": 0.002, "step": 7980 }, { "epoch": 34.12, "learning_rate": 9.966319450029274e-05, "loss": 0.0021, "step": 7984 }, { "epoch": 34.14, "learning_rate": 9.966184271725578e-05, "loss": 0.0033, "step": 7988 }, { "epoch": 34.15, "learning_rate": 9.966048823613425e-05, "loss": 0.001, "step": 7992 }, { "epoch": 34.17, "learning_rate": 9.965913105700173e-05, "loss": 0.0015, "step": 7996 }, { "epoch": 34.19, "learning_rate": 9.965777117993196e-05, "loss": 0.0013, "step": 8000 }, { "epoch": 34.19, "eval_exact_match": 0.5103950103950103, "eval_loss": 0.726246178150177, "eval_runtime": 130.3318, "eval_samples_per_second": 7.381, "step": 8000 }, { "epoch": 34.21, "learning_rate": 9.965640860499883e-05, "loss": 0.0044, "step": 8004 }, { "epoch": 34.22, "learning_rate": 9.965504333227636e-05, "loss": 0.0018, "step": 8008 }, { "epoch": 34.24, "learning_rate": 9.965367536183872e-05, "loss": 0.0017, "step": 8012 }, { "epoch": 34.26, "learning_rate": 9.965230469376024e-05, "loss": 0.002, "step": 8016 }, { "epoch": 34.27, "learning_rate": 9.965093132811539e-05, "loss": 0.0006, "step": 8020 }, { "epoch": 34.29, "learning_rate": 9.964955526497874e-05, "loss": 0.0014, "step": 8024 }, { "epoch": 34.31, "learning_rate": 9.964817650442512e-05, "loss": 0.0015, "step": 8028 }, { "epoch": 34.32, "learning_rate": 9.964679504652939e-05, "loss": 0.0038, "step": 8032 }, { "epoch": 34.34, "learning_rate": 9.964541089136661e-05, "loss": 0.0006, "step": 8036 }, { "epoch": 34.36, "learning_rate": 9.9644024039012e-05, "loss": 0.0024, "step": 8040 }, { "epoch": 34.38, "learning_rate": 9.964263448954087e-05, "loss": 0.0022, "step": 8044 }, { "epoch": 34.39, "learning_rate": 9.964124224302877e-05, "loss": 0.0045, "step": 8048 }, { "epoch": 34.41, "learning_rate": 9.963984729955128e-05, "loss": 0.0012, "step": 8052 }, { "epoch": 34.43, "learning_rate": 9.96384496591842e-05, "loss": 0.0018, "step": 8056 }, { "epoch": 34.44, "learning_rate": 9.963704932200351e-05, "loss": 0.0014, "step": 8060 }, { "epoch": 34.46, "learning_rate": 9.963564628808523e-05, "loss": 0.0019, "step": 8064 }, { "epoch": 34.48, "learning_rate": 9.963424055750561e-05, "loss": 0.0006, "step": 8068 }, { "epoch": 34.5, "learning_rate": 9.963283213034103e-05, "loss": 0.0013, "step": 8072 }, { "epoch": 34.51, "learning_rate": 9.963142100666799e-05, "loss": 0.0026, "step": 8076 }, { "epoch": 34.53, "learning_rate": 9.963000718656316e-05, "loss": 0.0021, "step": 8080 }, { "epoch": 34.55, "learning_rate": 9.962859067010335e-05, "loss": 0.0031, "step": 8084 }, { "epoch": 34.56, "learning_rate": 9.962717145736554e-05, "loss": 0.0012, "step": 8088 }, { "epoch": 34.58, "learning_rate": 9.962574954842682e-05, "loss": 0.0004, "step": 8092 }, { "epoch": 34.6, "learning_rate": 9.962432494336442e-05, "loss": 0.0014, "step": 8096 }, { "epoch": 34.62, "learning_rate": 9.962289764225579e-05, "loss": 0.0045, "step": 8100 }, { "epoch": 34.63, "learning_rate": 9.962146764517841e-05, "loss": 0.0006, "step": 8104 }, { "epoch": 34.65, "learning_rate": 9.962003495221002e-05, "loss": 0.0021, "step": 8108 }, { "epoch": 34.67, "learning_rate": 9.961859956342843e-05, "loss": 0.0015, "step": 8112 }, { "epoch": 34.68, "learning_rate": 9.961716147891163e-05, "loss": 0.0008, "step": 8116 }, { "epoch": 34.7, "learning_rate": 9.961572069873775e-05, "loss": 0.0037, "step": 8120 }, { "epoch": 34.72, "learning_rate": 9.961427722298507e-05, "loss": 0.0009, "step": 8124 }, { "epoch": 34.74, "learning_rate": 9.961283105173202e-05, "loss": 0.0023, "step": 8128 }, { "epoch": 34.75, "learning_rate": 9.961138218505714e-05, "loss": 0.0029, "step": 8132 }, { "epoch": 34.77, "learning_rate": 9.960993062303919e-05, "loss": 0.0008, "step": 8136 }, { "epoch": 34.79, "learning_rate": 9.960847636575699e-05, "loss": 0.0018, "step": 8140 }, { "epoch": 34.8, "learning_rate": 9.960701941328958e-05, "loss": 0.0014, "step": 8144 }, { "epoch": 34.82, "learning_rate": 9.96055597657161e-05, "loss": 0.0011, "step": 8148 }, { "epoch": 34.84, "learning_rate": 9.960409742311584e-05, "loss": 0.0009, "step": 8152 }, { "epoch": 34.85, "learning_rate": 9.960263238556828e-05, "loss": 0.0048, "step": 8156 }, { "epoch": 34.87, "learning_rate": 9.9601164653153e-05, "loss": 0.0022, "step": 8160 }, { "epoch": 34.89, "learning_rate": 9.95996942259497e-05, "loss": 0.0086, "step": 8164 }, { "epoch": 34.91, "learning_rate": 9.959822110403832e-05, "loss": 0.0022, "step": 8168 }, { "epoch": 34.92, "learning_rate": 9.959674528749888e-05, "loss": 0.0008, "step": 8172 }, { "epoch": 34.94, "learning_rate": 9.959526677641156e-05, "loss": 0.0015, "step": 8176 }, { "epoch": 34.96, "learning_rate": 9.959378557085668e-05, "loss": 0.0016, "step": 8180 }, { "epoch": 34.97, "learning_rate": 9.959230167091471e-05, "loss": 0.0005, "step": 8184 }, { "epoch": 34.99, "learning_rate": 9.959081507666626e-05, "loss": 0.0009, "step": 8188 }, { "epoch": 35.01, "learning_rate": 9.958932578819213e-05, "loss": 0.0011, "step": 8192 }, { "epoch": 35.03, "learning_rate": 9.95878338055732e-05, "loss": 0.0018, "step": 8196 }, { "epoch": 35.04, "learning_rate": 9.958633912889054e-05, "loss": 0.0006, "step": 8200 }, { "epoch": 35.06, "learning_rate": 9.958484175822535e-05, "loss": 0.0028, "step": 8204 }, { "epoch": 35.08, "learning_rate": 9.958334169365899e-05, "loss": 0.0011, "step": 8208 }, { "epoch": 35.09, "learning_rate": 9.958183893527296e-05, "loss": 0.0039, "step": 8212 }, { "epoch": 35.11, "learning_rate": 9.958033348314888e-05, "loss": 0.0019, "step": 8216 }, { "epoch": 35.13, "learning_rate": 9.957882533736856e-05, "loss": 0.0022, "step": 8220 }, { "epoch": 35.15, "learning_rate": 9.957731449801393e-05, "loss": 0.002, "step": 8224 }, { "epoch": 35.16, "learning_rate": 9.957580096516707e-05, "loss": 0.0025, "step": 8228 }, { "epoch": 35.18, "learning_rate": 9.957428473891023e-05, "loss": 0.0025, "step": 8232 }, { "epoch": 35.2, "learning_rate": 9.957276581932575e-05, "loss": 0.0018, "step": 8236 }, { "epoch": 35.21, "learning_rate": 9.957124420649617e-05, "loss": 0.0011, "step": 8240 }, { "epoch": 35.23, "learning_rate": 9.956971990050417e-05, "loss": 0.0028, "step": 8244 }, { "epoch": 35.25, "learning_rate": 9.956819290143255e-05, "loss": 0.0021, "step": 8248 }, { "epoch": 35.26, "learning_rate": 9.956666320936425e-05, "loss": 0.0012, "step": 8252 }, { "epoch": 35.28, "learning_rate": 9.956513082438243e-05, "loss": 0.0011, "step": 8256 }, { "epoch": 35.3, "learning_rate": 9.956359574657028e-05, "loss": 0.0027, "step": 8260 }, { "epoch": 35.32, "learning_rate": 9.956205797601126e-05, "loss": 0.0009, "step": 8264 }, { "epoch": 35.33, "learning_rate": 9.956051751278888e-05, "loss": 0.0021, "step": 8268 }, { "epoch": 35.35, "learning_rate": 9.955897435698683e-05, "loss": 0.0034, "step": 8272 }, { "epoch": 35.37, "learning_rate": 9.955742850868895e-05, "loss": 0.0011, "step": 8276 }, { "epoch": 35.38, "learning_rate": 9.955587996797925e-05, "loss": 0.0055, "step": 8280 }, { "epoch": 35.4, "learning_rate": 9.955432873494183e-05, "loss": 0.0007, "step": 8284 }, { "epoch": 35.42, "learning_rate": 9.955277480966098e-05, "loss": 0.002, "step": 8288 }, { "epoch": 35.44, "learning_rate": 9.955121819222112e-05, "loss": 0.0027, "step": 8292 }, { "epoch": 35.45, "learning_rate": 9.954965888270684e-05, "loss": 0.0018, "step": 8296 }, { "epoch": 35.47, "learning_rate": 9.954809688120284e-05, "loss": 0.0014, "step": 8300 }, { "epoch": 35.49, "learning_rate": 9.954653218779397e-05, "loss": 0.0024, "step": 8304 }, { "epoch": 35.5, "learning_rate": 9.954496480256525e-05, "loss": 0.0034, "step": 8308 }, { "epoch": 35.52, "learning_rate": 9.954339472560183e-05, "loss": 0.0037, "step": 8312 }, { "epoch": 35.54, "learning_rate": 9.954182195698902e-05, "loss": 0.0012, "step": 8316 }, { "epoch": 35.56, "learning_rate": 9.954024649681227e-05, "loss": 0.0008, "step": 8320 }, { "epoch": 35.57, "learning_rate": 9.953866834515717e-05, "loss": 0.0009, "step": 8324 }, { "epoch": 35.59, "learning_rate": 9.953708750210943e-05, "loss": 0.0021, "step": 8328 }, { "epoch": 35.61, "learning_rate": 9.953550396775498e-05, "loss": 0.0036, "step": 8332 }, { "epoch": 35.62, "learning_rate": 9.953391774217984e-05, "loss": 0.0013, "step": 8336 }, { "epoch": 35.64, "learning_rate": 9.953232882547017e-05, "loss": 0.0008, "step": 8340 }, { "epoch": 35.66, "learning_rate": 9.95307372177123e-05, "loss": 0.0011, "step": 8344 }, { "epoch": 35.68, "learning_rate": 9.952914291899271e-05, "loss": 0.0033, "step": 8348 }, { "epoch": 35.69, "learning_rate": 9.952754592939801e-05, "loss": 0.0012, "step": 8352 }, { "epoch": 35.71, "learning_rate": 9.952594624901498e-05, "loss": 0.0017, "step": 8356 }, { "epoch": 35.73, "learning_rate": 9.95243438779305e-05, "loss": 0.002, "step": 8360 }, { "epoch": 35.74, "learning_rate": 9.952273881623166e-05, "loss": 0.0019, "step": 8364 }, { "epoch": 35.76, "learning_rate": 9.952113106400562e-05, "loss": 0.0042, "step": 8368 }, { "epoch": 35.78, "learning_rate": 9.951952062133977e-05, "loss": 0.0017, "step": 8372 }, { "epoch": 35.79, "learning_rate": 9.951790748832158e-05, "loss": 0.0017, "step": 8376 }, { "epoch": 35.81, "learning_rate": 9.951629166503868e-05, "loss": 0.0014, "step": 8380 }, { "epoch": 35.83, "learning_rate": 9.95146731515789e-05, "loss": 0.0016, "step": 8384 }, { "epoch": 35.85, "learning_rate": 9.951305194803013e-05, "loss": 0.0005, "step": 8388 }, { "epoch": 35.86, "learning_rate": 9.951142805448046e-05, "loss": 0.0008, "step": 8392 }, { "epoch": 35.88, "learning_rate": 9.95098014710181e-05, "loss": 0.0029, "step": 8396 }, { "epoch": 35.9, "learning_rate": 9.950817219773145e-05, "loss": 0.0026, "step": 8400 }, { "epoch": 35.91, "learning_rate": 9.950654023470902e-05, "loss": 0.0044, "step": 8404 }, { "epoch": 35.93, "learning_rate": 9.950490558203947e-05, "loss": 0.0011, "step": 8408 }, { "epoch": 35.95, "learning_rate": 9.95032682398116e-05, "loss": 0.0013, "step": 8412 }, { "epoch": 35.97, "learning_rate": 9.950162820811438e-05, "loss": 0.0014, "step": 8416 }, { "epoch": 35.98, "learning_rate": 9.949998548703688e-05, "loss": 0.002, "step": 8420 }, { "epoch": 36.0, "learning_rate": 9.949834007666838e-05, "loss": 0.0011, "step": 8424 }, { "epoch": 36.02, "learning_rate": 9.949669197709827e-05, "loss": 0.0014, "step": 8428 }, { "epoch": 36.03, "learning_rate": 9.94950411884161e-05, "loss": 0.0015, "step": 8432 }, { "epoch": 36.05, "learning_rate": 9.949338771071152e-05, "loss": 0.0008, "step": 8436 }, { "epoch": 36.07, "learning_rate": 9.94917315440744e-05, "loss": 0.0014, "step": 8440 }, { "epoch": 36.09, "learning_rate": 9.94900726885947e-05, "loss": 0.001, "step": 8444 }, { "epoch": 36.1, "learning_rate": 9.948841114436253e-05, "loss": 0.0024, "step": 8448 }, { "epoch": 36.12, "learning_rate": 9.948674691146817e-05, "loss": 0.0012, "step": 8452 }, { "epoch": 36.14, "learning_rate": 9.948507999000205e-05, "loss": 0.0011, "step": 8456 }, { "epoch": 36.15, "learning_rate": 9.948341038005474e-05, "loss": 0.0014, "step": 8460 }, { "epoch": 36.17, "learning_rate": 9.948173808171691e-05, "loss": 0.0017, "step": 8464 }, { "epoch": 36.19, "learning_rate": 9.948006309507944e-05, "loss": 0.0021, "step": 8468 }, { "epoch": 36.21, "learning_rate": 9.947838542023334e-05, "loss": 0.0021, "step": 8472 }, { "epoch": 36.22, "learning_rate": 9.947670505726974e-05, "loss": 0.0008, "step": 8476 }, { "epoch": 36.24, "learning_rate": 9.947502200627994e-05, "loss": 0.0036, "step": 8480 }, { "epoch": 36.26, "learning_rate": 9.947333626735535e-05, "loss": 0.0006, "step": 8484 }, { "epoch": 36.27, "learning_rate": 9.947164784058761e-05, "loss": 0.0019, "step": 8488 }, { "epoch": 36.29, "learning_rate": 9.94699567260684e-05, "loss": 0.0004, "step": 8492 }, { "epoch": 36.31, "learning_rate": 9.946826292388964e-05, "loss": 0.0028, "step": 8496 }, { "epoch": 36.32, "learning_rate": 9.946656643414331e-05, "loss": 0.0039, "step": 8500 }, { "epoch": 36.34, "learning_rate": 9.946486725692161e-05, "loss": 0.0011, "step": 8504 }, { "epoch": 36.36, "learning_rate": 9.946316539231683e-05, "loss": 0.0007, "step": 8508 }, { "epoch": 36.38, "learning_rate": 9.946146084042148e-05, "loss": 0.0022, "step": 8512 }, { "epoch": 36.39, "learning_rate": 9.94597536013281e-05, "loss": 0.0009, "step": 8516 }, { "epoch": 36.41, "learning_rate": 9.945804367512948e-05, "loss": 0.0015, "step": 8520 }, { "epoch": 36.43, "learning_rate": 9.945633106191851e-05, "loss": 0.001, "step": 8524 }, { "epoch": 36.44, "learning_rate": 9.945461576178825e-05, "loss": 0.0004, "step": 8528 }, { "epoch": 36.46, "learning_rate": 9.945289777483186e-05, "loss": 0.0011, "step": 8532 }, { "epoch": 36.48, "learning_rate": 9.945117710114272e-05, "loss": 0.002, "step": 8536 }, { "epoch": 36.5, "learning_rate": 9.944945374081427e-05, "loss": 0.0013, "step": 8540 }, { "epoch": 36.51, "learning_rate": 9.944772769394018e-05, "loss": 0.0008, "step": 8544 }, { "epoch": 36.53, "learning_rate": 9.944599896061417e-05, "loss": 0.0004, "step": 8548 }, { "epoch": 36.55, "learning_rate": 9.944426754093021e-05, "loss": 0.0007, "step": 8552 }, { "epoch": 36.56, "learning_rate": 9.944253343498234e-05, "loss": 0.0017, "step": 8556 }, { "epoch": 36.58, "learning_rate": 9.944079664286479e-05, "loss": 0.0013, "step": 8560 }, { "epoch": 36.6, "learning_rate": 9.94390571646719e-05, "loss": 0.0013, "step": 8564 }, { "epoch": 36.62, "learning_rate": 9.94373150004982e-05, "loss": 0.0007, "step": 8568 }, { "epoch": 36.63, "learning_rate": 9.943557015043832e-05, "loss": 0.0038, "step": 8572 }, { "epoch": 36.65, "learning_rate": 9.943382261458705e-05, "loss": 0.0022, "step": 8576 }, { "epoch": 36.67, "learning_rate": 9.943207239303935e-05, "loss": 0.0011, "step": 8580 }, { "epoch": 36.68, "learning_rate": 9.943031948589029e-05, "loss": 0.0009, "step": 8584 }, { "epoch": 36.7, "learning_rate": 9.942856389323512e-05, "loss": 0.0012, "step": 8588 }, { "epoch": 36.72, "learning_rate": 9.942680561516921e-05, "loss": 0.0004, "step": 8592 }, { "epoch": 36.74, "learning_rate": 9.94250446517881e-05, "loss": 0.001, "step": 8596 }, { "epoch": 36.75, "learning_rate": 9.942328100318745e-05, "loss": 0.0008, "step": 8600 }, { "epoch": 36.77, "learning_rate": 9.942151466946308e-05, "loss": 0.0009, "step": 8604 }, { "epoch": 36.79, "learning_rate": 9.941974565071094e-05, "loss": 0.0004, "step": 8608 }, { "epoch": 36.8, "learning_rate": 9.941797394702717e-05, "loss": 0.0011, "step": 8612 }, { "epoch": 36.82, "learning_rate": 9.941619955850798e-05, "loss": 0.0014, "step": 8616 }, { "epoch": 36.84, "learning_rate": 9.941442248524981e-05, "loss": 0.0029, "step": 8620 }, { "epoch": 36.85, "learning_rate": 9.94126427273492e-05, "loss": 0.0015, "step": 8624 }, { "epoch": 36.87, "learning_rate": 9.941086028490284e-05, "loss": 0.0011, "step": 8628 }, { "epoch": 36.89, "learning_rate": 9.940907515800755e-05, "loss": 0.0017, "step": 8632 }, { "epoch": 36.91, "learning_rate": 9.940728734676035e-05, "loss": 0.0012, "step": 8636 }, { "epoch": 36.92, "learning_rate": 9.940549685125834e-05, "loss": 0.0006, "step": 8640 }, { "epoch": 36.94, "learning_rate": 9.940370367159881e-05, "loss": 0.0022, "step": 8644 }, { "epoch": 36.96, "learning_rate": 9.940190780787917e-05, "loss": 0.0028, "step": 8648 }, { "epoch": 36.97, "learning_rate": 9.9400109260197e-05, "loss": 0.0019, "step": 8652 }, { "epoch": 36.99, "learning_rate": 9.939830802865002e-05, "loss": 0.002, "step": 8656 }, { "epoch": 37.01, "learning_rate": 9.939650411333606e-05, "loss": 0.0009, "step": 8660 }, { "epoch": 37.03, "learning_rate": 9.939469751435315e-05, "loss": 0.0087, "step": 8664 }, { "epoch": 37.04, "learning_rate": 9.939288823179945e-05, "loss": 0.0025, "step": 8668 }, { "epoch": 37.06, "learning_rate": 9.939107626577324e-05, "loss": 0.001, "step": 8672 }, { "epoch": 37.08, "learning_rate": 9.938926161637294e-05, "loss": 0.0009, "step": 8676 }, { "epoch": 37.09, "learning_rate": 9.938744428369717e-05, "loss": 0.0068, "step": 8680 }, { "epoch": 37.11, "learning_rate": 9.938562426784468e-05, "loss": 0.0018, "step": 8684 }, { "epoch": 37.13, "learning_rate": 9.93838015689143e-05, "loss": 0.0011, "step": 8688 }, { "epoch": 37.15, "learning_rate": 9.93819761870051e-05, "loss": 0.0017, "step": 8692 }, { "epoch": 37.16, "learning_rate": 9.938014812221622e-05, "loss": 0.0017, "step": 8696 }, { "epoch": 37.18, "learning_rate": 9.9378317374647e-05, "loss": 0.0018, "step": 8700 }, { "epoch": 37.2, "learning_rate": 9.937648394439689e-05, "loss": 0.0014, "step": 8704 }, { "epoch": 37.21, "learning_rate": 9.937464783156549e-05, "loss": 0.0024, "step": 8708 }, { "epoch": 37.23, "learning_rate": 9.937280903625259e-05, "loss": 0.0018, "step": 8712 }, { "epoch": 37.25, "learning_rate": 9.937096755855806e-05, "loss": 0.0004, "step": 8716 }, { "epoch": 37.26, "learning_rate": 9.936912339858194e-05, "loss": 0.0026, "step": 8720 }, { "epoch": 37.28, "learning_rate": 9.936727655642443e-05, "loss": 0.0006, "step": 8724 }, { "epoch": 37.3, "learning_rate": 9.936542703218589e-05, "loss": 0.0003, "step": 8728 }, { "epoch": 37.32, "learning_rate": 9.936357482596679e-05, "loss": 0.0018, "step": 8732 }, { "epoch": 37.33, "learning_rate": 9.936171993786773e-05, "loss": 0.001, "step": 8736 }, { "epoch": 37.35, "learning_rate": 9.935986236798953e-05, "loss": 0.0009, "step": 8740 }, { "epoch": 37.37, "learning_rate": 9.935800211643307e-05, "loss": 0.0017, "step": 8744 }, { "epoch": 37.38, "learning_rate": 9.935613918329945e-05, "loss": 0.0028, "step": 8748 }, { "epoch": 37.4, "learning_rate": 9.935427356868985e-05, "loss": 0.0015, "step": 8752 }, { "epoch": 37.42, "learning_rate": 9.935240527270566e-05, "loss": 0.0004, "step": 8756 }, { "epoch": 37.44, "learning_rate": 9.935053429544835e-05, "loss": 0.0015, "step": 8760 }, { "epoch": 37.45, "learning_rate": 9.93486606370196e-05, "loss": 0.001, "step": 8764 }, { "epoch": 37.47, "learning_rate": 9.934678429752117e-05, "loss": 0.0014, "step": 8768 }, { "epoch": 37.49, "learning_rate": 9.934490527705502e-05, "loss": 0.0007, "step": 8772 }, { "epoch": 37.5, "learning_rate": 9.934302357572324e-05, "loss": 0.0042, "step": 8776 }, { "epoch": 37.52, "learning_rate": 9.934113919362807e-05, "loss": 0.0016, "step": 8780 }, { "epoch": 37.54, "learning_rate": 9.933925213087184e-05, "loss": 0.002, "step": 8784 }, { "epoch": 37.56, "learning_rate": 9.933736238755714e-05, "loss": 0.0013, "step": 8788 }, { "epoch": 37.57, "learning_rate": 9.933546996378657e-05, "loss": 0.0036, "step": 8792 }, { "epoch": 37.59, "learning_rate": 9.9333574859663e-05, "loss": 0.002, "step": 8796 }, { "epoch": 37.61, "learning_rate": 9.933167707528934e-05, "loss": 0.0006, "step": 8800 }, { "epoch": 37.62, "learning_rate": 9.932977661076873e-05, "loss": 0.0012, "step": 8804 }, { "epoch": 37.64, "learning_rate": 9.932787346620443e-05, "loss": 0.0007, "step": 8808 }, { "epoch": 37.66, "learning_rate": 9.932596764169979e-05, "loss": 0.001, "step": 8812 }, { "epoch": 37.68, "learning_rate": 9.93240591373584e-05, "loss": 0.0026, "step": 8816 }, { "epoch": 37.69, "learning_rate": 9.932214795328391e-05, "loss": 0.0009, "step": 8820 }, { "epoch": 37.71, "learning_rate": 9.932023408958018e-05, "loss": 0.0014, "step": 8824 }, { "epoch": 37.73, "learning_rate": 9.931831754635117e-05, "loss": 0.0003, "step": 8828 }, { "epoch": 37.74, "learning_rate": 9.9316398323701e-05, "loss": 0.0014, "step": 8832 }, { "epoch": 37.76, "learning_rate": 9.931447642173398e-05, "loss": 0.0021, "step": 8836 }, { "epoch": 37.78, "learning_rate": 9.93125518405545e-05, "loss": 0.0007, "step": 8840 }, { "epoch": 37.79, "learning_rate": 9.93106245802671e-05, "loss": 0.001, "step": 8844 }, { "epoch": 37.81, "learning_rate": 9.93086946409765e-05, "loss": 0.0007, "step": 8848 }, { "epoch": 37.83, "learning_rate": 9.930676202278756e-05, "loss": 0.0011, "step": 8852 }, { "epoch": 37.85, "learning_rate": 9.930482672580528e-05, "loss": 0.0006, "step": 8856 }, { "epoch": 37.86, "learning_rate": 9.930288875013479e-05, "loss": 0.0017, "step": 8860 }, { "epoch": 37.88, "learning_rate": 9.930094809588139e-05, "loss": 0.0027, "step": 8864 }, { "epoch": 37.9, "learning_rate": 9.92990047631505e-05, "loss": 0.0022, "step": 8868 }, { "epoch": 37.91, "learning_rate": 9.929705875204771e-05, "loss": 0.0028, "step": 8872 }, { "epoch": 37.93, "learning_rate": 9.929511006267876e-05, "loss": 0.0015, "step": 8876 }, { "epoch": 37.95, "learning_rate": 9.92931586951495e-05, "loss": 0.0009, "step": 8880 }, { "epoch": 37.97, "learning_rate": 9.929120464956594e-05, "loss": 0.0012, "step": 8884 }, { "epoch": 37.98, "learning_rate": 9.928924792603424e-05, "loss": 0.0008, "step": 8888 }, { "epoch": 38.0, "learning_rate": 9.928728852466073e-05, "loss": 0.0062, "step": 8892 }, { "epoch": 38.02, "learning_rate": 9.928532644555186e-05, "loss": 0.0005, "step": 8896 }, { "epoch": 38.03, "learning_rate": 9.928336168881423e-05, "loss": 0.0003, "step": 8900 }, { "epoch": 38.05, "learning_rate": 9.928139425455455e-05, "loss": 0.0015, "step": 8904 }, { "epoch": 38.07, "learning_rate": 9.927942414287974e-05, "loss": 0.0005, "step": 8908 }, { "epoch": 38.09, "learning_rate": 9.927745135389684e-05, "loss": 0.0007, "step": 8912 }, { "epoch": 38.1, "learning_rate": 9.927547588771299e-05, "loss": 0.0008, "step": 8916 }, { "epoch": 38.12, "learning_rate": 9.927349774443555e-05, "loss": 0.0008, "step": 8920 }, { "epoch": 38.14, "learning_rate": 9.9271516924172e-05, "loss": 0.0013, "step": 8924 }, { "epoch": 38.15, "learning_rate": 9.926953342702991e-05, "loss": 0.0017, "step": 8928 }, { "epoch": 38.17, "learning_rate": 9.926754725311709e-05, "loss": 0.0021, "step": 8932 }, { "epoch": 38.19, "learning_rate": 9.926555840254144e-05, "loss": 0.0008, "step": 8936 }, { "epoch": 38.21, "learning_rate": 9.926356687541098e-05, "loss": 0.0002, "step": 8940 }, { "epoch": 38.22, "learning_rate": 9.926157267183394e-05, "loss": 0.0037, "step": 8944 }, { "epoch": 38.24, "learning_rate": 9.925957579191863e-05, "loss": 0.0024, "step": 8948 }, { "epoch": 38.26, "learning_rate": 9.925757623577358e-05, "loss": 0.001, "step": 8952 }, { "epoch": 38.27, "learning_rate": 9.92555740035074e-05, "loss": 0.0007, "step": 8956 }, { "epoch": 38.29, "learning_rate": 9.925356909522887e-05, "loss": 0.0007, "step": 8960 }, { "epoch": 38.31, "learning_rate": 9.925156151104692e-05, "loss": 0.0011, "step": 8964 }, { "epoch": 38.32, "learning_rate": 9.924955125107063e-05, "loss": 0.0011, "step": 8968 }, { "epoch": 38.34, "learning_rate": 9.924753831540918e-05, "loss": 0.0019, "step": 8972 }, { "epoch": 38.36, "learning_rate": 9.924552270417198e-05, "loss": 0.0008, "step": 8976 }, { "epoch": 38.38, "learning_rate": 9.92435044174685e-05, "loss": 0.0014, "step": 8980 }, { "epoch": 38.39, "learning_rate": 9.92414834554084e-05, "loss": 0.0024, "step": 8984 }, { "epoch": 38.41, "learning_rate": 9.92394598181015e-05, "loss": 0.0007, "step": 8988 }, { "epoch": 38.43, "learning_rate": 9.923743350565771e-05, "loss": 0.0006, "step": 8992 }, { "epoch": 38.44, "learning_rate": 9.923540451818713e-05, "loss": 0.0015, "step": 8996 }, { "epoch": 38.46, "learning_rate": 9.923337285579999e-05, "loss": 0.0006, "step": 9000 }, { "epoch": 38.46, "eval_exact_match": 0.501039501039501, "eval_loss": 0.7777053117752075, "eval_runtime": 134.1347, "eval_samples_per_second": 7.172, "step": 9000 }, { "epoch": 38.48, "learning_rate": 9.923133851860669e-05, "loss": 0.0006, "step": 9004 }, { "epoch": 38.5, "learning_rate": 9.922930150671773e-05, "loss": 0.0008, "step": 9008 }, { "epoch": 38.51, "learning_rate": 9.922726182024378e-05, "loss": 0.0009, "step": 9012 }, { "epoch": 38.53, "learning_rate": 9.922521945929567e-05, "loss": 0.0014, "step": 9016 }, { "epoch": 38.55, "learning_rate": 9.922317442398433e-05, "loss": 0.0031, "step": 9020 }, { "epoch": 38.56, "learning_rate": 9.92211267144209e-05, "loss": 0.0017, "step": 9024 }, { "epoch": 38.58, "learning_rate": 9.921907633071661e-05, "loss": 0.0018, "step": 9028 }, { "epoch": 38.6, "learning_rate": 9.921702327298286e-05, "loss": 0.001, "step": 9032 }, { "epoch": 38.62, "learning_rate": 9.92149675413312e-05, "loss": 0.0011, "step": 9036 }, { "epoch": 38.63, "learning_rate": 9.921290913587328e-05, "loss": 0.0012, "step": 9040 }, { "epoch": 38.65, "learning_rate": 9.921084805672098e-05, "loss": 0.0023, "step": 9044 }, { "epoch": 38.67, "learning_rate": 9.920878430398626e-05, "loss": 0.0031, "step": 9048 }, { "epoch": 38.68, "learning_rate": 9.920671787778124e-05, "loss": 0.0007, "step": 9052 }, { "epoch": 38.7, "learning_rate": 9.920464877821818e-05, "loss": 0.0012, "step": 9056 }, { "epoch": 38.72, "learning_rate": 9.920257700540949e-05, "loss": 0.0025, "step": 9060 }, { "epoch": 38.74, "learning_rate": 9.920050255946774e-05, "loss": 0.0034, "step": 9064 }, { "epoch": 38.75, "learning_rate": 9.919842544050561e-05, "loss": 0.0022, "step": 9068 }, { "epoch": 38.77, "learning_rate": 9.919634564863599e-05, "loss": 0.0008, "step": 9072 }, { "epoch": 38.79, "learning_rate": 9.919426318397185e-05, "loss": 0.0012, "step": 9076 }, { "epoch": 38.8, "learning_rate": 9.91921780466263e-05, "loss": 0.0018, "step": 9080 }, { "epoch": 38.82, "learning_rate": 9.919009023671268e-05, "loss": 0.0012, "step": 9084 }, { "epoch": 38.84, "learning_rate": 9.918799975434436e-05, "loss": 0.0006, "step": 9088 }, { "epoch": 38.85, "learning_rate": 9.918590659963497e-05, "loss": 0.0011, "step": 9092 }, { "epoch": 38.87, "learning_rate": 9.91838107726982e-05, "loss": 0.0006, "step": 9096 }, { "epoch": 38.89, "learning_rate": 9.918171227364791e-05, "loss": 0.0004, "step": 9100 }, { "epoch": 38.91, "learning_rate": 9.917961110259811e-05, "loss": 0.0009, "step": 9104 }, { "epoch": 38.92, "learning_rate": 9.917750725966297e-05, "loss": 0.0012, "step": 9108 }, { "epoch": 38.94, "learning_rate": 9.917540074495679e-05, "loss": 0.0006, "step": 9112 }, { "epoch": 38.96, "learning_rate": 9.917329155859401e-05, "loss": 0.0015, "step": 9116 }, { "epoch": 38.97, "learning_rate": 9.917117970068919e-05, "loss": 0.0003, "step": 9120 }, { "epoch": 38.99, "learning_rate": 9.91690651713571e-05, "loss": 0.002, "step": 9124 }, { "epoch": 39.01, "learning_rate": 9.916694797071265e-05, "loss": 0.0006, "step": 9128 }, { "epoch": 39.03, "learning_rate": 9.91648280988708e-05, "loss": 0.0005, "step": 9132 }, { "epoch": 39.04, "learning_rate": 9.916270555594674e-05, "loss": 0.0003, "step": 9136 }, { "epoch": 39.06, "learning_rate": 9.91605803420558e-05, "loss": 0.0011, "step": 9140 }, { "epoch": 39.08, "learning_rate": 9.915845245731345e-05, "loss": 0.0024, "step": 9144 }, { "epoch": 39.09, "learning_rate": 9.915632190183528e-05, "loss": 0.0017, "step": 9148 }, { "epoch": 39.11, "learning_rate": 9.915418867573703e-05, "loss": 0.001, "step": 9152 }, { "epoch": 39.13, "learning_rate": 9.915205277913462e-05, "loss": 0.0003, "step": 9156 }, { "epoch": 39.15, "learning_rate": 9.914991421214408e-05, "loss": 0.0006, "step": 9160 }, { "epoch": 39.16, "learning_rate": 9.91477729748816e-05, "loss": 0.0002, "step": 9164 }, { "epoch": 39.18, "learning_rate": 9.914562906746353e-05, "loss": 0.0021, "step": 9168 }, { "epoch": 39.2, "learning_rate": 9.914348249000629e-05, "loss": 0.0006, "step": 9172 }, { "epoch": 39.21, "learning_rate": 9.914133324262656e-05, "loss": 0.001, "step": 9176 }, { "epoch": 39.23, "learning_rate": 9.913918132544107e-05, "loss": 0.002, "step": 9180 }, { "epoch": 39.25, "learning_rate": 9.913702673856675e-05, "loss": 0.0009, "step": 9184 }, { "epoch": 39.26, "learning_rate": 9.913486948212066e-05, "loss": 0.0021, "step": 9188 }, { "epoch": 39.28, "learning_rate": 9.913270955622e-05, "loss": 0.0021, "step": 9192 }, { "epoch": 39.3, "learning_rate": 9.91305469609821e-05, "loss": 0.0012, "step": 9196 }, { "epoch": 39.32, "learning_rate": 9.912838169652446e-05, "loss": 0.0025, "step": 9200 }, { "epoch": 39.33, "learning_rate": 9.912621376296474e-05, "loss": 0.001, "step": 9204 }, { "epoch": 39.35, "learning_rate": 9.91240431604207e-05, "loss": 0.0045, "step": 9208 }, { "epoch": 39.37, "learning_rate": 9.912186988901025e-05, "loss": 0.0017, "step": 9212 }, { "epoch": 39.38, "learning_rate": 9.91196939488515e-05, "loss": 0.0008, "step": 9216 }, { "epoch": 39.4, "learning_rate": 9.911751534006264e-05, "loss": 0.0014, "step": 9220 }, { "epoch": 39.42, "learning_rate": 9.911533406276203e-05, "loss": 0.001, "step": 9224 }, { "epoch": 39.44, "learning_rate": 9.911315011706822e-05, "loss": 0.0025, "step": 9228 }, { "epoch": 39.45, "learning_rate": 9.911096350309979e-05, "loss": 0.003, "step": 9232 }, { "epoch": 39.47, "learning_rate": 9.910877422097559e-05, "loss": 0.0009, "step": 9236 }, { "epoch": 39.49, "learning_rate": 9.910658227081457e-05, "loss": 0.0009, "step": 9240 }, { "epoch": 39.5, "learning_rate": 9.910438765273578e-05, "loss": 0.0023, "step": 9244 }, { "epoch": 39.52, "learning_rate": 9.910219036685845e-05, "loss": 0.0013, "step": 9248 }, { "epoch": 39.54, "learning_rate": 9.9099990413302e-05, "loss": 0.002, "step": 9252 }, { "epoch": 39.56, "learning_rate": 9.90977877921859e-05, "loss": 0.0004, "step": 9256 }, { "epoch": 39.57, "learning_rate": 9.909558250362986e-05, "loss": 0.0003, "step": 9260 }, { "epoch": 39.59, "learning_rate": 9.909337454775367e-05, "loss": 0.0011, "step": 9264 }, { "epoch": 39.61, "learning_rate": 9.909116392467728e-05, "loss": 0.0019, "step": 9268 }, { "epoch": 39.62, "learning_rate": 9.908895063452079e-05, "loss": 0.0007, "step": 9272 }, { "epoch": 39.64, "learning_rate": 9.908673467740448e-05, "loss": 0.001, "step": 9276 }, { "epoch": 39.66, "learning_rate": 9.90845160534487e-05, "loss": 0.0012, "step": 9280 }, { "epoch": 39.68, "learning_rate": 9.908229476277401e-05, "loss": 0.0009, "step": 9284 }, { "epoch": 39.69, "learning_rate": 9.908007080550109e-05, "loss": 0.0016, "step": 9288 }, { "epoch": 39.71, "learning_rate": 9.907784418175075e-05, "loss": 0.0013, "step": 9292 }, { "epoch": 39.73, "learning_rate": 9.907561489164397e-05, "loss": 0.002, "step": 9296 }, { "epoch": 39.74, "learning_rate": 9.907338293530187e-05, "loss": 0.0015, "step": 9300 }, { "epoch": 39.76, "learning_rate": 9.90711483128457e-05, "loss": 0.0024, "step": 9304 }, { "epoch": 39.78, "learning_rate": 9.906891102439688e-05, "loss": 0.0007, "step": 9308 }, { "epoch": 39.79, "learning_rate": 9.906667107007694e-05, "loss": 0.0021, "step": 9312 }, { "epoch": 39.81, "learning_rate": 9.90644284500076e-05, "loss": 0.0008, "step": 9316 }, { "epoch": 39.83, "learning_rate": 9.906218316431066e-05, "loss": 0.0034, "step": 9320 }, { "epoch": 39.85, "learning_rate": 9.905993521310816e-05, "loss": 0.0005, "step": 9324 }, { "epoch": 39.86, "learning_rate": 9.905768459652218e-05, "loss": 0.0036, "step": 9328 }, { "epoch": 39.88, "learning_rate": 9.905543131467501e-05, "loss": 0.0008, "step": 9332 }, { "epoch": 39.9, "learning_rate": 9.905317536768908e-05, "loss": 0.0011, "step": 9336 }, { "epoch": 39.91, "learning_rate": 9.905091675568694e-05, "loss": 0.0012, "step": 9340 }, { "epoch": 39.93, "learning_rate": 9.904865547879131e-05, "loss": 0.0009, "step": 9344 }, { "epoch": 39.95, "learning_rate": 9.904639153712503e-05, "loss": 0.0006, "step": 9348 }, { "epoch": 39.97, "learning_rate": 9.90441249308111e-05, "loss": 0.002, "step": 9352 }, { "epoch": 39.98, "learning_rate": 9.904185565997269e-05, "loss": 0.0036, "step": 9356 }, { "epoch": 40.0, "learning_rate": 9.903958372473303e-05, "loss": 0.0006, "step": 9360 }, { "epoch": 40.02, "learning_rate": 9.903730912521561e-05, "loss": 0.001, "step": 9364 }, { "epoch": 40.03, "learning_rate": 9.903503186154399e-05, "loss": 0.0019, "step": 9368 }, { "epoch": 40.05, "learning_rate": 9.903275193384188e-05, "loss": 0.0008, "step": 9372 }, { "epoch": 40.07, "learning_rate": 9.903046934223315e-05, "loss": 0.002, "step": 9376 }, { "epoch": 40.09, "learning_rate": 9.90281840868418e-05, "loss": 0.0023, "step": 9380 }, { "epoch": 40.1, "learning_rate": 9.902589616779201e-05, "loss": 0.0011, "step": 9384 }, { "epoch": 40.12, "learning_rate": 9.902360558520807e-05, "loss": 0.0016, "step": 9388 }, { "epoch": 40.14, "learning_rate": 9.902131233921443e-05, "loss": 0.0041, "step": 9392 }, { "epoch": 40.15, "learning_rate": 9.901901642993567e-05, "loss": 0.0029, "step": 9396 }, { "epoch": 40.17, "learning_rate": 9.901671785749654e-05, "loss": 0.0011, "step": 9400 }, { "epoch": 40.19, "learning_rate": 9.90144166220219e-05, "loss": 0.001, "step": 9404 }, { "epoch": 40.21, "learning_rate": 9.901211272363679e-05, "loss": 0.0007, "step": 9408 }, { "epoch": 40.22, "learning_rate": 9.900980616246637e-05, "loss": 0.0021, "step": 9412 }, { "epoch": 40.24, "learning_rate": 9.900749693863596e-05, "loss": 0.0009, "step": 9416 }, { "epoch": 40.26, "learning_rate": 9.900518505227101e-05, "loss": 0.0004, "step": 9420 }, { "epoch": 40.27, "learning_rate": 9.900287050349712e-05, "loss": 0.001, "step": 9424 }, { "epoch": 40.29, "learning_rate": 9.900055329244004e-05, "loss": 0.0022, "step": 9428 }, { "epoch": 40.31, "learning_rate": 9.89982334192257e-05, "loss": 0.0014, "step": 9432 }, { "epoch": 40.32, "learning_rate": 9.899591088398007e-05, "loss": 0.0029, "step": 9436 }, { "epoch": 40.34, "learning_rate": 9.899358568682938e-05, "loss": 0.0004, "step": 9440 }, { "epoch": 40.36, "learning_rate": 9.899125782789993e-05, "loss": 0.0006, "step": 9444 }, { "epoch": 40.38, "learning_rate": 9.898892730731821e-05, "loss": 0.0019, "step": 9448 }, { "epoch": 40.39, "learning_rate": 9.898659412521082e-05, "loss": 0.0016, "step": 9452 }, { "epoch": 40.41, "learning_rate": 9.898425828170452e-05, "loss": 0.0029, "step": 9456 }, { "epoch": 40.43, "learning_rate": 9.898191977692623e-05, "loss": 0.0018, "step": 9460 }, { "epoch": 40.44, "learning_rate": 9.8979578611003e-05, "loss": 0.0005, "step": 9464 }, { "epoch": 40.46, "learning_rate": 9.8977234784062e-05, "loss": 0.0007, "step": 9468 }, { "epoch": 40.48, "learning_rate": 9.897488829623059e-05, "loss": 0.0022, "step": 9472 }, { "epoch": 40.5, "learning_rate": 9.897253914763622e-05, "loss": 0.0034, "step": 9476 }, { "epoch": 40.51, "learning_rate": 9.897018733840656e-05, "loss": 0.0019, "step": 9480 }, { "epoch": 40.53, "learning_rate": 9.896783286866935e-05, "loss": 0.0009, "step": 9484 }, { "epoch": 40.55, "learning_rate": 9.896547573855253e-05, "loss": 0.0006, "step": 9488 }, { "epoch": 40.56, "learning_rate": 9.896311594818415e-05, "loss": 0.0004, "step": 9492 }, { "epoch": 40.58, "learning_rate": 9.896075349769241e-05, "loss": 0.0038, "step": 9496 }, { "epoch": 40.6, "learning_rate": 9.895838838720565e-05, "loss": 0.0023, "step": 9500 }, { "epoch": 40.62, "learning_rate": 9.89560206168524e-05, "loss": 0.0005, "step": 9504 }, { "epoch": 40.63, "learning_rate": 9.895365018676128e-05, "loss": 0.0008, "step": 9508 }, { "epoch": 40.65, "learning_rate": 9.895127709706106e-05, "loss": 0.0016, "step": 9512 }, { "epoch": 40.67, "learning_rate": 9.894890134788068e-05, "loss": 0.0013, "step": 9516 }, { "epoch": 40.68, "learning_rate": 9.894652293934921e-05, "loss": 0.0011, "step": 9520 }, { "epoch": 40.7, "learning_rate": 9.894414187159587e-05, "loss": 0.0006, "step": 9524 }, { "epoch": 40.72, "learning_rate": 9.894175814475002e-05, "loss": 0.0008, "step": 9528 }, { "epoch": 40.74, "learning_rate": 9.893937175894117e-05, "loss": 0.0008, "step": 9532 }, { "epoch": 40.75, "learning_rate": 9.893698271429896e-05, "loss": 0.001, "step": 9536 }, { "epoch": 40.77, "learning_rate": 9.893459101095319e-05, "loss": 0.0006, "step": 9540 }, { "epoch": 40.79, "learning_rate": 9.893219664903382e-05, "loss": 0.0009, "step": 9544 }, { "epoch": 40.8, "learning_rate": 9.892979962867088e-05, "loss": 0.0005, "step": 9548 }, { "epoch": 40.82, "learning_rate": 9.892739994999466e-05, "loss": 0.0013, "step": 9552 }, { "epoch": 40.84, "learning_rate": 9.89249976131355e-05, "loss": 0.0012, "step": 9556 }, { "epoch": 40.85, "learning_rate": 9.89225926182239e-05, "loss": 0.0007, "step": 9560 }, { "epoch": 40.87, "learning_rate": 9.892018496539055e-05, "loss": 0.0019, "step": 9564 }, { "epoch": 40.89, "learning_rate": 9.891777465476627e-05, "loss": 0.0026, "step": 9568 }, { "epoch": 40.91, "learning_rate": 9.891536168648195e-05, "loss": 0.0009, "step": 9572 }, { "epoch": 40.92, "learning_rate": 9.891294606066875e-05, "loss": 0.0008, "step": 9576 }, { "epoch": 40.94, "learning_rate": 9.891052777745786e-05, "loss": 0.0007, "step": 9580 }, { "epoch": 40.96, "learning_rate": 9.89081068369807e-05, "loss": 0.0015, "step": 9584 }, { "epoch": 40.97, "learning_rate": 9.890568323936877e-05, "loss": 0.0007, "step": 9588 }, { "epoch": 40.99, "learning_rate": 9.890325698475377e-05, "loss": 0.0007, "step": 9592 }, { "epoch": 41.01, "learning_rate": 9.89008280732675e-05, "loss": 0.0029, "step": 9596 }, { "epoch": 41.03, "learning_rate": 9.889839650504191e-05, "loss": 0.0007, "step": 9600 }, { "epoch": 41.04, "learning_rate": 9.889596228020912e-05, "loss": 0.0013, "step": 9604 }, { "epoch": 41.06, "learning_rate": 9.889352539890136e-05, "loss": 0.0062, "step": 9608 }, { "epoch": 41.08, "learning_rate": 9.889108586125105e-05, "loss": 0.0003, "step": 9612 }, { "epoch": 41.09, "learning_rate": 9.888864366739073e-05, "loss": 0.001, "step": 9616 }, { "epoch": 41.11, "learning_rate": 9.888619881745305e-05, "loss": 0.0005, "step": 9620 }, { "epoch": 41.13, "learning_rate": 9.888375131157085e-05, "loss": 0.0016, "step": 9624 }, { "epoch": 41.15, "learning_rate": 9.888130114987712e-05, "loss": 0.0013, "step": 9628 }, { "epoch": 41.16, "learning_rate": 9.887884833250496e-05, "loss": 0.0008, "step": 9632 }, { "epoch": 41.18, "learning_rate": 9.887639285958763e-05, "loss": 0.0025, "step": 9636 }, { "epoch": 41.2, "learning_rate": 9.887393473125854e-05, "loss": 0.001, "step": 9640 }, { "epoch": 41.21, "learning_rate": 9.887147394765122e-05, "loss": 0.0007, "step": 9644 }, { "epoch": 41.23, "learning_rate": 9.886901050889938e-05, "loss": 0.0018, "step": 9648 }, { "epoch": 41.25, "learning_rate": 9.886654441513686e-05, "loss": 0.0007, "step": 9652 }, { "epoch": 41.26, "learning_rate": 9.886407566649762e-05, "loss": 0.0013, "step": 9656 }, { "epoch": 41.28, "learning_rate": 9.886160426311581e-05, "loss": 0.0006, "step": 9660 }, { "epoch": 41.3, "learning_rate": 9.885913020512568e-05, "loss": 0.0021, "step": 9664 }, { "epoch": 41.32, "learning_rate": 9.885665349266163e-05, "loss": 0.0017, "step": 9668 }, { "epoch": 41.33, "learning_rate": 9.885417412585826e-05, "loss": 0.0004, "step": 9672 }, { "epoch": 41.35, "learning_rate": 9.885169210485023e-05, "loss": 0.0027, "step": 9676 }, { "epoch": 41.37, "learning_rate": 9.884920742977243e-05, "loss": 0.0028, "step": 9680 }, { "epoch": 41.38, "learning_rate": 9.884672010075982e-05, "loss": 0.0042, "step": 9684 }, { "epoch": 41.4, "learning_rate": 9.884423011794753e-05, "loss": 0.0006, "step": 9688 }, { "epoch": 41.42, "learning_rate": 9.884173748147084e-05, "loss": 0.0021, "step": 9692 }, { "epoch": 41.44, "learning_rate": 9.883924219146519e-05, "loss": 0.0016, "step": 9696 }, { "epoch": 41.45, "learning_rate": 9.883674424806614e-05, "loss": 0.0003, "step": 9700 }, { "epoch": 41.47, "learning_rate": 9.88342436514094e-05, "loss": 0.0007, "step": 9704 }, { "epoch": 41.49, "learning_rate": 9.883174040163081e-05, "loss": 0.0015, "step": 9708 }, { "epoch": 41.5, "learning_rate": 9.882923449886641e-05, "loss": 0.0033, "step": 9712 }, { "epoch": 41.52, "learning_rate": 9.882672594325228e-05, "loss": 0.0012, "step": 9716 }, { "epoch": 41.54, "learning_rate": 9.882421473492478e-05, "loss": 0.0012, "step": 9720 }, { "epoch": 41.56, "learning_rate": 9.88217008740203e-05, "loss": 0.0007, "step": 9724 }, { "epoch": 41.57, "learning_rate": 9.881918436067541e-05, "loss": 0.0033, "step": 9728 }, { "epoch": 41.59, "learning_rate": 9.881666519502684e-05, "loss": 0.001, "step": 9732 }, { "epoch": 41.61, "learning_rate": 9.881414337721146e-05, "loss": 0.0011, "step": 9736 }, { "epoch": 41.62, "learning_rate": 9.881161890736627e-05, "loss": 0.0009, "step": 9740 }, { "epoch": 41.64, "learning_rate": 9.880909178562845e-05, "loss": 0.0018, "step": 9744 }, { "epoch": 41.66, "learning_rate": 9.880656201213525e-05, "loss": 0.0008, "step": 9748 }, { "epoch": 41.68, "learning_rate": 9.880402958702415e-05, "loss": 0.0016, "step": 9752 }, { "epoch": 41.69, "learning_rate": 9.88014945104327e-05, "loss": 0.0017, "step": 9756 }, { "epoch": 41.71, "learning_rate": 9.879895678249866e-05, "loss": 0.0011, "step": 9760 }, { "epoch": 41.73, "learning_rate": 9.879641640335988e-05, "loss": 0.0003, "step": 9764 }, { "epoch": 41.74, "learning_rate": 9.87938733731544e-05, "loss": 0.0009, "step": 9768 }, { "epoch": 41.76, "learning_rate": 9.879132769202035e-05, "loss": 0.0007, "step": 9772 }, { "epoch": 41.78, "learning_rate": 9.878877936009605e-05, "loss": 0.0007, "step": 9776 }, { "epoch": 41.79, "learning_rate": 9.878622837751997e-05, "loss": 0.0005, "step": 9780 }, { "epoch": 41.81, "learning_rate": 9.878367474443066e-05, "loss": 0.0005, "step": 9784 }, { "epoch": 41.83, "learning_rate": 9.87811184609669e-05, "loss": 0.0012, "step": 9788 }, { "epoch": 41.85, "learning_rate": 9.877855952726752e-05, "loss": 0.0019, "step": 9792 }, { "epoch": 41.86, "learning_rate": 9.877599794347158e-05, "loss": 0.0015, "step": 9796 }, { "epoch": 41.88, "learning_rate": 9.877343370971827e-05, "loss": 0.0008, "step": 9800 }, { "epoch": 41.9, "learning_rate": 9.877086682614686e-05, "loss": 0.0003, "step": 9804 }, { "epoch": 41.91, "learning_rate": 9.876829729289682e-05, "loss": 0.0005, "step": 9808 }, { "epoch": 41.93, "learning_rate": 9.876572511010775e-05, "loss": 0.0009, "step": 9812 }, { "epoch": 41.95, "learning_rate": 9.87631502779194e-05, "loss": 0.002, "step": 9816 }, { "epoch": 41.97, "learning_rate": 9.876057279647165e-05, "loss": 0.001, "step": 9820 }, { "epoch": 41.98, "learning_rate": 9.875799266590453e-05, "loss": 0.0019, "step": 9824 }, { "epoch": 42.0, "learning_rate": 9.875540988635824e-05, "loss": 0.0003, "step": 9828 }, { "epoch": 42.02, "learning_rate": 9.875282445797308e-05, "loss": 0.0003, "step": 9832 }, { "epoch": 42.03, "learning_rate": 9.875023638088951e-05, "loss": 0.0013, "step": 9836 }, { "epoch": 42.05, "learning_rate": 9.874764565524816e-05, "loss": 0.0004, "step": 9840 }, { "epoch": 42.07, "learning_rate": 9.874505228118976e-05, "loss": 0.0006, "step": 9844 }, { "epoch": 42.09, "learning_rate": 9.874245625885521e-05, "loss": 0.0004, "step": 9848 }, { "epoch": 42.1, "learning_rate": 9.873985758838555e-05, "loss": 0.0003, "step": 9852 }, { "epoch": 42.12, "learning_rate": 9.873725626992199e-05, "loss": 0.0023, "step": 9856 }, { "epoch": 42.14, "learning_rate": 9.873465230360582e-05, "loss": 0.0017, "step": 9860 }, { "epoch": 42.15, "learning_rate": 9.87320456895785e-05, "loss": 0.0009, "step": 9864 }, { "epoch": 42.17, "learning_rate": 9.87294364279817e-05, "loss": 0.001, "step": 9868 }, { "epoch": 42.19, "learning_rate": 9.872682451895714e-05, "loss": 0.0005, "step": 9872 }, { "epoch": 42.21, "learning_rate": 9.872420996264673e-05, "loss": 0.0007, "step": 9876 }, { "epoch": 42.22, "learning_rate": 9.872159275919253e-05, "loss": 0.001, "step": 9880 }, { "epoch": 42.24, "learning_rate": 9.87189729087367e-05, "loss": 0.0009, "step": 9884 }, { "epoch": 42.26, "learning_rate": 9.87163504114216e-05, "loss": 0.0012, "step": 9888 }, { "epoch": 42.27, "learning_rate": 9.871372526738971e-05, "loss": 0.0007, "step": 9892 }, { "epoch": 42.29, "learning_rate": 9.871109747678364e-05, "loss": 0.0005, "step": 9896 }, { "epoch": 42.31, "learning_rate": 9.870846703974614e-05, "loss": 0.0009, "step": 9900 }, { "epoch": 42.32, "learning_rate": 9.870583395642017e-05, "loss": 0.0005, "step": 9904 }, { "epoch": 42.34, "learning_rate": 9.870319822694874e-05, "loss": 0.0007, "step": 9908 }, { "epoch": 42.36, "learning_rate": 9.870055985147503e-05, "loss": 0.0015, "step": 9912 }, { "epoch": 42.38, "learning_rate": 9.869791883014243e-05, "loss": 0.0015, "step": 9916 }, { "epoch": 42.39, "learning_rate": 9.869527516309441e-05, "loss": 0.0014, "step": 9920 }, { "epoch": 42.41, "learning_rate": 9.869262885047459e-05, "loss": 0.0015, "step": 9924 }, { "epoch": 42.43, "learning_rate": 9.868997989242672e-05, "loss": 0.0018, "step": 9928 }, { "epoch": 42.44, "learning_rate": 9.868732828909476e-05, "loss": 0.0018, "step": 9932 }, { "epoch": 42.46, "learning_rate": 9.868467404062273e-05, "loss": 0.0009, "step": 9936 }, { "epoch": 42.48, "learning_rate": 9.868201714715489e-05, "loss": 0.001, "step": 9940 }, { "epoch": 42.5, "learning_rate": 9.867935760883551e-05, "loss": 0.0009, "step": 9944 }, { "epoch": 42.51, "learning_rate": 9.867669542580912e-05, "loss": 0.0009, "step": 9948 }, { "epoch": 42.53, "learning_rate": 9.867403059822035e-05, "loss": 0.0008, "step": 9952 }, { "epoch": 42.55, "learning_rate": 9.867136312621398e-05, "loss": 0.0011, "step": 9956 }, { "epoch": 42.56, "learning_rate": 9.866869300993496e-05, "loss": 0.002, "step": 9960 }, { "epoch": 42.58, "learning_rate": 9.866602024952828e-05, "loss": 0.001, "step": 9964 }, { "epoch": 42.6, "learning_rate": 9.866334484513923e-05, "loss": 0.0008, "step": 9968 }, { "epoch": 42.62, "learning_rate": 9.866066679691313e-05, "loss": 0.0006, "step": 9972 }, { "epoch": 42.63, "learning_rate": 9.865798610499546e-05, "loss": 0.0012, "step": 9976 }, { "epoch": 42.65, "learning_rate": 9.865530276953187e-05, "loss": 0.0004, "step": 9980 }, { "epoch": 42.67, "learning_rate": 9.865261679066814e-05, "loss": 0.0033, "step": 9984 }, { "epoch": 42.68, "learning_rate": 9.864992816855021e-05, "loss": 0.0021, "step": 9988 }, { "epoch": 42.7, "learning_rate": 9.864723690332415e-05, "loss": 0.0006, "step": 9992 }, { "epoch": 42.72, "learning_rate": 9.864454299513616e-05, "loss": 0.002, "step": 9996 }, { "epoch": 42.74, "learning_rate": 9.864184644413262e-05, "loss": 0.0016, "step": 10000 }, { "epoch": 42.74, "eval_exact_match": 0.5031185031185031, "eval_loss": 0.7922365665435791, "eval_runtime": 139.7876, "eval_samples_per_second": 6.882, "step": 10000 }, { "epoch": 42.75, "learning_rate": 9.863914725046e-05, "loss": 0.0025, "step": 10004 }, { "epoch": 42.77, "learning_rate": 9.863644541426496e-05, "loss": 0.0007, "step": 10008 }, { "epoch": 42.79, "learning_rate": 9.863374093569428e-05, "loss": 0.0017, "step": 10012 }, { "epoch": 42.8, "learning_rate": 9.863103381489493e-05, "loss": 0.0003, "step": 10016 }, { "epoch": 42.82, "learning_rate": 9.862832405201395e-05, "loss": 0.0007, "step": 10020 }, { "epoch": 42.84, "learning_rate": 9.862561164719855e-05, "loss": 0.0023, "step": 10024 }, { "epoch": 42.85, "learning_rate": 9.862289660059611e-05, "loss": 0.0014, "step": 10028 }, { "epoch": 42.87, "learning_rate": 9.862017891235416e-05, "loss": 0.0014, "step": 10032 }, { "epoch": 42.89, "learning_rate": 9.86174585826203e-05, "loss": 0.0044, "step": 10036 }, { "epoch": 42.91, "learning_rate": 9.861473561154236e-05, "loss": 0.0004, "step": 10040 }, { "epoch": 42.92, "learning_rate": 9.861200999926826e-05, "loss": 0.0011, "step": 10044 }, { "epoch": 42.94, "learning_rate": 9.860928174594608e-05, "loss": 0.0016, "step": 10048 }, { "epoch": 42.96, "learning_rate": 9.860655085172405e-05, "loss": 0.0008, "step": 10052 }, { "epoch": 42.97, "learning_rate": 9.860381731675055e-05, "loss": 0.0011, "step": 10056 }, { "epoch": 42.99, "learning_rate": 9.860108114117406e-05, "loss": 0.0011, "step": 10060 }, { "epoch": 43.01, "learning_rate": 9.859834232514324e-05, "loss": 0.0004, "step": 10064 }, { "epoch": 43.03, "learning_rate": 9.859560086880693e-05, "loss": 0.0004, "step": 10068 }, { "epoch": 43.04, "learning_rate": 9.859285677231402e-05, "loss": 0.0013, "step": 10072 }, { "epoch": 43.06, "learning_rate": 9.85901100358136e-05, "loss": 0.0013, "step": 10076 }, { "epoch": 43.08, "learning_rate": 9.858736065945493e-05, "loss": 0.0012, "step": 10080 }, { "epoch": 43.09, "learning_rate": 9.858460864338735e-05, "loss": 0.0007, "step": 10084 }, { "epoch": 43.11, "learning_rate": 9.858185398776039e-05, "loss": 0.0031, "step": 10088 }, { "epoch": 43.13, "learning_rate": 9.85790966927237e-05, "loss": 0.0007, "step": 10092 }, { "epoch": 43.15, "learning_rate": 9.85763367584271e-05, "loss": 0.0012, "step": 10096 }, { "epoch": 43.16, "learning_rate": 9.857357418502051e-05, "loss": 0.0033, "step": 10100 }, { "epoch": 43.18, "learning_rate": 9.857080897265403e-05, "loss": 0.0031, "step": 10104 }, { "epoch": 43.2, "learning_rate": 9.856804112147788e-05, "loss": 0.001, "step": 10108 }, { "epoch": 43.21, "learning_rate": 9.856527063164246e-05, "loss": 0.0013, "step": 10112 }, { "epoch": 43.23, "learning_rate": 9.856249750329827e-05, "loss": 0.0018, "step": 10116 }, { "epoch": 43.25, "learning_rate": 9.855972173659597e-05, "loss": 0.0009, "step": 10120 }, { "epoch": 43.26, "learning_rate": 9.855694333168638e-05, "loss": 0.0004, "step": 10124 }, { "epoch": 43.28, "learning_rate": 9.855416228872043e-05, "loss": 0.0004, "step": 10128 }, { "epoch": 43.3, "learning_rate": 9.855137860784923e-05, "loss": 0.0002, "step": 10132 }, { "epoch": 43.32, "learning_rate": 9.854859228922398e-05, "loss": 0.0004, "step": 10136 }, { "epoch": 43.33, "learning_rate": 9.85458033329961e-05, "loss": 0.0013, "step": 10140 }, { "epoch": 43.35, "learning_rate": 9.85430117393171e-05, "loss": 0.0029, "step": 10144 }, { "epoch": 43.37, "learning_rate": 9.854021750833864e-05, "loss": 0.0011, "step": 10148 }, { "epoch": 43.38, "learning_rate": 9.853742064021252e-05, "loss": 0.0003, "step": 10152 }, { "epoch": 43.4, "learning_rate": 9.85346211350907e-05, "loss": 0.0012, "step": 10156 }, { "epoch": 43.42, "learning_rate": 9.853181899312527e-05, "loss": 0.0012, "step": 10160 }, { "epoch": 43.44, "learning_rate": 9.852901421446849e-05, "loss": 0.0008, "step": 10164 }, { "epoch": 43.45, "learning_rate": 9.85262067992727e-05, "loss": 0.0026, "step": 10168 }, { "epoch": 43.47, "learning_rate": 9.852339674769047e-05, "loss": 0.0006, "step": 10172 }, { "epoch": 43.49, "learning_rate": 9.852058405987444e-05, "loss": 0.0016, "step": 10176 }, { "epoch": 43.5, "learning_rate": 9.85177687359774e-05, "loss": 0.0003, "step": 10180 }, { "epoch": 43.52, "learning_rate": 9.851495077615236e-05, "loss": 0.0004, "step": 10184 }, { "epoch": 43.54, "learning_rate": 9.851213018055237e-05, "loss": 0.0014, "step": 10188 }, { "epoch": 43.56, "learning_rate": 9.850930694933069e-05, "loss": 0.0008, "step": 10192 }, { "epoch": 43.57, "learning_rate": 9.85064810826407e-05, "loss": 0.0003, "step": 10196 }, { "epoch": 43.59, "learning_rate": 9.850365258063595e-05, "loss": 0.0015, "step": 10200 }, { "epoch": 43.61, "learning_rate": 9.850082144347007e-05, "loss": 0.0016, "step": 10204 }, { "epoch": 43.62, "learning_rate": 9.84979876712969e-05, "loss": 0.0032, "step": 10208 }, { "epoch": 43.64, "learning_rate": 9.849515126427038e-05, "loss": 0.0004, "step": 10212 }, { "epoch": 43.66, "learning_rate": 9.849231222254463e-05, "loss": 0.0004, "step": 10216 }, { "epoch": 43.68, "learning_rate": 9.848947054627386e-05, "loss": 0.0007, "step": 10220 }, { "epoch": 43.69, "learning_rate": 9.848662623561248e-05, "loss": 0.0019, "step": 10224 }, { "epoch": 43.71, "learning_rate": 9.848377929071503e-05, "loss": 0.0004, "step": 10228 }, { "epoch": 43.73, "learning_rate": 9.848092971173615e-05, "loss": 0.0017, "step": 10232 }, { "epoch": 43.74, "learning_rate": 9.847807749883069e-05, "loss": 0.0007, "step": 10236 }, { "epoch": 43.76, "learning_rate": 9.847522265215357e-05, "loss": 0.0009, "step": 10240 }, { "epoch": 43.78, "learning_rate": 9.847236517185994e-05, "loss": 0.002, "step": 10244 }, { "epoch": 43.79, "learning_rate": 9.846950505810499e-05, "loss": 0.0019, "step": 10248 }, { "epoch": 43.81, "learning_rate": 9.846664231104414e-05, "loss": 0.0013, "step": 10252 }, { "epoch": 43.83, "learning_rate": 9.846377693083292e-05, "loss": 0.0004, "step": 10256 }, { "epoch": 43.85, "learning_rate": 9.8460908917627e-05, "loss": 0.0003, "step": 10260 }, { "epoch": 43.86, "learning_rate": 9.845803827158218e-05, "loss": 0.0014, "step": 10264 }, { "epoch": 43.88, "learning_rate": 9.845516499285444e-05, "loss": 0.0009, "step": 10268 }, { "epoch": 43.9, "learning_rate": 9.845228908159988e-05, "loss": 0.0004, "step": 10272 }, { "epoch": 43.91, "learning_rate": 9.844941053797474e-05, "loss": 0.0007, "step": 10276 }, { "epoch": 43.93, "learning_rate": 9.844652936213541e-05, "loss": 0.0017, "step": 10280 }, { "epoch": 43.95, "learning_rate": 9.844364555423844e-05, "loss": 0.0019, "step": 10284 }, { "epoch": 43.97, "learning_rate": 9.844075911444046e-05, "loss": 0.0004, "step": 10288 }, { "epoch": 43.98, "learning_rate": 9.843787004289833e-05, "loss": 0.0006, "step": 10292 }, { "epoch": 44.0, "learning_rate": 9.8434978339769e-05, "loss": 0.0007, "step": 10296 }, { "epoch": 44.02, "learning_rate": 9.843208400520956e-05, "loss": 0.0002, "step": 10300 }, { "epoch": 44.03, "learning_rate": 9.842918703937727e-05, "loss": 0.0005, "step": 10304 }, { "epoch": 44.05, "learning_rate": 9.842628744242953e-05, "loss": 0.0021, "step": 10308 }, { "epoch": 44.07, "learning_rate": 9.842338521452384e-05, "loss": 0.0002, "step": 10312 }, { "epoch": 44.09, "learning_rate": 9.84204803558179e-05, "loss": 0.0006, "step": 10316 }, { "epoch": 44.1, "learning_rate": 9.841757286646953e-05, "loss": 0.0013, "step": 10320 }, { "epoch": 44.12, "learning_rate": 9.841466274663669e-05, "loss": 0.0006, "step": 10324 }, { "epoch": 44.14, "learning_rate": 9.841174999647747e-05, "loss": 0.0006, "step": 10328 }, { "epoch": 44.15, "learning_rate": 9.840883461615011e-05, "loss": 0.0013, "step": 10332 }, { "epoch": 44.17, "learning_rate": 9.840591660581304e-05, "loss": 0.0004, "step": 10336 }, { "epoch": 44.19, "learning_rate": 9.840299596562475e-05, "loss": 0.0012, "step": 10340 }, { "epoch": 44.21, "learning_rate": 9.840007269574394e-05, "loss": 0.0007, "step": 10344 }, { "epoch": 44.22, "learning_rate": 9.839714679632943e-05, "loss": 0.0012, "step": 10348 }, { "epoch": 44.24, "learning_rate": 9.839421826754016e-05, "loss": 0.0021, "step": 10352 }, { "epoch": 44.26, "learning_rate": 9.839128710953527e-05, "loss": 0.0015, "step": 10356 }, { "epoch": 44.27, "learning_rate": 9.838835332247397e-05, "loss": 0.0038, "step": 10360 }, { "epoch": 44.29, "learning_rate": 9.838541690651569e-05, "loss": 0.0006, "step": 10364 }, { "epoch": 44.31, "learning_rate": 9.83824778618199e-05, "loss": 0.0007, "step": 10368 }, { "epoch": 44.32, "learning_rate": 9.837953618854634e-05, "loss": 0.0015, "step": 10372 }, { "epoch": 44.34, "learning_rate": 9.83765918868548e-05, "loss": 0.0002, "step": 10376 }, { "epoch": 44.36, "learning_rate": 9.837364495690525e-05, "loss": 0.0015, "step": 10380 }, { "epoch": 44.38, "learning_rate": 9.83706953988578e-05, "loss": 0.0015, "step": 10384 }, { "epoch": 44.39, "learning_rate": 9.836774321287265e-05, "loss": 0.0022, "step": 10388 }, { "epoch": 44.41, "learning_rate": 9.836478839911025e-05, "loss": 0.0007, "step": 10392 }, { "epoch": 44.43, "learning_rate": 9.83618309577311e-05, "loss": 0.0006, "step": 10396 }, { "epoch": 44.44, "learning_rate": 9.83588708888959e-05, "loss": 0.0009, "step": 10400 }, { "epoch": 44.46, "learning_rate": 9.835590819276543e-05, "loss": 0.0023, "step": 10404 }, { "epoch": 44.48, "learning_rate": 9.835294286950069e-05, "loss": 0.0028, "step": 10408 }, { "epoch": 44.5, "learning_rate": 9.834997491926275e-05, "loss": 0.0035, "step": 10412 }, { "epoch": 44.51, "learning_rate": 9.83470043422129e-05, "loss": 0.0011, "step": 10416 }, { "epoch": 44.53, "learning_rate": 9.834403113851247e-05, "loss": 0.0004, "step": 10420 }, { "epoch": 44.55, "learning_rate": 9.834105530832302e-05, "loss": 0.0017, "step": 10424 }, { "epoch": 44.56, "learning_rate": 9.833807685180624e-05, "loss": 0.0011, "step": 10428 }, { "epoch": 44.58, "learning_rate": 9.833509576912394e-05, "loss": 0.0005, "step": 10432 }, { "epoch": 44.6, "learning_rate": 9.833211206043807e-05, "loss": 0.0012, "step": 10436 }, { "epoch": 44.62, "learning_rate": 9.832912572591072e-05, "loss": 0.003, "step": 10440 }, { "epoch": 44.63, "learning_rate": 9.832613676570417e-05, "loss": 0.001, "step": 10444 }, { "epoch": 44.65, "learning_rate": 9.832314517998076e-05, "loss": 0.0005, "step": 10448 }, { "epoch": 44.67, "learning_rate": 9.832015096890308e-05, "loss": 0.0006, "step": 10452 }, { "epoch": 44.68, "learning_rate": 9.831715413263375e-05, "loss": 0.0025, "step": 10456 }, { "epoch": 44.7, "learning_rate": 9.831415467133562e-05, "loss": 0.0005, "step": 10460 }, { "epoch": 44.72, "learning_rate": 9.831115258517163e-05, "loss": 0.0008, "step": 10464 }, { "epoch": 44.74, "learning_rate": 9.830814787430486e-05, "loss": 0.0003, "step": 10468 }, { "epoch": 44.75, "learning_rate": 9.830514053889861e-05, "loss": 0.0008, "step": 10472 }, { "epoch": 44.77, "learning_rate": 9.830213057911624e-05, "loss": 0.0035, "step": 10476 }, { "epoch": 44.79, "learning_rate": 9.829911799512124e-05, "loss": 0.0015, "step": 10480 }, { "epoch": 44.8, "learning_rate": 9.829610278707735e-05, "loss": 0.0002, "step": 10484 }, { "epoch": 44.82, "learning_rate": 9.829308495514832e-05, "loss": 0.0005, "step": 10488 }, { "epoch": 44.84, "learning_rate": 9.829006449949813e-05, "loss": 0.0015, "step": 10492 }, { "epoch": 44.85, "learning_rate": 9.82870414202909e-05, "loss": 0.0007, "step": 10496 }, { "epoch": 44.87, "learning_rate": 9.828401571769086e-05, "loss": 0.0004, "step": 10500 }, { "epoch": 44.89, "learning_rate": 9.828098739186238e-05, "loss": 0.0006, "step": 10504 }, { "epoch": 44.91, "learning_rate": 9.827795644296998e-05, "loss": 0.001, "step": 10508 }, { "epoch": 44.92, "learning_rate": 9.827492287117836e-05, "loss": 0.0011, "step": 10512 }, { "epoch": 44.94, "learning_rate": 9.82718866766523e-05, "loss": 0.0008, "step": 10516 }, { "epoch": 44.96, "learning_rate": 9.826884785955679e-05, "loss": 0.003, "step": 10520 }, { "epoch": 44.97, "learning_rate": 9.826580642005688e-05, "loss": 0.0006, "step": 10524 }, { "epoch": 44.99, "learning_rate": 9.826276235831785e-05, "loss": 0.0011, "step": 10528 }, { "epoch": 45.01, "learning_rate": 9.825971567450505e-05, "loss": 0.0002, "step": 10532 }, { "epoch": 45.03, "learning_rate": 9.825666636878403e-05, "loss": 0.0008, "step": 10536 }, { "epoch": 45.04, "learning_rate": 9.825361444132044e-05, "loss": 0.0004, "step": 10540 }, { "epoch": 45.06, "learning_rate": 9.82505598922801e-05, "loss": 0.0006, "step": 10544 }, { "epoch": 45.08, "learning_rate": 9.824750272182894e-05, "loss": 0.0005, "step": 10548 }, { "epoch": 45.09, "learning_rate": 9.824444293013306e-05, "loss": 0.0002, "step": 10552 }, { "epoch": 45.11, "learning_rate": 9.824138051735872e-05, "loss": 0.0014, "step": 10556 }, { "epoch": 45.13, "learning_rate": 9.823831548367226e-05, "loss": 0.0008, "step": 10560 }, { "epoch": 45.15, "learning_rate": 9.823524782924024e-05, "loss": 0.0008, "step": 10564 }, { "epoch": 45.16, "learning_rate": 9.82321775542293e-05, "loss": 0.0003, "step": 10568 }, { "epoch": 45.18, "learning_rate": 9.822910465880625e-05, "loss": 0.0006, "step": 10572 }, { "epoch": 45.2, "learning_rate": 9.822602914313803e-05, "loss": 0.0015, "step": 10576 }, { "epoch": 45.21, "learning_rate": 9.822295100739176e-05, "loss": 0.0048, "step": 10580 }, { "epoch": 45.23, "learning_rate": 9.821987025173462e-05, "loss": 0.0041, "step": 10584 }, { "epoch": 45.25, "learning_rate": 9.821678687633404e-05, "loss": 0.002, "step": 10588 }, { "epoch": 45.26, "learning_rate": 9.821370088135748e-05, "loss": 0.0005, "step": 10592 }, { "epoch": 45.28, "learning_rate": 9.821061226697266e-05, "loss": 0.0007, "step": 10596 }, { "epoch": 45.3, "learning_rate": 9.820752103334734e-05, "loss": 0.0008, "step": 10600 }, { "epoch": 45.32, "learning_rate": 9.820442718064948e-05, "loss": 0.0018, "step": 10604 }, { "epoch": 45.33, "learning_rate": 9.820133070904717e-05, "loss": 0.001, "step": 10608 }, { "epoch": 45.35, "learning_rate": 9.819823161870863e-05, "loss": 0.0002, "step": 10612 }, { "epoch": 45.37, "learning_rate": 9.819512990980223e-05, "loss": 0.0004, "step": 10616 }, { "epoch": 45.38, "learning_rate": 9.819202558249649e-05, "loss": 0.0015, "step": 10620 }, { "epoch": 45.4, "learning_rate": 9.818891863696004e-05, "loss": 0.0005, "step": 10624 }, { "epoch": 45.42, "learning_rate": 9.818580907336173e-05, "loss": 0.0009, "step": 10628 }, { "epoch": 45.44, "learning_rate": 9.818269689187045e-05, "loss": 0.0006, "step": 10632 }, { "epoch": 45.45, "learning_rate": 9.817958209265531e-05, "loss": 0.0014, "step": 10636 }, { "epoch": 45.47, "learning_rate": 9.817646467588551e-05, "loss": 0.0006, "step": 10640 }, { "epoch": 45.49, "learning_rate": 9.817334464173045e-05, "loss": 0.0011, "step": 10644 }, { "epoch": 45.5, "learning_rate": 9.817022199035961e-05, "loss": 0.0003, "step": 10648 }, { "epoch": 45.52, "learning_rate": 9.816709672194265e-05, "loss": 0.0007, "step": 10652 }, { "epoch": 45.54, "learning_rate": 9.816396883664938e-05, "loss": 0.0002, "step": 10656 }, { "epoch": 45.56, "learning_rate": 9.81608383346497e-05, "loss": 0.0004, "step": 10660 }, { "epoch": 45.57, "learning_rate": 9.815770521611371e-05, "loss": 0.0011, "step": 10664 }, { "epoch": 45.59, "learning_rate": 9.815456948121164e-05, "loss": 0.0026, "step": 10668 }, { "epoch": 45.61, "learning_rate": 9.815143113011383e-05, "loss": 0.0032, "step": 10672 }, { "epoch": 45.62, "learning_rate": 9.81482901629908e-05, "loss": 0.0008, "step": 10676 }, { "epoch": 45.64, "learning_rate": 9.814514658001317e-05, "loss": 0.0014, "step": 10680 }, { "epoch": 45.66, "learning_rate": 9.814200038135177e-05, "loss": 0.0004, "step": 10684 }, { "epoch": 45.68, "learning_rate": 9.813885156717749e-05, "loss": 0.0008, "step": 10688 }, { "epoch": 45.69, "learning_rate": 9.813570013766142e-05, "loss": 0.0032, "step": 10692 }, { "epoch": 45.71, "learning_rate": 9.813254609297477e-05, "loss": 0.0004, "step": 10696 }, { "epoch": 45.73, "learning_rate": 9.812938943328892e-05, "loss": 0.0017, "step": 10700 }, { "epoch": 45.74, "learning_rate": 9.812623015877533e-05, "loss": 0.0024, "step": 10704 }, { "epoch": 45.76, "learning_rate": 9.812306826960566e-05, "loss": 0.0007, "step": 10708 }, { "epoch": 45.78, "learning_rate": 9.81199037659517e-05, "loss": 0.0013, "step": 10712 }, { "epoch": 45.79, "learning_rate": 9.811673664798538e-05, "loss": 0.0008, "step": 10716 }, { "epoch": 45.81, "learning_rate": 9.811356691587874e-05, "loss": 0.0003, "step": 10720 }, { "epoch": 45.83, "learning_rate": 9.8110394569804e-05, "loss": 0.0026, "step": 10724 }, { "epoch": 45.85, "learning_rate": 9.81072196099335e-05, "loss": 0.0004, "step": 10728 }, { "epoch": 45.86, "learning_rate": 9.810404203643977e-05, "loss": 0.0007, "step": 10732 }, { "epoch": 45.88, "learning_rate": 9.81008618494954e-05, "loss": 0.0005, "step": 10736 }, { "epoch": 45.9, "learning_rate": 9.809767904927321e-05, "loss": 0.0029, "step": 10740 }, { "epoch": 45.91, "learning_rate": 9.809449363594607e-05, "loss": 0.0022, "step": 10744 }, { "epoch": 45.93, "learning_rate": 9.809130560968709e-05, "loss": 0.002, "step": 10748 }, { "epoch": 45.95, "learning_rate": 9.808811497066943e-05, "loss": 0.0028, "step": 10752 }, { "epoch": 45.97, "learning_rate": 9.808492171906647e-05, "loss": 0.0023, "step": 10756 }, { "epoch": 45.98, "learning_rate": 9.808172585505167e-05, "loss": 0.0015, "step": 10760 }, { "epoch": 46.0, "learning_rate": 9.807852737879868e-05, "loss": 0.0014, "step": 10764 }, { "epoch": 46.02, "learning_rate": 9.807532629048125e-05, "loss": 0.003, "step": 10768 }, { "epoch": 46.03, "learning_rate": 9.807212259027329e-05, "loss": 0.0003, "step": 10772 }, { "epoch": 46.05, "learning_rate": 9.806891627834889e-05, "loss": 0.002, "step": 10776 }, { "epoch": 46.07, "learning_rate": 9.80657073548822e-05, "loss": 0.001, "step": 10780 }, { "epoch": 46.09, "learning_rate": 9.80624958200476e-05, "loss": 0.0014, "step": 10784 }, { "epoch": 46.1, "learning_rate": 9.805928167401955e-05, "loss": 0.001, "step": 10788 }, { "epoch": 46.12, "learning_rate": 9.805606491697267e-05, "loss": 0.003, "step": 10792 }, { "epoch": 46.14, "learning_rate": 9.805284554908172e-05, "loss": 0.0012, "step": 10796 }, { "epoch": 46.15, "learning_rate": 9.80496235705216e-05, "loss": 0.0004, "step": 10800 }, { "epoch": 46.17, "learning_rate": 9.804639898146739e-05, "loss": 0.0006, "step": 10804 }, { "epoch": 46.19, "learning_rate": 9.804317178209426e-05, "loss": 0.0007, "step": 10808 }, { "epoch": 46.21, "learning_rate": 9.803994197257751e-05, "loss": 0.0003, "step": 10812 }, { "epoch": 46.22, "learning_rate": 9.803670955309265e-05, "loss": 0.0002, "step": 10816 }, { "epoch": 46.24, "learning_rate": 9.80334745238153e-05, "loss": 0.0038, "step": 10820 }, { "epoch": 46.26, "learning_rate": 9.80302368849212e-05, "loss": 0.0003, "step": 10824 }, { "epoch": 46.27, "learning_rate": 9.802699663658625e-05, "loss": 0.0006, "step": 10828 }, { "epoch": 46.29, "learning_rate": 9.802375377898649e-05, "loss": 0.0004, "step": 10832 }, { "epoch": 46.31, "learning_rate": 9.802050831229808e-05, "loss": 0.0003, "step": 10836 }, { "epoch": 46.32, "learning_rate": 9.801726023669739e-05, "loss": 0.0002, "step": 10840 }, { "epoch": 46.34, "learning_rate": 9.801400955236084e-05, "loss": 0.0009, "step": 10844 }, { "epoch": 46.36, "learning_rate": 9.801075625946508e-05, "loss": 0.0012, "step": 10848 }, { "epoch": 46.38, "learning_rate": 9.800750035818682e-05, "loss": 0.0005, "step": 10852 }, { "epoch": 46.39, "learning_rate": 9.800424184870298e-05, "loss": 0.0019, "step": 10856 }, { "epoch": 46.41, "learning_rate": 9.800098073119055e-05, "loss": 0.0015, "step": 10860 }, { "epoch": 46.43, "learning_rate": 9.799771700582676e-05, "loss": 0.0003, "step": 10864 }, { "epoch": 46.44, "learning_rate": 9.799445067278888e-05, "loss": 0.0005, "step": 10868 }, { "epoch": 46.46, "learning_rate": 9.799118173225438e-05, "loss": 0.0009, "step": 10872 }, { "epoch": 46.48, "learning_rate": 9.798791018440088e-05, "loss": 0.0005, "step": 10876 }, { "epoch": 46.5, "learning_rate": 9.798463602940608e-05, "loss": 0.0007, "step": 10880 }, { "epoch": 46.51, "learning_rate": 9.798135926744791e-05, "loss": 0.0009, "step": 10884 }, { "epoch": 46.53, "learning_rate": 9.797807989870435e-05, "loss": 0.0043, "step": 10888 }, { "epoch": 46.55, "learning_rate": 9.79747979233536e-05, "loss": 0.0027, "step": 10892 }, { "epoch": 46.56, "learning_rate": 9.797151334157393e-05, "loss": 0.0012, "step": 10896 }, { "epoch": 46.58, "learning_rate": 9.796822615354383e-05, "loss": 0.0004, "step": 10900 }, { "epoch": 46.6, "learning_rate": 9.796493635944186e-05, "loss": 0.0007, "step": 10904 }, { "epoch": 46.62, "learning_rate": 9.796164395944675e-05, "loss": 0.0009, "step": 10908 }, { "epoch": 46.63, "learning_rate": 9.795834895373739e-05, "loss": 0.0008, "step": 10912 }, { "epoch": 46.65, "learning_rate": 9.79550513424928e-05, "loss": 0.0002, "step": 10916 }, { "epoch": 46.67, "learning_rate": 9.795175112589213e-05, "loss": 0.0009, "step": 10920 }, { "epoch": 46.68, "learning_rate": 9.794844830411465e-05, "loss": 0.0002, "step": 10924 }, { "epoch": 46.7, "learning_rate": 9.794514287733983e-05, "loss": 0.0005, "step": 10928 }, { "epoch": 46.72, "learning_rate": 9.794183484574724e-05, "loss": 0.001, "step": 10932 }, { "epoch": 46.74, "learning_rate": 9.793852420951661e-05, "loss": 0.001, "step": 10936 }, { "epoch": 46.75, "learning_rate": 9.79352109688278e-05, "loss": 0.0018, "step": 10940 }, { "epoch": 46.77, "learning_rate": 9.793189512386081e-05, "loss": 0.0014, "step": 10944 }, { "epoch": 46.79, "learning_rate": 9.792857667479581e-05, "loss": 0.0002, "step": 10948 }, { "epoch": 46.8, "learning_rate": 9.792525562181305e-05, "loss": 0.0008, "step": 10952 }, { "epoch": 46.82, "learning_rate": 9.792193196509299e-05, "loss": 0.0017, "step": 10956 }, { "epoch": 46.84, "learning_rate": 9.791860570481618e-05, "loss": 0.0006, "step": 10960 }, { "epoch": 46.85, "learning_rate": 9.791527684116336e-05, "loss": 0.0017, "step": 10964 }, { "epoch": 46.87, "learning_rate": 9.791194537431536e-05, "loss": 0.0005, "step": 10968 }, { "epoch": 46.89, "learning_rate": 9.79086113044532e-05, "loss": 0.0002, "step": 10972 }, { "epoch": 46.91, "learning_rate": 9.790527463175799e-05, "loss": 0.0005, "step": 10976 }, { "epoch": 46.92, "learning_rate": 9.790193535641101e-05, "loss": 0.0008, "step": 10980 }, { "epoch": 46.94, "learning_rate": 9.789859347859369e-05, "loss": 0.0004, "step": 10984 }, { "epoch": 46.96, "learning_rate": 9.789524899848761e-05, "loss": 0.0016, "step": 10988 }, { "epoch": 46.97, "learning_rate": 9.789190191627444e-05, "loss": 0.0003, "step": 10992 }, { "epoch": 46.99, "learning_rate": 9.788855223213605e-05, "loss": 0.002, "step": 10996 }, { "epoch": 47.01, "learning_rate": 9.788519994625441e-05, "loss": 0.0004, "step": 11000 }, { "epoch": 47.01, "eval_exact_match": 0.5207900207900208, "eval_loss": 0.7863634824752808, "eval_runtime": 130.5406, "eval_samples_per_second": 7.369, "step": 11000 }, { "epoch": 47.03, "learning_rate": 9.788184505881164e-05, "loss": 0.0013, "step": 11004 }, { "epoch": 47.04, "learning_rate": 9.787848756999002e-05, "loss": 0.0003, "step": 11008 }, { "epoch": 47.06, "learning_rate": 9.787512747997197e-05, "loss": 0.0006, "step": 11012 }, { "epoch": 47.08, "learning_rate": 9.787176478894002e-05, "loss": 0.0008, "step": 11016 }, { "epoch": 47.09, "learning_rate": 9.786839949707686e-05, "loss": 0.0003, "step": 11020 }, { "epoch": 47.11, "learning_rate": 9.786503160456536e-05, "loss": 0.0004, "step": 11024 }, { "epoch": 47.13, "learning_rate": 9.786166111158845e-05, "loss": 0.0004, "step": 11028 }, { "epoch": 47.15, "learning_rate": 9.785828801832928e-05, "loss": 0.0006, "step": 11032 }, { "epoch": 47.16, "learning_rate": 9.785491232497108e-05, "loss": 0.0005, "step": 11036 }, { "epoch": 47.18, "learning_rate": 9.785153403169726e-05, "loss": 0.0003, "step": 11040 }, { "epoch": 47.2, "learning_rate": 9.784815313869136e-05, "loss": 0.0014, "step": 11044 }, { "epoch": 47.21, "learning_rate": 9.784476964613707e-05, "loss": 0.0006, "step": 11048 }, { "epoch": 47.23, "learning_rate": 9.784138355421819e-05, "loss": 0.0005, "step": 11052 }, { "epoch": 47.25, "learning_rate": 9.78379948631187e-05, "loss": 0.001, "step": 11056 }, { "epoch": 47.26, "learning_rate": 9.783460357302271e-05, "loss": 0.0006, "step": 11060 }, { "epoch": 47.28, "learning_rate": 9.783120968411445e-05, "loss": 0.0011, "step": 11064 }, { "epoch": 47.3, "learning_rate": 9.782781319657832e-05, "loss": 0.0002, "step": 11068 }, { "epoch": 47.32, "learning_rate": 9.782441411059884e-05, "loss": 0.0006, "step": 11072 }, { "epoch": 47.33, "learning_rate": 9.782101242636068e-05, "loss": 0.001, "step": 11076 }, { "epoch": 47.35, "learning_rate": 9.781760814404864e-05, "loss": 0.0009, "step": 11080 }, { "epoch": 47.37, "learning_rate": 9.78142012638477e-05, "loss": 0.0017, "step": 11084 }, { "epoch": 47.38, "learning_rate": 9.781079178594294e-05, "loss": 0.0029, "step": 11088 }, { "epoch": 47.4, "learning_rate": 9.780737971051957e-05, "loss": 0.0012, "step": 11092 }, { "epoch": 47.42, "learning_rate": 9.780396503776301e-05, "loss": 0.0015, "step": 11096 }, { "epoch": 47.44, "learning_rate": 9.780054776785875e-05, "loss": 0.0013, "step": 11100 }, { "epoch": 47.45, "learning_rate": 9.779712790099242e-05, "loss": 0.0045, "step": 11104 }, { "epoch": 47.47, "learning_rate": 9.779370543734987e-05, "loss": 0.0019, "step": 11108 }, { "epoch": 47.49, "learning_rate": 9.779028037711701e-05, "loss": 0.0006, "step": 11112 }, { "epoch": 47.5, "learning_rate": 9.778685272047994e-05, "loss": 0.0004, "step": 11116 }, { "epoch": 47.52, "learning_rate": 9.778342246762486e-05, "loss": 0.0006, "step": 11120 }, { "epoch": 47.54, "learning_rate": 9.777998961873815e-05, "loss": 0.0004, "step": 11124 }, { "epoch": 47.56, "learning_rate": 9.77765541740063e-05, "loss": 0.0003, "step": 11128 }, { "epoch": 47.57, "learning_rate": 9.777311613361596e-05, "loss": 0.0017, "step": 11132 }, { "epoch": 47.59, "learning_rate": 9.776967549775392e-05, "loss": 0.0008, "step": 11136 }, { "epoch": 47.61, "learning_rate": 9.77662322666071e-05, "loss": 0.0004, "step": 11140 }, { "epoch": 47.62, "learning_rate": 9.776278644036257e-05, "loss": 0.0006, "step": 11144 }, { "epoch": 47.64, "learning_rate": 9.775933801920754e-05, "loss": 0.0032, "step": 11148 }, { "epoch": 47.66, "learning_rate": 9.775588700332935e-05, "loss": 0.0021, "step": 11152 }, { "epoch": 47.68, "learning_rate": 9.77524333929155e-05, "loss": 0.0006, "step": 11156 }, { "epoch": 47.69, "learning_rate": 9.774897718815364e-05, "loss": 0.0006, "step": 11160 }, { "epoch": 47.71, "learning_rate": 9.77455183892315e-05, "loss": 0.0006, "step": 11164 }, { "epoch": 47.73, "learning_rate": 9.774205699633704e-05, "loss": 0.0004, "step": 11168 }, { "epoch": 47.74, "learning_rate": 9.773859300965828e-05, "loss": 0.0005, "step": 11172 }, { "epoch": 47.76, "learning_rate": 9.773512642938343e-05, "loss": 0.0003, "step": 11176 }, { "epoch": 47.78, "learning_rate": 9.773165725570081e-05, "loss": 0.0007, "step": 11180 }, { "epoch": 47.79, "learning_rate": 9.772818548879893e-05, "loss": 0.0021, "step": 11184 }, { "epoch": 47.81, "learning_rate": 9.772471112886639e-05, "loss": 0.0003, "step": 11188 }, { "epoch": 47.83, "learning_rate": 9.772123417609194e-05, "loss": 0.0006, "step": 11192 }, { "epoch": 47.85, "learning_rate": 9.771775463066449e-05, "loss": 0.0006, "step": 11196 }, { "epoch": 47.86, "learning_rate": 9.771427249277306e-05, "loss": 0.0028, "step": 11200 }, { "epoch": 47.88, "learning_rate": 9.771078776260687e-05, "loss": 0.0002, "step": 11204 }, { "epoch": 47.9, "learning_rate": 9.77073004403552e-05, "loss": 0.0003, "step": 11208 }, { "epoch": 47.91, "learning_rate": 9.770381052620756e-05, "loss": 0.0033, "step": 11212 }, { "epoch": 47.93, "learning_rate": 9.770031802035353e-05, "loss": 0.0002, "step": 11216 }, { "epoch": 47.95, "learning_rate": 9.769682292298284e-05, "loss": 0.0012, "step": 11220 }, { "epoch": 47.97, "learning_rate": 9.76933252342854e-05, "loss": 0.0012, "step": 11224 }, { "epoch": 47.98, "learning_rate": 9.768982495445122e-05, "loss": 0.0008, "step": 11228 }, { "epoch": 48.0, "learning_rate": 9.768632208367048e-05, "loss": 0.0015, "step": 11232 }, { "epoch": 48.02, "learning_rate": 9.768281662213347e-05, "loss": 0.001, "step": 11236 }, { "epoch": 48.03, "learning_rate": 9.767930857003065e-05, "loss": 0.0006, "step": 11240 }, { "epoch": 48.05, "learning_rate": 9.767579792755262e-05, "loss": 0.0005, "step": 11244 }, { "epoch": 48.07, "learning_rate": 9.76722846948901e-05, "loss": 0.0004, "step": 11248 }, { "epoch": 48.09, "learning_rate": 9.766876887223393e-05, "loss": 0.001, "step": 11252 }, { "epoch": 48.1, "learning_rate": 9.766525045977519e-05, "loss": 0.0016, "step": 11256 }, { "epoch": 48.12, "learning_rate": 9.766172945770497e-05, "loss": 0.0009, "step": 11260 }, { "epoch": 48.14, "learning_rate": 9.76582058662146e-05, "loss": 0.0044, "step": 11264 }, { "epoch": 48.15, "learning_rate": 9.765467968549549e-05, "loss": 0.0007, "step": 11268 }, { "epoch": 48.17, "learning_rate": 9.765115091573923e-05, "loss": 0.0008, "step": 11272 }, { "epoch": 48.19, "learning_rate": 9.764761955713752e-05, "loss": 0.0004, "step": 11276 }, { "epoch": 48.21, "learning_rate": 9.764408560988225e-05, "loss": 0.001, "step": 11280 }, { "epoch": 48.22, "learning_rate": 9.764054907416537e-05, "loss": 0.0012, "step": 11284 }, { "epoch": 48.24, "learning_rate": 9.763700995017905e-05, "loss": 0.0007, "step": 11288 }, { "epoch": 48.26, "learning_rate": 9.763346823811555e-05, "loss": 0.0012, "step": 11292 }, { "epoch": 48.27, "learning_rate": 9.76299239381673e-05, "loss": 0.0009, "step": 11296 }, { "epoch": 48.29, "learning_rate": 9.762637705052684e-05, "loss": 0.0007, "step": 11300 }, { "epoch": 48.31, "learning_rate": 9.76228275753869e-05, "loss": 0.0009, "step": 11304 }, { "epoch": 48.32, "learning_rate": 9.76192755129403e-05, "loss": 0.0014, "step": 11308 }, { "epoch": 48.34, "learning_rate": 9.761572086338002e-05, "loss": 0.0006, "step": 11312 }, { "epoch": 48.36, "learning_rate": 9.761216362689918e-05, "loss": 0.0006, "step": 11316 }, { "epoch": 48.38, "learning_rate": 9.760860380369105e-05, "loss": 0.001, "step": 11320 }, { "epoch": 48.39, "learning_rate": 9.760504139394902e-05, "loss": 0.0011, "step": 11324 }, { "epoch": 48.41, "learning_rate": 9.760147639786665e-05, "loss": 0.0001, "step": 11328 }, { "epoch": 48.43, "learning_rate": 9.75979088156376e-05, "loss": 0.0004, "step": 11332 }, { "epoch": 48.44, "learning_rate": 9.759433864745571e-05, "loss": 0.0002, "step": 11336 }, { "epoch": 48.46, "learning_rate": 9.759076589351494e-05, "loss": 0.0007, "step": 11340 }, { "epoch": 48.48, "learning_rate": 9.75871905540094e-05, "loss": 0.0013, "step": 11344 }, { "epoch": 48.5, "learning_rate": 9.758361262913334e-05, "loss": 0.0004, "step": 11348 }, { "epoch": 48.51, "learning_rate": 9.758003211908111e-05, "loss": 0.0007, "step": 11352 }, { "epoch": 48.53, "learning_rate": 9.757644902404728e-05, "loss": 0.0005, "step": 11356 }, { "epoch": 48.55, "learning_rate": 9.75728633442265e-05, "loss": 0.0002, "step": 11360 }, { "epoch": 48.56, "learning_rate": 9.756927507981357e-05, "loss": 0.0011, "step": 11364 }, { "epoch": 48.58, "learning_rate": 9.756568423100344e-05, "loss": 0.0003, "step": 11368 }, { "epoch": 48.6, "learning_rate": 9.756209079799121e-05, "loss": 0.001, "step": 11372 }, { "epoch": 48.62, "learning_rate": 9.75584947809721e-05, "loss": 0.0003, "step": 11376 }, { "epoch": 48.63, "learning_rate": 9.755489618014147e-05, "loss": 0.0002, "step": 11380 }, { "epoch": 48.65, "learning_rate": 9.755129499569482e-05, "loss": 0.0019, "step": 11384 }, { "epoch": 48.67, "learning_rate": 9.754769122782784e-05, "loss": 0.0008, "step": 11388 }, { "epoch": 48.68, "learning_rate": 9.754408487673628e-05, "loss": 0.0002, "step": 11392 }, { "epoch": 48.7, "learning_rate": 9.754047594261609e-05, "loss": 0.0004, "step": 11396 }, { "epoch": 48.72, "learning_rate": 9.753686442566334e-05, "loss": 0.0005, "step": 11400 }, { "epoch": 48.74, "learning_rate": 9.753325032607423e-05, "loss": 0.0001, "step": 11404 }, { "epoch": 48.75, "learning_rate": 9.75296336440451e-05, "loss": 0.0004, "step": 11408 }, { "epoch": 48.77, "learning_rate": 9.752601437977248e-05, "loss": 0.0018, "step": 11412 }, { "epoch": 48.79, "learning_rate": 9.752239253345297e-05, "loss": 0.0003, "step": 11416 }, { "epoch": 48.8, "learning_rate": 9.751876810528335e-05, "loss": 0.001, "step": 11420 }, { "epoch": 48.82, "learning_rate": 9.751514109546054e-05, "loss": 0.0031, "step": 11424 }, { "epoch": 48.84, "learning_rate": 9.751151150418158e-05, "loss": 0.0004, "step": 11428 }, { "epoch": 48.85, "learning_rate": 9.750787933164366e-05, "loss": 0.0027, "step": 11432 }, { "epoch": 48.87, "learning_rate": 9.750424457804412e-05, "loss": 0.0003, "step": 11436 }, { "epoch": 48.89, "learning_rate": 9.750060724358045e-05, "loss": 0.0021, "step": 11440 }, { "epoch": 48.91, "learning_rate": 9.749696732845021e-05, "loss": 0.0033, "step": 11444 }, { "epoch": 48.92, "learning_rate": 9.749332483285123e-05, "loss": 0.0011, "step": 11448 }, { "epoch": 48.94, "learning_rate": 9.748967975698135e-05, "loss": 0.0002, "step": 11452 }, { "epoch": 48.96, "learning_rate": 9.748603210103862e-05, "loss": 0.0012, "step": 11456 }, { "epoch": 48.97, "learning_rate": 9.748238186522119e-05, "loss": 0.0025, "step": 11460 }, { "epoch": 48.99, "learning_rate": 9.747872904972741e-05, "loss": 0.006, "step": 11464 }, { "epoch": 49.01, "learning_rate": 9.747507365475573e-05, "loss": 0.0002, "step": 11468 }, { "epoch": 49.03, "learning_rate": 9.747141568050472e-05, "loss": 0.0004, "step": 11472 }, { "epoch": 49.04, "learning_rate": 9.746775512717312e-05, "loss": 0.0005, "step": 11476 }, { "epoch": 49.06, "learning_rate": 9.746409199495983e-05, "loss": 0.0004, "step": 11480 }, { "epoch": 49.08, "learning_rate": 9.746042628406384e-05, "loss": 0.0016, "step": 11484 }, { "epoch": 49.09, "learning_rate": 9.745675799468432e-05, "loss": 0.0002, "step": 11488 }, { "epoch": 49.11, "learning_rate": 9.745308712702055e-05, "loss": 0.0008, "step": 11492 }, { "epoch": 49.13, "learning_rate": 9.744941368127199e-05, "loss": 0.0002, "step": 11496 }, { "epoch": 49.15, "learning_rate": 9.744573765763816e-05, "loss": 0.0006, "step": 11500 }, { "epoch": 49.16, "learning_rate": 9.744205905631884e-05, "loss": 0.0013, "step": 11504 }, { "epoch": 49.18, "learning_rate": 9.743837787751386e-05, "loss": 0.0009, "step": 11508 }, { "epoch": 49.2, "learning_rate": 9.743469412142321e-05, "loss": 0.0004, "step": 11512 }, { "epoch": 49.21, "learning_rate": 9.743100778824703e-05, "loss": 0.0003, "step": 11516 }, { "epoch": 49.23, "learning_rate": 9.742731887818558e-05, "loss": 0.0003, "step": 11520 }, { "epoch": 49.25, "learning_rate": 9.74236273914393e-05, "loss": 0.0002, "step": 11524 }, { "epoch": 49.26, "learning_rate": 9.741993332820873e-05, "loss": 0.0004, "step": 11528 }, { "epoch": 49.28, "learning_rate": 9.741623668869457e-05, "loss": 0.0002, "step": 11532 }, { "epoch": 49.3, "learning_rate": 9.741253747309767e-05, "loss": 0.0002, "step": 11536 }, { "epoch": 49.32, "learning_rate": 9.740883568161897e-05, "loss": 0.0003, "step": 11540 }, { "epoch": 49.33, "learning_rate": 9.740513131445961e-05, "loss": 0.0007, "step": 11544 }, { "epoch": 49.35, "learning_rate": 9.740142437182084e-05, "loss": 0.0024, "step": 11548 }, { "epoch": 49.37, "learning_rate": 9.739771485390406e-05, "loss": 0.0004, "step": 11552 }, { "epoch": 49.38, "learning_rate": 9.73940027609108e-05, "loss": 0.0014, "step": 11556 }, { "epoch": 49.4, "learning_rate": 9.739028809304274e-05, "loss": 0.001, "step": 11560 }, { "epoch": 49.42, "learning_rate": 9.738657085050167e-05, "loss": 0.0013, "step": 11564 }, { "epoch": 49.44, "learning_rate": 9.738285103348958e-05, "loss": 0.0003, "step": 11568 }, { "epoch": 49.45, "learning_rate": 9.737912864220856e-05, "loss": 0.0017, "step": 11572 }, { "epoch": 49.47, "learning_rate": 9.73754036768608e-05, "loss": 0.0009, "step": 11576 }, { "epoch": 49.49, "learning_rate": 9.737167613764873e-05, "loss": 0.0004, "step": 11580 }, { "epoch": 49.5, "learning_rate": 9.736794602477483e-05, "loss": 0.0008, "step": 11584 }, { "epoch": 49.52, "learning_rate": 9.736421333844177e-05, "loss": 0.0032, "step": 11588 }, { "epoch": 49.54, "learning_rate": 9.736047807885233e-05, "loss": 0.0004, "step": 11592 }, { "epoch": 49.56, "learning_rate": 9.735674024620947e-05, "loss": 0.0011, "step": 11596 }, { "epoch": 49.57, "learning_rate": 9.735299984071621e-05, "loss": 0.0023, "step": 11600 }, { "epoch": 49.59, "learning_rate": 9.734925686257584e-05, "loss": 0.001, "step": 11604 }, { "epoch": 49.61, "learning_rate": 9.734551131199163e-05, "loss": 0.0008, "step": 11608 }, { "epoch": 49.62, "learning_rate": 9.734176318916715e-05, "loss": 0.0002, "step": 11612 }, { "epoch": 49.64, "learning_rate": 9.733801249430596e-05, "loss": 0.0006, "step": 11616 }, { "epoch": 49.66, "learning_rate": 9.73342592276119e-05, "loss": 0.0016, "step": 11620 }, { "epoch": 49.68, "learning_rate": 9.733050338928882e-05, "loss": 0.0007, "step": 11624 }, { "epoch": 49.69, "learning_rate": 9.732674497954082e-05, "loss": 0.0009, "step": 11628 }, { "epoch": 49.71, "learning_rate": 9.732298399857206e-05, "loss": 0.0012, "step": 11632 }, { "epoch": 49.73, "learning_rate": 9.73192204465869e-05, "loss": 0.0019, "step": 11636 }, { "epoch": 49.74, "learning_rate": 9.731545432378977e-05, "loss": 0.0004, "step": 11640 }, { "epoch": 49.76, "learning_rate": 9.73116856303853e-05, "loss": 0.0004, "step": 11644 }, { "epoch": 49.78, "learning_rate": 9.730791436657827e-05, "loss": 0.001, "step": 11648 }, { "epoch": 49.79, "learning_rate": 9.730414053257352e-05, "loss": 0.0002, "step": 11652 }, { "epoch": 49.81, "learning_rate": 9.730036412857611e-05, "loss": 0.0007, "step": 11656 }, { "epoch": 49.83, "learning_rate": 9.72965851547912e-05, "loss": 0.0002, "step": 11660 }, { "epoch": 49.85, "learning_rate": 9.72928036114241e-05, "loss": 0.0021, "step": 11664 }, { "epoch": 49.86, "learning_rate": 9.728901949868024e-05, "loss": 0.0029, "step": 11668 }, { "epoch": 49.88, "learning_rate": 9.728523281676524e-05, "loss": 0.0008, "step": 11672 }, { "epoch": 49.9, "learning_rate": 9.728144356588481e-05, "loss": 0.0002, "step": 11676 }, { "epoch": 49.91, "learning_rate": 9.727765174624482e-05, "loss": 0.0004, "step": 11680 }, { "epoch": 49.93, "learning_rate": 9.727385735805126e-05, "loss": 0.0002, "step": 11684 }, { "epoch": 49.95, "learning_rate": 9.72700604015103e-05, "loss": 0.0006, "step": 11688 }, { "epoch": 49.97, "learning_rate": 9.726626087682821e-05, "loss": 0.0015, "step": 11692 }, { "epoch": 49.98, "learning_rate": 9.726245878421142e-05, "loss": 0.0017, "step": 11696 }, { "epoch": 50.0, "learning_rate": 9.72586541238665e-05, "loss": 0.0007, "step": 11700 }, { "epoch": 50.02, "learning_rate": 9.725484689600013e-05, "loss": 0.0005, "step": 11704 }, { "epoch": 50.03, "learning_rate": 9.72510371008192e-05, "loss": 0.0006, "step": 11708 }, { "epoch": 50.05, "learning_rate": 9.724722473853065e-05, "loss": 0.0004, "step": 11712 }, { "epoch": 50.07, "learning_rate": 9.724340980934162e-05, "loss": 0.0003, "step": 11716 }, { "epoch": 50.09, "learning_rate": 9.723959231345936e-05, "loss": 0.0008, "step": 11720 }, { "epoch": 50.1, "learning_rate": 9.723577225109128e-05, "loss": 0.0009, "step": 11724 }, { "epoch": 50.12, "learning_rate": 9.723194962244493e-05, "loss": 0.0015, "step": 11728 }, { "epoch": 50.14, "learning_rate": 9.722812442772798e-05, "loss": 0.0001, "step": 11732 }, { "epoch": 50.15, "learning_rate": 9.722429666714823e-05, "loss": 0.0045, "step": 11736 }, { "epoch": 50.17, "learning_rate": 9.722046634091367e-05, "loss": 0.0069, "step": 11740 }, { "epoch": 50.19, "learning_rate": 9.72166334492324e-05, "loss": 0.0003, "step": 11744 }, { "epoch": 50.21, "learning_rate": 9.721279799231263e-05, "loss": 0.0005, "step": 11748 }, { "epoch": 50.22, "learning_rate": 9.720895997036275e-05, "loss": 0.0008, "step": 11752 }, { "epoch": 50.24, "learning_rate": 9.72051193835913e-05, "loss": 0.0003, "step": 11756 }, { "epoch": 50.26, "learning_rate": 9.720127623220689e-05, "loss": 0.0002, "step": 11760 }, { "epoch": 50.27, "learning_rate": 9.719743051641834e-05, "loss": 0.0008, "step": 11764 }, { "epoch": 50.29, "learning_rate": 9.719358223643459e-05, "loss": 0.0003, "step": 11768 }, { "epoch": 50.31, "learning_rate": 9.718973139246471e-05, "loss": 0.0004, "step": 11772 }, { "epoch": 50.32, "learning_rate": 9.71858779847179e-05, "loss": 0.0002, "step": 11776 }, { "epoch": 50.34, "learning_rate": 9.718202201340353e-05, "loss": 0.0012, "step": 11780 }, { "epoch": 50.36, "learning_rate": 9.717816347873108e-05, "loss": 0.001, "step": 11784 }, { "epoch": 50.38, "learning_rate": 9.717430238091019e-05, "loss": 0.0002, "step": 11788 }, { "epoch": 50.39, "learning_rate": 9.717043872015061e-05, "loss": 0.0011, "step": 11792 }, { "epoch": 50.41, "learning_rate": 9.716657249666228e-05, "loss": 0.001, "step": 11796 }, { "epoch": 50.43, "learning_rate": 9.716270371065523e-05, "loss": 0.001, "step": 11800 }, { "epoch": 50.44, "learning_rate": 9.715883236233965e-05, "loss": 0.0002, "step": 11804 }, { "epoch": 50.46, "learning_rate": 9.715495845192586e-05, "loss": 0.0004, "step": 11808 }, { "epoch": 50.48, "learning_rate": 9.715108197962435e-05, "loss": 0.0017, "step": 11812 }, { "epoch": 50.5, "learning_rate": 9.714720294564569e-05, "loss": 0.0007, "step": 11816 }, { "epoch": 50.51, "learning_rate": 9.714332135020066e-05, "loss": 0.0006, "step": 11820 }, { "epoch": 50.53, "learning_rate": 9.71394371935001e-05, "loss": 0.0011, "step": 11824 }, { "epoch": 50.55, "learning_rate": 9.713555047575509e-05, "loss": 0.0003, "step": 11828 }, { "epoch": 50.56, "learning_rate": 9.713166119717674e-05, "loss": 0.0006, "step": 11832 }, { "epoch": 50.58, "learning_rate": 9.712776935797638e-05, "loss": 0.0009, "step": 11836 }, { "epoch": 50.6, "learning_rate": 9.712387495836544e-05, "loss": 0.0004, "step": 11840 }, { "epoch": 50.62, "learning_rate": 9.711997799855552e-05, "loss": 0.0002, "step": 11844 }, { "epoch": 50.63, "learning_rate": 9.71160784787583e-05, "loss": 0.0003, "step": 11848 }, { "epoch": 50.65, "learning_rate": 9.711217639918567e-05, "loss": 0.0008, "step": 11852 }, { "epoch": 50.67, "learning_rate": 9.710827176004961e-05, "loss": 0.0006, "step": 11856 }, { "epoch": 50.68, "learning_rate": 9.710436456156226e-05, "loss": 0.0004, "step": 11860 }, { "epoch": 50.7, "learning_rate": 9.71004548039359e-05, "loss": 0.0003, "step": 11864 }, { "epoch": 50.72, "learning_rate": 9.709654248738293e-05, "loss": 0.0018, "step": 11868 }, { "epoch": 50.74, "learning_rate": 9.709262761211592e-05, "loss": 0.0012, "step": 11872 }, { "epoch": 50.75, "learning_rate": 9.708871017834756e-05, "loss": 0.0006, "step": 11876 }, { "epoch": 50.77, "learning_rate": 9.708479018629066e-05, "loss": 0.0007, "step": 11880 }, { "epoch": 50.79, "learning_rate": 9.70808676361582e-05, "loss": 0.0006, "step": 11884 }, { "epoch": 50.8, "learning_rate": 9.707694252816331e-05, "loss": 0.0004, "step": 11888 }, { "epoch": 50.82, "learning_rate": 9.707301486251921e-05, "loss": 0.0002, "step": 11892 }, { "epoch": 50.84, "learning_rate": 9.70690846394393e-05, "loss": 0.0018, "step": 11896 }, { "epoch": 50.85, "learning_rate": 9.70651518591371e-05, "loss": 0.0038, "step": 11900 }, { "epoch": 50.87, "learning_rate": 9.706121652182628e-05, "loss": 0.0006, "step": 11904 }, { "epoch": 50.89, "learning_rate": 9.705727862772064e-05, "loss": 0.0008, "step": 11908 }, { "epoch": 50.91, "learning_rate": 9.705333817703413e-05, "loss": 0.0001, "step": 11912 }, { "epoch": 50.92, "learning_rate": 9.704939516998082e-05, "loss": 0.0004, "step": 11916 }, { "epoch": 50.94, "learning_rate": 9.704544960677492e-05, "loss": 0.0007, "step": 11920 }, { "epoch": 50.96, "learning_rate": 9.704150148763082e-05, "loss": 0.0006, "step": 11924 }, { "epoch": 50.97, "learning_rate": 9.7037550812763e-05, "loss": 0.0005, "step": 11928 }, { "epoch": 50.99, "learning_rate": 9.703359758238609e-05, "loss": 0.0013, "step": 11932 }, { "epoch": 51.01, "learning_rate": 9.702964179671488e-05, "loss": 0.0037, "step": 11936 }, { "epoch": 51.03, "learning_rate": 9.702568345596428e-05, "loss": 0.0006, "step": 11940 }, { "epoch": 51.04, "learning_rate": 9.702172256034933e-05, "loss": 0.004, "step": 11944 }, { "epoch": 51.06, "learning_rate": 9.701775911008524e-05, "loss": 0.0008, "step": 11948 }, { "epoch": 51.08, "learning_rate": 9.701379310538733e-05, "loss": 0.0007, "step": 11952 }, { "epoch": 51.09, "learning_rate": 9.700982454647108e-05, "loss": 0.0003, "step": 11956 }, { "epoch": 51.11, "learning_rate": 9.700585343355208e-05, "loss": 0.0029, "step": 11960 }, { "epoch": 51.13, "learning_rate": 9.70018797668461e-05, "loss": 0.0006, "step": 11964 }, { "epoch": 51.15, "learning_rate": 9.699790354656902e-05, "loss": 0.0013, "step": 11968 }, { "epoch": 51.16, "learning_rate": 9.699392477293686e-05, "loss": 0.0005, "step": 11972 }, { "epoch": 51.18, "learning_rate": 9.698994344616577e-05, "loss": 0.0003, "step": 11976 }, { "epoch": 51.2, "learning_rate": 9.698595956647205e-05, "loss": 0.0008, "step": 11980 }, { "epoch": 51.21, "learning_rate": 9.698197313407218e-05, "loss": 0.0002, "step": 11984 }, { "epoch": 51.23, "learning_rate": 9.697798414918271e-05, "loss": 0.0024, "step": 11988 }, { "epoch": 51.25, "learning_rate": 9.697399261202036e-05, "loss": 0.0003, "step": 11992 }, { "epoch": 51.26, "learning_rate": 9.6969998522802e-05, "loss": 0.0009, "step": 11996 }, { "epoch": 51.28, "learning_rate": 9.696600188174459e-05, "loss": 0.0019, "step": 12000 }, { "epoch": 51.28, "eval_exact_match": 0.5166320166320166, "eval_loss": 0.8132425546646118, "eval_runtime": 140.4988, "eval_samples_per_second": 6.847, "step": 12000 }, { "epoch": 51.3, "learning_rate": 9.696200268906532e-05, "loss": 0.0009, "step": 12004 }, { "epoch": 51.32, "learning_rate": 9.69580009449814e-05, "loss": 0.0016, "step": 12008 }, { "epoch": 51.33, "learning_rate": 9.69539966497103e-05, "loss": 0.0015, "step": 12012 }, { "epoch": 51.35, "learning_rate": 9.694998980346952e-05, "loss": 0.001, "step": 12016 }, { "epoch": 51.37, "learning_rate": 9.694598040647679e-05, "loss": 0.0005, "step": 12020 }, { "epoch": 51.38, "learning_rate": 9.694196845894992e-05, "loss": 0.0003, "step": 12024 }, { "epoch": 51.4, "learning_rate": 9.693795396110686e-05, "loss": 0.0008, "step": 12028 }, { "epoch": 51.42, "learning_rate": 9.693393691316572e-05, "loss": 0.0011, "step": 12032 }, { "epoch": 51.44, "learning_rate": 9.692991731534477e-05, "loss": 0.0025, "step": 12036 }, { "epoch": 51.45, "learning_rate": 9.692589516786237e-05, "loss": 0.0011, "step": 12040 }, { "epoch": 51.47, "learning_rate": 9.692187047093703e-05, "loss": 0.0015, "step": 12044 }, { "epoch": 51.49, "learning_rate": 9.691784322478743e-05, "loss": 0.0006, "step": 12048 }, { "epoch": 51.5, "learning_rate": 9.691381342963235e-05, "loss": 0.0016, "step": 12052 }, { "epoch": 51.52, "learning_rate": 9.690978108569073e-05, "loss": 0.0007, "step": 12056 }, { "epoch": 51.54, "learning_rate": 9.690574619318166e-05, "loss": 0.0009, "step": 12060 }, { "epoch": 51.56, "learning_rate": 9.690170875232431e-05, "loss": 0.0016, "step": 12064 }, { "epoch": 51.57, "learning_rate": 9.689766876333809e-05, "loss": 0.0001, "step": 12068 }, { "epoch": 51.59, "learning_rate": 9.689362622644244e-05, "loss": 0.0005, "step": 12072 }, { "epoch": 51.61, "learning_rate": 9.688958114185702e-05, "loss": 0.0002, "step": 12076 }, { "epoch": 51.62, "learning_rate": 9.688553350980157e-05, "loss": 0.002, "step": 12080 }, { "epoch": 51.64, "learning_rate": 9.6881483330496e-05, "loss": 0.0004, "step": 12084 }, { "epoch": 51.66, "learning_rate": 9.687743060416036e-05, "loss": 0.0014, "step": 12088 }, { "epoch": 51.68, "learning_rate": 9.687337533101484e-05, "loss": 0.0008, "step": 12092 }, { "epoch": 51.69, "learning_rate": 9.686931751127974e-05, "loss": 0.0001, "step": 12096 }, { "epoch": 51.71, "learning_rate": 9.686525714517554e-05, "loss": 0.0004, "step": 12100 }, { "epoch": 51.73, "learning_rate": 9.68611942329228e-05, "loss": 0.0002, "step": 12104 }, { "epoch": 51.74, "learning_rate": 9.685712877474229e-05, "loss": 0.0003, "step": 12108 }, { "epoch": 51.76, "learning_rate": 9.685306077085488e-05, "loss": 0.0014, "step": 12112 }, { "epoch": 51.78, "learning_rate": 9.684899022148154e-05, "loss": 0.0002, "step": 12116 }, { "epoch": 51.79, "learning_rate": 9.684491712684347e-05, "loss": 0.0005, "step": 12120 }, { "epoch": 51.81, "learning_rate": 9.684084148716195e-05, "loss": 0.0002, "step": 12124 }, { "epoch": 51.83, "learning_rate": 9.683676330265838e-05, "loss": 0.0002, "step": 12128 }, { "epoch": 51.85, "learning_rate": 9.683268257355433e-05, "loss": 0.0036, "step": 12132 }, { "epoch": 51.86, "learning_rate": 9.682859930007153e-05, "loss": 0.0003, "step": 12136 }, { "epoch": 51.88, "learning_rate": 9.682451348243178e-05, "loss": 0.0016, "step": 12140 }, { "epoch": 51.9, "learning_rate": 9.68204251208571e-05, "loss": 0.0014, "step": 12144 }, { "epoch": 51.91, "learning_rate": 9.681633421556957e-05, "loss": 0.0015, "step": 12148 }, { "epoch": 51.93, "learning_rate": 9.681224076679147e-05, "loss": 0.0002, "step": 12152 }, { "epoch": 51.95, "learning_rate": 9.680814477474518e-05, "loss": 0.0008, "step": 12156 }, { "epoch": 51.97, "learning_rate": 9.680404623965325e-05, "loss": 0.003, "step": 12160 }, { "epoch": 51.98, "learning_rate": 9.679994516173834e-05, "loss": 0.0016, "step": 12164 }, { "epoch": 52.0, "learning_rate": 9.679584154122323e-05, "loss": 0.0004, "step": 12168 }, { "epoch": 52.02, "learning_rate": 9.679173537833092e-05, "loss": 0.0037, "step": 12172 }, { "epoch": 52.03, "learning_rate": 9.678762667328445e-05, "loss": 0.0006, "step": 12176 }, { "epoch": 52.05, "learning_rate": 9.678351542630706e-05, "loss": 0.0008, "step": 12180 }, { "epoch": 52.07, "learning_rate": 9.677940163762212e-05, "loss": 0.0005, "step": 12184 }, { "epoch": 52.09, "learning_rate": 9.67752853074531e-05, "loss": 0.0003, "step": 12188 }, { "epoch": 52.1, "learning_rate": 9.677116643602367e-05, "loss": 0.0002, "step": 12192 }, { "epoch": 52.12, "learning_rate": 9.676704502355757e-05, "loss": 0.0004, "step": 12196 }, { "epoch": 52.14, "learning_rate": 9.676292107027876e-05, "loss": 0.0009, "step": 12200 }, { "epoch": 52.15, "learning_rate": 9.675879457641124e-05, "loss": 0.0003, "step": 12204 }, { "epoch": 52.17, "learning_rate": 9.675466554217922e-05, "loss": 0.0007, "step": 12208 }, { "epoch": 52.19, "learning_rate": 9.675053396780704e-05, "loss": 0.0003, "step": 12212 }, { "epoch": 52.21, "learning_rate": 9.674639985351915e-05, "loss": 0.0002, "step": 12216 }, { "epoch": 52.22, "learning_rate": 9.674226319954014e-05, "loss": 0.0001, "step": 12220 }, { "epoch": 52.24, "learning_rate": 9.673812400609479e-05, "loss": 0.0002, "step": 12224 }, { "epoch": 52.26, "learning_rate": 9.673398227340794e-05, "loss": 0.0002, "step": 12228 }, { "epoch": 52.27, "learning_rate": 9.672983800170464e-05, "loss": 0.0002, "step": 12232 }, { "epoch": 52.29, "learning_rate": 9.672569119121e-05, "loss": 0.0005, "step": 12236 }, { "epoch": 52.31, "learning_rate": 9.672154184214934e-05, "loss": 0.0017, "step": 12240 }, { "epoch": 52.32, "learning_rate": 9.67173899547481e-05, "loss": 0.0002, "step": 12244 }, { "epoch": 52.34, "learning_rate": 9.671323552923184e-05, "loss": 0.0009, "step": 12248 }, { "epoch": 52.36, "learning_rate": 9.670907856582627e-05, "loss": 0.0002, "step": 12252 }, { "epoch": 52.38, "learning_rate": 9.670491906475722e-05, "loss": 0.0002, "step": 12256 }, { "epoch": 52.39, "learning_rate": 9.670075702625068e-05, "loss": 0.0004, "step": 12260 }, { "epoch": 52.41, "learning_rate": 9.669659245053278e-05, "loss": 0.0017, "step": 12264 }, { "epoch": 52.43, "learning_rate": 9.669242533782974e-05, "loss": 0.0018, "step": 12268 }, { "epoch": 52.44, "learning_rate": 9.668825568836801e-05, "loss": 0.0011, "step": 12272 }, { "epoch": 52.46, "learning_rate": 9.66840835023741e-05, "loss": 0.0002, "step": 12276 }, { "epoch": 52.48, "learning_rate": 9.667990878007466e-05, "loss": 0.0014, "step": 12280 }, { "epoch": 52.5, "learning_rate": 9.667573152169652e-05, "loss": 0.0002, "step": 12284 }, { "epoch": 52.51, "learning_rate": 9.667155172746663e-05, "loss": 0.001, "step": 12288 }, { "epoch": 52.53, "learning_rate": 9.666736939761207e-05, "loss": 0.0007, "step": 12292 }, { "epoch": 52.55, "learning_rate": 9.666318453236006e-05, "loss": 0.0021, "step": 12296 }, { "epoch": 52.56, "learning_rate": 9.665899713193797e-05, "loss": 0.0002, "step": 12300 }, { "epoch": 52.58, "learning_rate": 9.665480719657327e-05, "loss": 0.0011, "step": 12304 }, { "epoch": 52.6, "learning_rate": 9.665061472649364e-05, "loss": 0.0013, "step": 12308 }, { "epoch": 52.62, "learning_rate": 9.66464197219268e-05, "loss": 0.0003, "step": 12312 }, { "epoch": 52.63, "learning_rate": 9.664222218310071e-05, "loss": 0.0006, "step": 12316 }, { "epoch": 52.65, "learning_rate": 9.663802211024337e-05, "loss": 0.0003, "step": 12320 }, { "epoch": 52.67, "learning_rate": 9.663381950358302e-05, "loss": 0.0003, "step": 12324 }, { "epoch": 52.68, "learning_rate": 9.662961436334795e-05, "loss": 0.0006, "step": 12328 }, { "epoch": 52.7, "learning_rate": 9.662540668976662e-05, "loss": 0.0002, "step": 12332 }, { "epoch": 52.72, "learning_rate": 9.662119648306767e-05, "loss": 0.0003, "step": 12336 }, { "epoch": 52.74, "learning_rate": 9.661698374347978e-05, "loss": 0.0009, "step": 12340 }, { "epoch": 52.75, "learning_rate": 9.661276847123184e-05, "loss": 0.0005, "step": 12344 }, { "epoch": 52.77, "learning_rate": 9.66085506665529e-05, "loss": 0.002, "step": 12348 }, { "epoch": 52.79, "learning_rate": 9.660433032967205e-05, "loss": 0.0039, "step": 12352 }, { "epoch": 52.8, "learning_rate": 9.660010746081864e-05, "loss": 0.0002, "step": 12356 }, { "epoch": 52.82, "learning_rate": 9.659588206022203e-05, "loss": 0.0014, "step": 12360 }, { "epoch": 52.84, "learning_rate": 9.659165412811184e-05, "loss": 0.0005, "step": 12364 }, { "epoch": 52.85, "learning_rate": 9.658742366471773e-05, "loss": 0.0002, "step": 12368 }, { "epoch": 52.87, "learning_rate": 9.658319067026958e-05, "loss": 0.0003, "step": 12372 }, { "epoch": 52.89, "learning_rate": 9.657895514499731e-05, "loss": 0.002, "step": 12376 }, { "epoch": 52.91, "learning_rate": 9.657471708913108e-05, "loss": 0.0008, "step": 12380 }, { "epoch": 52.92, "learning_rate": 9.65704765029011e-05, "loss": 0.0003, "step": 12384 }, { "epoch": 52.94, "learning_rate": 9.656623338653778e-05, "loss": 0.0002, "step": 12388 }, { "epoch": 52.96, "learning_rate": 9.656198774027167e-05, "loss": 0.0003, "step": 12392 }, { "epoch": 52.97, "learning_rate": 9.655773956433339e-05, "loss": 0.001, "step": 12396 }, { "epoch": 52.99, "learning_rate": 9.655348885895375e-05, "loss": 0.0004, "step": 12400 }, { "epoch": 53.01, "learning_rate": 9.654923562436368e-05, "loss": 0.0008, "step": 12404 }, { "epoch": 53.03, "learning_rate": 9.654497986079427e-05, "loss": 0.0006, "step": 12408 }, { "epoch": 53.04, "learning_rate": 9.654072156847673e-05, "loss": 0.0003, "step": 12412 }, { "epoch": 53.06, "learning_rate": 9.653646074764242e-05, "loss": 0.0026, "step": 12416 }, { "epoch": 53.08, "learning_rate": 9.653219739852281e-05, "loss": 0.0017, "step": 12420 }, { "epoch": 53.09, "learning_rate": 9.652793152134951e-05, "loss": 0.0005, "step": 12424 }, { "epoch": 53.11, "learning_rate": 9.652366311635432e-05, "loss": 0.0005, "step": 12428 }, { "epoch": 53.13, "learning_rate": 9.65193921837691e-05, "loss": 0.0003, "step": 12432 }, { "epoch": 53.15, "learning_rate": 9.651511872382593e-05, "loss": 0.0009, "step": 12436 }, { "epoch": 53.16, "learning_rate": 9.651084273675695e-05, "loss": 0.0003, "step": 12440 }, { "epoch": 53.18, "learning_rate": 9.650656422279446e-05, "loss": 0.0009, "step": 12444 }, { "epoch": 53.2, "learning_rate": 9.650228318217094e-05, "loss": 0.001, "step": 12448 }, { "epoch": 53.21, "learning_rate": 9.649799961511895e-05, "loss": 0.0011, "step": 12452 }, { "epoch": 53.23, "learning_rate": 9.649371352187123e-05, "loss": 0.0002, "step": 12456 }, { "epoch": 53.25, "learning_rate": 9.648942490266064e-05, "loss": 0.0004, "step": 12460 }, { "epoch": 53.26, "learning_rate": 9.648513375772016e-05, "loss": 0.0004, "step": 12464 }, { "epoch": 53.28, "learning_rate": 9.648084008728293e-05, "loss": 0.0008, "step": 12468 }, { "epoch": 53.3, "learning_rate": 9.647654389158223e-05, "loss": 0.0003, "step": 12472 }, { "epoch": 53.32, "learning_rate": 9.647224517085147e-05, "loss": 0.0012, "step": 12476 }, { "epoch": 53.33, "learning_rate": 9.64679439253242e-05, "loss": 0.0001, "step": 12480 }, { "epoch": 53.35, "learning_rate": 9.646364015523408e-05, "loss": 0.001, "step": 12484 }, { "epoch": 53.37, "learning_rate": 9.645933386081494e-05, "loss": 0.0009, "step": 12488 }, { "epoch": 53.38, "learning_rate": 9.645502504230075e-05, "loss": 0.0007, "step": 12492 }, { "epoch": 53.4, "learning_rate": 9.645071369992559e-05, "loss": 0.0005, "step": 12496 }, { "epoch": 53.42, "learning_rate": 9.644639983392369e-05, "loss": 0.0004, "step": 12500 }, { "epoch": 53.44, "learning_rate": 9.644208344452943e-05, "loss": 0.0002, "step": 12504 }, { "epoch": 53.45, "learning_rate": 9.64377645319773e-05, "loss": 0.0002, "step": 12508 }, { "epoch": 53.47, "learning_rate": 9.643344309650197e-05, "loss": 0.0003, "step": 12512 }, { "epoch": 53.49, "learning_rate": 9.642911913833821e-05, "loss": 0.0004, "step": 12516 }, { "epoch": 53.5, "learning_rate": 9.642479265772091e-05, "loss": 0.0001, "step": 12520 }, { "epoch": 53.52, "learning_rate": 9.642046365488516e-05, "loss": 0.0009, "step": 12524 }, { "epoch": 53.54, "learning_rate": 9.64161321300661e-05, "loss": 0.0019, "step": 12528 }, { "epoch": 53.56, "learning_rate": 9.641179808349913e-05, "loss": 0.0002, "step": 12532 }, { "epoch": 53.57, "learning_rate": 9.640746151541966e-05, "loss": 0.0009, "step": 12536 }, { "epoch": 53.59, "learning_rate": 9.640312242606332e-05, "loss": 0.0007, "step": 12540 }, { "epoch": 53.61, "learning_rate": 9.639878081566582e-05, "loss": 0.0006, "step": 12544 }, { "epoch": 53.62, "learning_rate": 9.639443668446307e-05, "loss": 0.0004, "step": 12548 }, { "epoch": 53.64, "learning_rate": 9.639009003269108e-05, "loss": 0.0008, "step": 12552 }, { "epoch": 53.66, "learning_rate": 9.638574086058597e-05, "loss": 0.0005, "step": 12556 }, { "epoch": 53.68, "learning_rate": 9.638138916838404e-05, "loss": 0.0005, "step": 12560 }, { "epoch": 53.69, "learning_rate": 9.637703495632171e-05, "loss": 0.0003, "step": 12564 }, { "epoch": 53.71, "learning_rate": 9.637267822463556e-05, "loss": 0.0019, "step": 12568 }, { "epoch": 53.73, "learning_rate": 9.63683189735623e-05, "loss": 0.002, "step": 12572 }, { "epoch": 53.74, "learning_rate": 9.63639572033387e-05, "loss": 0.0005, "step": 12576 }, { "epoch": 53.76, "learning_rate": 9.635959291420178e-05, "loss": 0.0002, "step": 12580 }, { "epoch": 53.78, "learning_rate": 9.635522610638865e-05, "loss": 0.0006, "step": 12584 }, { "epoch": 53.79, "learning_rate": 9.635085678013653e-05, "loss": 0.0015, "step": 12588 }, { "epoch": 53.81, "learning_rate": 9.634648493568284e-05, "loss": 0.001, "step": 12592 }, { "epoch": 53.83, "learning_rate": 9.634211057326506e-05, "loss": 0.0006, "step": 12596 }, { "epoch": 53.85, "learning_rate": 9.633773369312085e-05, "loss": 0.0018, "step": 12600 }, { "epoch": 53.86, "learning_rate": 9.633335429548801e-05, "loss": 0.0001, "step": 12604 }, { "epoch": 53.88, "learning_rate": 9.632897238060448e-05, "loss": 0.0004, "step": 12608 }, { "epoch": 53.9, "learning_rate": 9.632458794870831e-05, "loss": 0.0008, "step": 12612 }, { "epoch": 53.91, "learning_rate": 9.632020100003771e-05, "loss": 0.0008, "step": 12616 }, { "epoch": 53.93, "learning_rate": 9.631581153483102e-05, "loss": 0.0005, "step": 12620 }, { "epoch": 53.95, "learning_rate": 9.631141955332673e-05, "loss": 0.0011, "step": 12624 }, { "epoch": 53.97, "learning_rate": 9.630702505576341e-05, "loss": 0.0003, "step": 12628 }, { "epoch": 53.98, "learning_rate": 9.630262804237984e-05, "loss": 0.0006, "step": 12632 }, { "epoch": 54.0, "learning_rate": 9.62982285134149e-05, "loss": 0.0003, "step": 12636 }, { "epoch": 54.02, "learning_rate": 9.629382646910762e-05, "loss": 0.0003, "step": 12640 }, { "epoch": 54.03, "learning_rate": 9.628942190969715e-05, "loss": 0.0023, "step": 12644 }, { "epoch": 54.05, "learning_rate": 9.628501483542278e-05, "loss": 0.0008, "step": 12648 }, { "epoch": 54.07, "learning_rate": 9.628060524652395e-05, "loss": 0.0004, "step": 12652 }, { "epoch": 54.09, "learning_rate": 9.627619314324025e-05, "loss": 0.0001, "step": 12656 }, { "epoch": 54.1, "learning_rate": 9.627177852581135e-05, "loss": 0.0003, "step": 12660 }, { "epoch": 54.12, "learning_rate": 9.626736139447711e-05, "loss": 0.0003, "step": 12664 }, { "epoch": 54.14, "learning_rate": 9.62629417494775e-05, "loss": 0.0002, "step": 12668 }, { "epoch": 54.15, "learning_rate": 9.625851959105265e-05, "loss": 0.0006, "step": 12672 }, { "epoch": 54.17, "learning_rate": 9.625409491944282e-05, "loss": 0.0021, "step": 12676 }, { "epoch": 54.19, "learning_rate": 9.624966773488834e-05, "loss": 0.0002, "step": 12680 }, { "epoch": 54.21, "learning_rate": 9.624523803762982e-05, "loss": 0.0002, "step": 12684 }, { "epoch": 54.22, "learning_rate": 9.624080582790786e-05, "loss": 0.0003, "step": 12688 }, { "epoch": 54.24, "learning_rate": 9.623637110596328e-05, "loss": 0.0006, "step": 12692 }, { "epoch": 54.26, "learning_rate": 9.623193387203701e-05, "loss": 0.0001, "step": 12696 }, { "epoch": 54.27, "learning_rate": 9.622749412637014e-05, "loss": 0.0001, "step": 12700 }, { "epoch": 54.29, "learning_rate": 9.622305186920384e-05, "loss": 0.0003, "step": 12704 }, { "epoch": 54.31, "learning_rate": 9.621860710077949e-05, "loss": 0.0013, "step": 12708 }, { "epoch": 54.32, "learning_rate": 9.621415982133855e-05, "loss": 0.0003, "step": 12712 }, { "epoch": 54.34, "learning_rate": 9.620971003112263e-05, "loss": 0.0003, "step": 12716 }, { "epoch": 54.36, "learning_rate": 9.62052577303735e-05, "loss": 0.0001, "step": 12720 }, { "epoch": 54.38, "learning_rate": 9.620080291933307e-05, "loss": 0.0009, "step": 12724 }, { "epoch": 54.39, "learning_rate": 9.619634559824331e-05, "loss": 0.0008, "step": 12728 }, { "epoch": 54.41, "learning_rate": 9.619188576734642e-05, "loss": 0.0001, "step": 12732 }, { "epoch": 54.43, "learning_rate": 9.618742342688469e-05, "loss": 0.0011, "step": 12736 }, { "epoch": 54.44, "learning_rate": 9.618295857710057e-05, "loss": 0.002, "step": 12740 }, { "epoch": 54.46, "learning_rate": 9.61784912182366e-05, "loss": 0.0002, "step": 12744 }, { "epoch": 54.48, "learning_rate": 9.617402135053552e-05, "loss": 0.0001, "step": 12748 }, { "epoch": 54.5, "learning_rate": 9.616954897424015e-05, "loss": 0.0009, "step": 12752 }, { "epoch": 54.51, "learning_rate": 9.61650740895935e-05, "loss": 0.0002, "step": 12756 }, { "epoch": 54.53, "learning_rate": 9.616059669683864e-05, "loss": 0.0002, "step": 12760 }, { "epoch": 54.55, "learning_rate": 9.615611679621887e-05, "loss": 0.0007, "step": 12764 }, { "epoch": 54.56, "learning_rate": 9.615163438797756e-05, "loss": 0.0003, "step": 12768 }, { "epoch": 54.58, "learning_rate": 9.614714947235821e-05, "loss": 0.0008, "step": 12772 }, { "epoch": 54.6, "learning_rate": 9.614266204960453e-05, "loss": 0.0005, "step": 12776 }, { "epoch": 54.62, "learning_rate": 9.613817211996027e-05, "loss": 0.0013, "step": 12780 }, { "epoch": 54.63, "learning_rate": 9.613367968366941e-05, "loss": 0.0002, "step": 12784 }, { "epoch": 54.65, "learning_rate": 9.612918474097599e-05, "loss": 0.0007, "step": 12788 }, { "epoch": 54.67, "learning_rate": 9.612468729212423e-05, "loss": 0.0006, "step": 12792 }, { "epoch": 54.68, "learning_rate": 9.612018733735844e-05, "loss": 0.0001, "step": 12796 }, { "epoch": 54.7, "learning_rate": 9.611568487692316e-05, "loss": 0.0004, "step": 12800 }, { "epoch": 54.72, "learning_rate": 9.611117991106295e-05, "loss": 0.0002, "step": 12804 }, { "epoch": 54.74, "learning_rate": 9.610667244002259e-05, "loss": 0.0006, "step": 12808 }, { "epoch": 54.75, "learning_rate": 9.610216246404694e-05, "loss": 0.0022, "step": 12812 }, { "epoch": 54.77, "learning_rate": 9.609764998338105e-05, "loss": 0.0012, "step": 12816 }, { "epoch": 54.79, "learning_rate": 9.609313499827008e-05, "loss": 0.0018, "step": 12820 }, { "epoch": 54.8, "learning_rate": 9.60886175089593e-05, "loss": 0.0006, "step": 12824 }, { "epoch": 54.82, "learning_rate": 9.608409751569417e-05, "loss": 0.0012, "step": 12828 }, { "epoch": 54.84, "learning_rate": 9.607957501872022e-05, "loss": 0.0001, "step": 12832 }, { "epoch": 54.85, "learning_rate": 9.607505001828321e-05, "loss": 0.0002, "step": 12836 }, { "epoch": 54.87, "learning_rate": 9.607052251462892e-05, "loss": 0.0004, "step": 12840 }, { "epoch": 54.89, "learning_rate": 9.606599250800336e-05, "loss": 0.0002, "step": 12844 }, { "epoch": 54.91, "learning_rate": 9.606145999865264e-05, "loss": 0.0028, "step": 12848 }, { "epoch": 54.92, "learning_rate": 9.6056924986823e-05, "loss": 0.0002, "step": 12852 }, { "epoch": 54.94, "learning_rate": 9.60523874727608e-05, "loss": 0.0002, "step": 12856 }, { "epoch": 54.96, "learning_rate": 9.60478474567126e-05, "loss": 0.0014, "step": 12860 }, { "epoch": 54.97, "learning_rate": 9.604330493892504e-05, "loss": 0.0003, "step": 12864 }, { "epoch": 54.99, "learning_rate": 9.60387599196449e-05, "loss": 0.0014, "step": 12868 }, { "epoch": 55.01, "learning_rate": 9.603421239911912e-05, "loss": 0.0002, "step": 12872 }, { "epoch": 55.03, "learning_rate": 9.602966237759475e-05, "loss": 0.0001, "step": 12876 }, { "epoch": 55.04, "learning_rate": 9.6025109855319e-05, "loss": 0.0002, "step": 12880 }, { "epoch": 55.06, "learning_rate": 9.602055483253923e-05, "loss": 0.0002, "step": 12884 }, { "epoch": 55.08, "learning_rate": 9.601599730950284e-05, "loss": 0.0005, "step": 12888 }, { "epoch": 55.09, "learning_rate": 9.601143728645749e-05, "loss": 0.0013, "step": 12892 }, { "epoch": 55.11, "learning_rate": 9.600687476365092e-05, "loss": 0.0005, "step": 12896 }, { "epoch": 55.13, "learning_rate": 9.600230974133099e-05, "loss": 0.0002, "step": 12900 }, { "epoch": 55.15, "learning_rate": 9.599774221974573e-05, "loss": 0.0003, "step": 12904 }, { "epoch": 55.16, "learning_rate": 9.599317219914328e-05, "loss": 0.0005, "step": 12908 }, { "epoch": 55.18, "learning_rate": 9.598859967977193e-05, "loss": 0.002, "step": 12912 }, { "epoch": 55.2, "learning_rate": 9.598402466188009e-05, "loss": 0.0008, "step": 12916 }, { "epoch": 55.21, "learning_rate": 9.597944714571631e-05, "loss": 0.0002, "step": 12920 }, { "epoch": 55.23, "learning_rate": 9.597486713152933e-05, "loss": 0.0002, "step": 12924 }, { "epoch": 55.25, "learning_rate": 9.597028461956792e-05, "loss": 0.0011, "step": 12928 }, { "epoch": 55.26, "learning_rate": 9.596569961008108e-05, "loss": 0.0002, "step": 12932 }, { "epoch": 55.28, "learning_rate": 9.596111210331788e-05, "loss": 0.0002, "step": 12936 }, { "epoch": 55.3, "learning_rate": 9.59565220995276e-05, "loss": 0.0016, "step": 12940 }, { "epoch": 55.32, "learning_rate": 9.595192959895958e-05, "loss": 0.0009, "step": 12944 }, { "epoch": 55.33, "learning_rate": 9.594733460186331e-05, "loss": 0.0012, "step": 12948 }, { "epoch": 55.35, "learning_rate": 9.594273710848846e-05, "loss": 0.0018, "step": 12952 }, { "epoch": 55.37, "learning_rate": 9.593813711908482e-05, "loss": 0.001, "step": 12956 }, { "epoch": 55.38, "learning_rate": 9.593353463390227e-05, "loss": 0.0003, "step": 12960 }, { "epoch": 55.4, "learning_rate": 9.592892965319088e-05, "loss": 0.0011, "step": 12964 }, { "epoch": 55.42, "learning_rate": 9.592432217720082e-05, "loss": 0.0001, "step": 12968 }, { "epoch": 55.44, "learning_rate": 9.59197122061824e-05, "loss": 0.0009, "step": 12972 }, { "epoch": 55.45, "learning_rate": 9.591509974038612e-05, "loss": 0.0019, "step": 12976 }, { "epoch": 55.47, "learning_rate": 9.591048478006255e-05, "loss": 0.001, "step": 12980 }, { "epoch": 55.49, "learning_rate": 9.590586732546241e-05, "loss": 0.0003, "step": 12984 }, { "epoch": 55.5, "learning_rate": 9.590124737683655e-05, "loss": 0.0017, "step": 12988 }, { "epoch": 55.52, "learning_rate": 9.589662493443599e-05, "loss": 0.0005, "step": 12992 }, { "epoch": 55.54, "learning_rate": 9.589199999851183e-05, "loss": 0.0007, "step": 12996 }, { "epoch": 55.56, "learning_rate": 9.58873725693154e-05, "loss": 0.0002, "step": 13000 }, { "epoch": 55.56, "eval_exact_match": 0.5145530145530145, "eval_loss": 0.8539115190505981, "eval_runtime": 141.8733, "eval_samples_per_second": 6.781, "step": 13000 }, { "epoch": 55.57, "learning_rate": 9.588274264709806e-05, "loss": 0.0008, "step": 13004 }, { "epoch": 55.59, "learning_rate": 9.587811023211135e-05, "loss": 0.0002, "step": 13008 }, { "epoch": 55.61, "learning_rate": 9.587347532460695e-05, "loss": 0.0002, "step": 13012 }, { "epoch": 55.62, "learning_rate": 9.586883792483668e-05, "loss": 0.0003, "step": 13016 }, { "epoch": 55.64, "learning_rate": 9.586419803305248e-05, "loss": 0.0012, "step": 13020 }, { "epoch": 55.66, "learning_rate": 9.585955564950643e-05, "loss": 0.0015, "step": 13024 }, { "epoch": 55.68, "learning_rate": 9.585491077445075e-05, "loss": 0.0005, "step": 13028 }, { "epoch": 55.69, "learning_rate": 9.585026340813776e-05, "loss": 0.0013, "step": 13032 }, { "epoch": 55.71, "learning_rate": 9.584561355082001e-05, "loss": 0.0002, "step": 13036 }, { "epoch": 55.73, "learning_rate": 9.584096120275007e-05, "loss": 0.001, "step": 13040 }, { "epoch": 55.74, "learning_rate": 9.583630636418074e-05, "loss": 0.0005, "step": 13044 }, { "epoch": 55.76, "learning_rate": 9.583164903536488e-05, "loss": 0.0005, "step": 13048 }, { "epoch": 55.78, "learning_rate": 9.582698921655551e-05, "loss": 0.0014, "step": 13052 }, { "epoch": 55.79, "learning_rate": 9.582232690800584e-05, "loss": 0.0001, "step": 13056 }, { "epoch": 55.81, "learning_rate": 9.581766210996911e-05, "loss": 0.0007, "step": 13060 }, { "epoch": 55.83, "learning_rate": 9.581299482269882e-05, "loss": 0.0001, "step": 13064 }, { "epoch": 55.85, "learning_rate": 9.58083250464485e-05, "loss": 0.0005, "step": 13068 }, { "epoch": 55.86, "learning_rate": 9.580365278147185e-05, "loss": 0.0008, "step": 13072 }, { "epoch": 55.88, "learning_rate": 9.579897802802273e-05, "loss": 0.0006, "step": 13076 }, { "epoch": 55.9, "learning_rate": 9.579430078635509e-05, "loss": 0.0004, "step": 13080 }, { "epoch": 55.91, "learning_rate": 9.578962105672307e-05, "loss": 0.0019, "step": 13084 }, { "epoch": 55.93, "learning_rate": 9.57849388393809e-05, "loss": 0.0005, "step": 13088 }, { "epoch": 55.95, "learning_rate": 9.578025413458296e-05, "loss": 0.0002, "step": 13092 }, { "epoch": 55.97, "learning_rate": 9.577556694258378e-05, "loss": 0.0002, "step": 13096 }, { "epoch": 55.98, "learning_rate": 9.577087726363798e-05, "loss": 0.0028, "step": 13100 }, { "epoch": 56.0, "learning_rate": 9.57661850980004e-05, "loss": 0.0002, "step": 13104 }, { "epoch": 56.02, "learning_rate": 9.576149044592589e-05, "loss": 0.0006, "step": 13108 }, { "epoch": 56.03, "learning_rate": 9.575679330766955e-05, "loss": 0.0017, "step": 13112 }, { "epoch": 56.05, "learning_rate": 9.575209368348659e-05, "loss": 0.0006, "step": 13116 }, { "epoch": 56.07, "learning_rate": 9.57473915736323e-05, "loss": 0.0005, "step": 13120 }, { "epoch": 56.09, "learning_rate": 9.574268697836216e-05, "loss": 0.0003, "step": 13124 }, { "epoch": 56.1, "learning_rate": 9.573797989793174e-05, "loss": 0.0005, "step": 13128 }, { "epoch": 56.12, "learning_rate": 9.57332703325968e-05, "loss": 0.0001, "step": 13132 }, { "epoch": 56.14, "learning_rate": 9.57285582826132e-05, "loss": 0.0011, "step": 13136 }, { "epoch": 56.15, "learning_rate": 9.572384374823695e-05, "loss": 0.0011, "step": 13140 }, { "epoch": 56.17, "learning_rate": 9.571912672972416e-05, "loss": 0.0019, "step": 13144 }, { "epoch": 56.19, "learning_rate": 9.571440722733112e-05, "loss": 0.0002, "step": 13148 }, { "epoch": 56.21, "learning_rate": 9.570968524131423e-05, "loss": 0.002, "step": 13152 }, { "epoch": 56.22, "learning_rate": 9.570496077193005e-05, "loss": 0.0003, "step": 13156 }, { "epoch": 56.24, "learning_rate": 9.570023381943523e-05, "loss": 0.001, "step": 13160 }, { "epoch": 56.26, "learning_rate": 9.56955043840866e-05, "loss": 0.0004, "step": 13164 }, { "epoch": 56.27, "learning_rate": 9.56907724661411e-05, "loss": 0.0009, "step": 13168 }, { "epoch": 56.29, "learning_rate": 9.568603806585581e-05, "loss": 0.0002, "step": 13172 }, { "epoch": 56.31, "learning_rate": 9.568130118348793e-05, "loss": 0.0009, "step": 13176 }, { "epoch": 56.32, "learning_rate": 9.567656181929485e-05, "loss": 0.0005, "step": 13180 }, { "epoch": 56.34, "learning_rate": 9.567181997353403e-05, "loss": 0.0015, "step": 13184 }, { "epoch": 56.36, "learning_rate": 9.566707564646307e-05, "loss": 0.0003, "step": 13188 }, { "epoch": 56.38, "learning_rate": 9.566232883833977e-05, "loss": 0.0001, "step": 13192 }, { "epoch": 56.39, "learning_rate": 9.565757954942198e-05, "loss": 0.0012, "step": 13196 }, { "epoch": 56.41, "learning_rate": 9.565282777996775e-05, "loss": 0.0071, "step": 13200 }, { "epoch": 56.43, "learning_rate": 9.564807353023523e-05, "loss": 0.0007, "step": 13204 }, { "epoch": 56.44, "learning_rate": 9.564331680048273e-05, "loss": 0.0008, "step": 13208 }, { "epoch": 56.46, "learning_rate": 9.563855759096864e-05, "loss": 0.0009, "step": 13212 }, { "epoch": 56.48, "learning_rate": 9.563379590195157e-05, "loss": 0.0002, "step": 13216 }, { "epoch": 56.5, "learning_rate": 9.56290317336902e-05, "loss": 0.001, "step": 13220 }, { "epoch": 56.51, "learning_rate": 9.562426508644336e-05, "loss": 0.0009, "step": 13224 }, { "epoch": 56.53, "learning_rate": 9.561949596046999e-05, "loss": 0.0003, "step": 13228 }, { "epoch": 56.55, "learning_rate": 9.561472435602924e-05, "loss": 0.0023, "step": 13232 }, { "epoch": 56.56, "learning_rate": 9.560995027338033e-05, "loss": 0.0003, "step": 13236 }, { "epoch": 56.58, "learning_rate": 9.560517371278262e-05, "loss": 0.0014, "step": 13240 }, { "epoch": 56.6, "learning_rate": 9.560039467449563e-05, "loss": 0.0001, "step": 13244 }, { "epoch": 56.62, "learning_rate": 9.5595613158779e-05, "loss": 0.0003, "step": 13248 }, { "epoch": 56.63, "learning_rate": 9.55908291658925e-05, "loss": 0.0009, "step": 13252 }, { "epoch": 56.65, "learning_rate": 9.558604269609604e-05, "loss": 0.0009, "step": 13256 }, { "epoch": 56.67, "learning_rate": 9.558125374964967e-05, "loss": 0.001, "step": 13260 }, { "epoch": 56.68, "learning_rate": 9.557646232681356e-05, "loss": 0.0001, "step": 13264 }, { "epoch": 56.7, "learning_rate": 9.557166842784804e-05, "loss": 0.0002, "step": 13268 }, { "epoch": 56.72, "learning_rate": 9.556687205301354e-05, "loss": 0.0011, "step": 13272 }, { "epoch": 56.74, "learning_rate": 9.556207320257065e-05, "loss": 0.0011, "step": 13276 }, { "epoch": 56.75, "learning_rate": 9.55572718767801e-05, "loss": 0.0009, "step": 13280 }, { "epoch": 56.77, "learning_rate": 9.555246807590273e-05, "loss": 0.0006, "step": 13284 }, { "epoch": 56.79, "learning_rate": 9.554766180019952e-05, "loss": 0.0018, "step": 13288 }, { "epoch": 56.8, "learning_rate": 9.55428530499316e-05, "loss": 0.0004, "step": 13292 }, { "epoch": 56.82, "learning_rate": 9.553804182536021e-05, "loss": 0.0015, "step": 13296 }, { "epoch": 56.84, "learning_rate": 9.553322812674676e-05, "loss": 0.0004, "step": 13300 }, { "epoch": 56.85, "learning_rate": 9.552841195435278e-05, "loss": 0.0004, "step": 13304 }, { "epoch": 56.87, "learning_rate": 9.552359330843991e-05, "loss": 0.0004, "step": 13308 }, { "epoch": 56.89, "learning_rate": 9.551877218926994e-05, "loss": 0.0014, "step": 13312 }, { "epoch": 56.91, "learning_rate": 9.55139485971048e-05, "loss": 0.0002, "step": 13316 }, { "epoch": 56.92, "learning_rate": 9.550912253220656e-05, "loss": 0.0017, "step": 13320 }, { "epoch": 56.94, "learning_rate": 9.550429399483743e-05, "loss": 0.0005, "step": 13324 }, { "epoch": 56.96, "learning_rate": 9.549946298525971e-05, "loss": 0.0004, "step": 13328 }, { "epoch": 56.97, "learning_rate": 9.549462950373587e-05, "loss": 0.001, "step": 13332 }, { "epoch": 56.99, "learning_rate": 9.548979355052853e-05, "loss": 0.0013, "step": 13336 }, { "epoch": 57.01, "learning_rate": 9.54849551259004e-05, "loss": 0.0002, "step": 13340 }, { "epoch": 57.03, "learning_rate": 9.548011423011435e-05, "loss": 0.0002, "step": 13344 }, { "epoch": 57.04, "learning_rate": 9.54752708634334e-05, "loss": 0.0002, "step": 13348 }, { "epoch": 57.06, "learning_rate": 9.547042502612067e-05, "loss": 0.0004, "step": 13352 }, { "epoch": 57.08, "learning_rate": 9.546557671843944e-05, "loss": 0.0008, "step": 13356 }, { "epoch": 57.09, "learning_rate": 9.54607259406531e-05, "loss": 0.001, "step": 13360 }, { "epoch": 57.11, "learning_rate": 9.545587269302521e-05, "loss": 0.0007, "step": 13364 }, { "epoch": 57.13, "learning_rate": 9.545101697581942e-05, "loss": 0.0003, "step": 13368 }, { "epoch": 57.15, "learning_rate": 9.544615878929955e-05, "loss": 0.0014, "step": 13372 }, { "epoch": 57.16, "learning_rate": 9.544129813372953e-05, "loss": 0.0009, "step": 13376 }, { "epoch": 57.18, "learning_rate": 9.543643500937345e-05, "loss": 0.0004, "step": 13380 }, { "epoch": 57.2, "learning_rate": 9.54315694164955e-05, "loss": 0.0001, "step": 13384 }, { "epoch": 57.21, "learning_rate": 9.542670135536005e-05, "loss": 0.0054, "step": 13388 }, { "epoch": 57.23, "learning_rate": 9.542183082623155e-05, "loss": 0.0005, "step": 13392 }, { "epoch": 57.25, "learning_rate": 9.541695782937463e-05, "loss": 0.0027, "step": 13396 }, { "epoch": 57.26, "learning_rate": 9.541208236505403e-05, "loss": 0.0008, "step": 13400 }, { "epoch": 57.28, "learning_rate": 9.540720443353464e-05, "loss": 0.001, "step": 13404 }, { "epoch": 57.3, "learning_rate": 9.540232403508146e-05, "loss": 0.0014, "step": 13408 }, { "epoch": 57.32, "learning_rate": 9.539744116995964e-05, "loss": 0.0016, "step": 13412 }, { "epoch": 57.33, "learning_rate": 9.539255583843445e-05, "loss": 0.0004, "step": 13416 }, { "epoch": 57.35, "learning_rate": 9.538766804077134e-05, "loss": 0.0004, "step": 13420 }, { "epoch": 57.37, "learning_rate": 9.538277777723583e-05, "loss": 0.0001, "step": 13424 }, { "epoch": 57.38, "learning_rate": 9.53778850480936e-05, "loss": 0.0002, "step": 13428 }, { "epoch": 57.4, "learning_rate": 9.537298985361049e-05, "loss": 0.0002, "step": 13432 }, { "epoch": 57.42, "learning_rate": 9.536809219405245e-05, "loss": 0.0015, "step": 13436 }, { "epoch": 57.44, "learning_rate": 9.536319206968556e-05, "loss": 0.0014, "step": 13440 }, { "epoch": 57.45, "learning_rate": 9.535828948077602e-05, "loss": 0.0006, "step": 13444 }, { "epoch": 57.47, "learning_rate": 9.53533844275902e-05, "loss": 0.0005, "step": 13448 }, { "epoch": 57.49, "learning_rate": 9.534847691039461e-05, "loss": 0.0007, "step": 13452 }, { "epoch": 57.5, "learning_rate": 9.534356692945583e-05, "loss": 0.0002, "step": 13456 }, { "epoch": 57.52, "learning_rate": 9.533865448504064e-05, "loss": 0.0002, "step": 13460 }, { "epoch": 57.54, "learning_rate": 9.533373957741594e-05, "loss": 0.0012, "step": 13464 }, { "epoch": 57.56, "learning_rate": 9.532882220684871e-05, "loss": 0.0016, "step": 13468 }, { "epoch": 57.57, "learning_rate": 9.532390237360613e-05, "loss": 0.0004, "step": 13472 }, { "epoch": 57.59, "learning_rate": 9.531898007795552e-05, "loss": 0.0001, "step": 13476 }, { "epoch": 57.61, "learning_rate": 9.531405532016424e-05, "loss": 0.0002, "step": 13480 }, { "epoch": 57.62, "learning_rate": 9.53091281004999e-05, "loss": 0.0002, "step": 13484 }, { "epoch": 57.64, "learning_rate": 9.530419841923018e-05, "loss": 0.0002, "step": 13488 }, { "epoch": 57.66, "learning_rate": 9.52992662766229e-05, "loss": 0.001, "step": 13492 }, { "epoch": 57.68, "learning_rate": 9.529433167294601e-05, "loss": 0.0024, "step": 13496 }, { "epoch": 57.69, "learning_rate": 9.528939460846764e-05, "loss": 0.0004, "step": 13500 }, { "epoch": 57.71, "learning_rate": 9.528445508345596e-05, "loss": 0.0003, "step": 13504 }, { "epoch": 57.73, "learning_rate": 9.527951309817936e-05, "loss": 0.0004, "step": 13508 }, { "epoch": 57.74, "learning_rate": 9.527456865290633e-05, "loss": 0.0018, "step": 13512 }, { "epoch": 57.76, "learning_rate": 9.526962174790552e-05, "loss": 0.0003, "step": 13516 }, { "epoch": 57.78, "learning_rate": 9.526467238344566e-05, "loss": 0.0001, "step": 13520 }, { "epoch": 57.79, "learning_rate": 9.525972055979566e-05, "loss": 0.0003, "step": 13524 }, { "epoch": 57.81, "learning_rate": 9.525476627722454e-05, "loss": 0.0001, "step": 13528 }, { "epoch": 57.83, "learning_rate": 9.524980953600146e-05, "loss": 0.0003, "step": 13532 }, { "epoch": 57.85, "learning_rate": 9.524485033639574e-05, "loss": 0.0043, "step": 13536 }, { "epoch": 57.86, "learning_rate": 9.523988867867676e-05, "loss": 0.0001, "step": 13540 }, { "epoch": 57.88, "learning_rate": 9.523492456311414e-05, "loss": 0.0002, "step": 13544 }, { "epoch": 57.9, "learning_rate": 9.522995798997754e-05, "loss": 0.0003, "step": 13548 }, { "epoch": 57.91, "learning_rate": 9.52249889595368e-05, "loss": 0.0023, "step": 13552 }, { "epoch": 57.93, "learning_rate": 9.522001747206186e-05, "loss": 0.0001, "step": 13556 }, { "epoch": 57.95, "learning_rate": 9.521504352782287e-05, "loss": 0.0009, "step": 13560 }, { "epoch": 57.97, "learning_rate": 9.521006712709001e-05, "loss": 0.0022, "step": 13564 }, { "epoch": 57.98, "learning_rate": 9.520508827013366e-05, "loss": 0.0015, "step": 13568 }, { "epoch": 58.0, "learning_rate": 9.520010695722432e-05, "loss": 0.0005, "step": 13572 }, { "epoch": 58.02, "learning_rate": 9.519512318863263e-05, "loss": 0.0001, "step": 13576 }, { "epoch": 58.03, "learning_rate": 9.519013696462934e-05, "loss": 0.0006, "step": 13580 }, { "epoch": 58.05, "learning_rate": 9.518514828548535e-05, "loss": 0.0002, "step": 13584 }, { "epoch": 58.07, "learning_rate": 9.518015715147168e-05, "loss": 0.0044, "step": 13588 }, { "epoch": 58.09, "learning_rate": 9.517516356285951e-05, "loss": 0.0003, "step": 13592 }, { "epoch": 58.1, "learning_rate": 9.517016751992013e-05, "loss": 0.0006, "step": 13596 }, { "epoch": 58.12, "learning_rate": 9.516516902292498e-05, "loss": 0.0052, "step": 13600 }, { "epoch": 58.14, "learning_rate": 9.51601680721456e-05, "loss": 0.0003, "step": 13604 }, { "epoch": 58.15, "learning_rate": 9.515516466785373e-05, "loss": 0.0003, "step": 13608 }, { "epoch": 58.17, "learning_rate": 9.515015881032116e-05, "loss": 0.0005, "step": 13612 }, { "epoch": 58.19, "learning_rate": 9.514515049981987e-05, "loss": 0.0002, "step": 13616 }, { "epoch": 58.21, "learning_rate": 9.514013973662195e-05, "loss": 0.0002, "step": 13620 }, { "epoch": 58.22, "learning_rate": 9.513512652099963e-05, "loss": 0.0006, "step": 13624 }, { "epoch": 58.24, "learning_rate": 9.51301108532253e-05, "loss": 0.0015, "step": 13628 }, { "epoch": 58.26, "learning_rate": 9.512509273357141e-05, "loss": 0.0006, "step": 13632 }, { "epoch": 58.27, "learning_rate": 9.512007216231063e-05, "loss": 0.0003, "step": 13636 }, { "epoch": 58.29, "learning_rate": 9.511504913971572e-05, "loss": 0.0006, "step": 13640 }, { "epoch": 58.31, "learning_rate": 9.511002366605954e-05, "loss": 0.0003, "step": 13644 }, { "epoch": 58.32, "learning_rate": 9.510499574161517e-05, "loss": 0.0002, "step": 13648 }, { "epoch": 58.34, "learning_rate": 9.509996536665573e-05, "loss": 0.0019, "step": 13652 }, { "epoch": 58.36, "learning_rate": 9.509493254145455e-05, "loss": 0.0013, "step": 13656 }, { "epoch": 58.38, "learning_rate": 9.508989726628503e-05, "loss": 0.0002, "step": 13660 }, { "epoch": 58.39, "learning_rate": 9.508485954142076e-05, "loss": 0.0001, "step": 13664 }, { "epoch": 58.41, "learning_rate": 9.507981936713541e-05, "loss": 0.0002, "step": 13668 }, { "epoch": 58.43, "learning_rate": 9.507477674370282e-05, "loss": 0.0004, "step": 13672 }, { "epoch": 58.44, "learning_rate": 9.506973167139694e-05, "loss": 0.0002, "step": 13676 }, { "epoch": 58.46, "learning_rate": 9.50646841504919e-05, "loss": 0.0006, "step": 13680 }, { "epoch": 58.48, "learning_rate": 9.505963418126188e-05, "loss": 0.0001, "step": 13684 }, { "epoch": 58.5, "learning_rate": 9.505458176398128e-05, "loss": 0.0004, "step": 13688 }, { "epoch": 58.51, "learning_rate": 9.504952689892456e-05, "loss": 0.0009, "step": 13692 }, { "epoch": 58.53, "learning_rate": 9.504446958636635e-05, "loss": 0.0003, "step": 13696 }, { "epoch": 58.55, "learning_rate": 9.503940982658145e-05, "loss": 0.0006, "step": 13700 }, { "epoch": 58.56, "learning_rate": 9.50343476198447e-05, "loss": 0.0001, "step": 13704 }, { "epoch": 58.58, "learning_rate": 9.502928296643117e-05, "loss": 0.0003, "step": 13708 }, { "epoch": 58.6, "learning_rate": 9.502421586661598e-05, "loss": 0.0004, "step": 13712 }, { "epoch": 58.62, "learning_rate": 9.501914632067446e-05, "loss": 0.0009, "step": 13716 }, { "epoch": 58.63, "learning_rate": 9.501407432888199e-05, "loss": 0.0002, "step": 13720 }, { "epoch": 58.65, "learning_rate": 9.500899989151416e-05, "loss": 0.0003, "step": 13724 }, { "epoch": 58.67, "learning_rate": 9.500392300884665e-05, "loss": 0.0002, "step": 13728 }, { "epoch": 58.68, "learning_rate": 9.499884368115528e-05, "loss": 0.0002, "step": 13732 }, { "epoch": 58.7, "learning_rate": 9.4993761908716e-05, "loss": 0.0002, "step": 13736 }, { "epoch": 58.72, "learning_rate": 9.498867769180491e-05, "loss": 0.0001, "step": 13740 }, { "epoch": 58.74, "learning_rate": 9.498359103069823e-05, "loss": 0.0001, "step": 13744 }, { "epoch": 58.75, "learning_rate": 9.49785019256723e-05, "loss": 0.0005, "step": 13748 }, { "epoch": 58.77, "learning_rate": 9.497341037700363e-05, "loss": 0.0006, "step": 13752 }, { "epoch": 58.79, "learning_rate": 9.496831638496883e-05, "loss": 0.0017, "step": 13756 }, { "epoch": 58.8, "learning_rate": 9.496321994984463e-05, "loss": 0.0002, "step": 13760 }, { "epoch": 58.82, "learning_rate": 9.495812107190795e-05, "loss": 0.0007, "step": 13764 }, { "epoch": 58.84, "learning_rate": 9.495301975143578e-05, "loss": 0.0004, "step": 13768 }, { "epoch": 58.85, "learning_rate": 9.494791598870528e-05, "loss": 0.0016, "step": 13772 }, { "epoch": 58.87, "learning_rate": 9.494280978399374e-05, "loss": 0.0001, "step": 13776 }, { "epoch": 58.89, "learning_rate": 9.493770113757855e-05, "loss": 0.0004, "step": 13780 }, { "epoch": 58.91, "learning_rate": 9.493259004973732e-05, "loss": 0.0002, "step": 13784 }, { "epoch": 58.92, "learning_rate": 9.492747652074766e-05, "loss": 0.0003, "step": 13788 }, { "epoch": 58.94, "learning_rate": 9.492236055088741e-05, "loss": 0.0019, "step": 13792 }, { "epoch": 58.96, "learning_rate": 9.491724214043453e-05, "loss": 0.0003, "step": 13796 }, { "epoch": 58.97, "learning_rate": 9.491212128966709e-05, "loss": 0.0005, "step": 13800 }, { "epoch": 58.99, "learning_rate": 9.49069979988633e-05, "loss": 0.0002, "step": 13804 }, { "epoch": 59.01, "learning_rate": 9.490187226830148e-05, "loss": 0.0001, "step": 13808 }, { "epoch": 59.03, "learning_rate": 9.489674409826015e-05, "loss": 0.0018, "step": 13812 }, { "epoch": 59.04, "learning_rate": 9.48916134890179e-05, "loss": 0.0022, "step": 13816 }, { "epoch": 59.06, "learning_rate": 9.488648044085345e-05, "loss": 0.0004, "step": 13820 }, { "epoch": 59.08, "learning_rate": 9.488134495404572e-05, "loss": 0.0002, "step": 13824 }, { "epoch": 59.09, "learning_rate": 9.487620702887368e-05, "loss": 0.0005, "step": 13828 }, { "epoch": 59.11, "learning_rate": 9.487106666561647e-05, "loss": 0.0006, "step": 13832 }, { "epoch": 59.13, "learning_rate": 9.486592386455338e-05, "loss": 0.0002, "step": 13836 }, { "epoch": 59.15, "learning_rate": 9.486077862596379e-05, "loss": 0.0003, "step": 13840 }, { "epoch": 59.16, "learning_rate": 9.485563095012727e-05, "loss": 0.0006, "step": 13844 }, { "epoch": 59.18, "learning_rate": 9.485048083732346e-05, "loss": 0.0017, "step": 13848 }, { "epoch": 59.2, "learning_rate": 9.484532828783217e-05, "loss": 0.0001, "step": 13852 }, { "epoch": 59.21, "learning_rate": 9.484017330193333e-05, "loss": 0.0005, "step": 13856 }, { "epoch": 59.23, "learning_rate": 9.483501587990703e-05, "loss": 0.0004, "step": 13860 }, { "epoch": 59.25, "learning_rate": 9.482985602203343e-05, "loss": 0.0009, "step": 13864 }, { "epoch": 59.26, "learning_rate": 9.482469372859288e-05, "loss": 0.0009, "step": 13868 }, { "epoch": 59.28, "learning_rate": 9.481952899986586e-05, "loss": 0.0015, "step": 13872 }, { "epoch": 59.3, "learning_rate": 9.481436183613293e-05, "loss": 0.0009, "step": 13876 }, { "epoch": 59.32, "learning_rate": 9.480919223767484e-05, "loss": 0.0002, "step": 13880 }, { "epoch": 59.33, "learning_rate": 9.480402020477243e-05, "loss": 0.0015, "step": 13884 }, { "epoch": 59.35, "learning_rate": 9.479884573770674e-05, "loss": 0.0029, "step": 13888 }, { "epoch": 59.37, "learning_rate": 9.479366883675884e-05, "loss": 0.0001, "step": 13892 }, { "epoch": 59.38, "learning_rate": 9.478848950221e-05, "loss": 0.0019, "step": 13896 }, { "epoch": 59.4, "learning_rate": 9.478330773434163e-05, "loss": 0.0006, "step": 13900 }, { "epoch": 59.42, "learning_rate": 9.477812353343522e-05, "loss": 0.0002, "step": 13904 }, { "epoch": 59.44, "learning_rate": 9.477293689977245e-05, "loss": 0.0003, "step": 13908 }, { "epoch": 59.45, "learning_rate": 9.476774783363511e-05, "loss": 0.0022, "step": 13912 }, { "epoch": 59.47, "learning_rate": 9.476255633530508e-05, "loss": 0.0003, "step": 13916 }, { "epoch": 59.49, "learning_rate": 9.475736240506444e-05, "loss": 0.0001, "step": 13920 }, { "epoch": 59.5, "learning_rate": 9.475216604319536e-05, "loss": 0.0004, "step": 13924 }, { "epoch": 59.52, "learning_rate": 9.474696724998017e-05, "loss": 0.0004, "step": 13928 }, { "epoch": 59.54, "learning_rate": 9.47417660257013e-05, "loss": 0.0003, "step": 13932 }, { "epoch": 59.56, "learning_rate": 9.473656237064133e-05, "loss": 0.001, "step": 13936 }, { "epoch": 59.57, "learning_rate": 9.473135628508296e-05, "loss": 0.0003, "step": 13940 }, { "epoch": 59.59, "learning_rate": 9.472614776930906e-05, "loss": 0.0003, "step": 13944 }, { "epoch": 59.61, "learning_rate": 9.47209368236026e-05, "loss": 0.0006, "step": 13948 }, { "epoch": 59.62, "learning_rate": 9.471572344824664e-05, "loss": 0.0011, "step": 13952 }, { "epoch": 59.64, "learning_rate": 9.471050764352447e-05, "loss": 0.0007, "step": 13956 }, { "epoch": 59.66, "learning_rate": 9.470528940971944e-05, "loss": 0.0001, "step": 13960 }, { "epoch": 59.68, "learning_rate": 9.470006874711505e-05, "loss": 0.0019, "step": 13964 }, { "epoch": 59.69, "learning_rate": 9.469484565599495e-05, "loss": 0.0002, "step": 13968 }, { "epoch": 59.71, "learning_rate": 9.468962013664288e-05, "loss": 0.0001, "step": 13972 }, { "epoch": 59.73, "learning_rate": 9.468439218934276e-05, "loss": 0.0005, "step": 13976 }, { "epoch": 59.74, "learning_rate": 9.467916181437862e-05, "loss": 0.0017, "step": 13980 }, { "epoch": 59.76, "learning_rate": 9.46739290120346e-05, "loss": 0.0001, "step": 13984 }, { "epoch": 59.78, "learning_rate": 9.466869378259502e-05, "loss": 0.0001, "step": 13988 }, { "epoch": 59.79, "learning_rate": 9.466345612634428e-05, "loss": 0.0012, "step": 13992 }, { "epoch": 59.81, "learning_rate": 9.465821604356696e-05, "loss": 0.0004, "step": 13996 }, { "epoch": 59.83, "learning_rate": 9.465297353454774e-05, "loss": 0.0006, "step": 14000 }, { "epoch": 59.83, "eval_exact_match": 0.524948024948025, "eval_loss": 0.8439415097236633, "eval_runtime": 156.3849, "eval_samples_per_second": 6.151, "step": 14000 }, { "epoch": 59.85, "learning_rate": 9.464772859957143e-05, "loss": 0.0005, "step": 14004 }, { "epoch": 59.86, "learning_rate": 9.4642481238923e-05, "loss": 0.0002, "step": 14008 }, { "epoch": 59.88, "learning_rate": 9.463723145288752e-05, "loss": 0.0002, "step": 14012 }, { "epoch": 59.9, "learning_rate": 9.463197924175022e-05, "loss": 0.0005, "step": 14016 }, { "epoch": 59.91, "learning_rate": 9.462672460579645e-05, "loss": 0.0009, "step": 14020 }, { "epoch": 59.93, "learning_rate": 9.462146754531167e-05, "loss": 0.0002, "step": 14024 }, { "epoch": 59.95, "learning_rate": 9.461620806058152e-05, "loss": 0.001, "step": 14028 }, { "epoch": 59.97, "learning_rate": 9.46109461518917e-05, "loss": 0.0003, "step": 14032 }, { "epoch": 59.98, "learning_rate": 9.460568181952813e-05, "loss": 0.0004, "step": 14036 }, { "epoch": 60.0, "learning_rate": 9.460041506377679e-05, "loss": 0.0001, "step": 14040 }, { "epoch": 60.02, "learning_rate": 9.459514588492382e-05, "loss": 0.0002, "step": 14044 }, { "epoch": 60.03, "learning_rate": 9.45898742832555e-05, "loss": 0.0001, "step": 14048 }, { "epoch": 60.05, "learning_rate": 9.458460025905824e-05, "loss": 0.0016, "step": 14052 }, { "epoch": 60.07, "learning_rate": 9.457932381261855e-05, "loss": 0.0012, "step": 14056 }, { "epoch": 60.09, "learning_rate": 9.45740449442231e-05, "loss": 0.0002, "step": 14060 }, { "epoch": 60.1, "learning_rate": 9.456876365415871e-05, "loss": 0.0013, "step": 14064 }, { "epoch": 60.12, "learning_rate": 9.456347994271229e-05, "loss": 0.0001, "step": 14068 }, { "epoch": 60.14, "learning_rate": 9.455819381017089e-05, "loss": 0.0002, "step": 14072 }, { "epoch": 60.15, "learning_rate": 9.45529052568217e-05, "loss": 0.001, "step": 14076 }, { "epoch": 60.17, "learning_rate": 9.454761428295206e-05, "loss": 0.0001, "step": 14080 }, { "epoch": 60.19, "learning_rate": 9.454232088884942e-05, "loss": 0.0002, "step": 14084 }, { "epoch": 60.21, "learning_rate": 9.453702507480136e-05, "loss": 0.0003, "step": 14088 }, { "epoch": 60.22, "learning_rate": 9.453172684109559e-05, "loss": 0.0002, "step": 14092 }, { "epoch": 60.24, "learning_rate": 9.452642618801997e-05, "loss": 0.0007, "step": 14096 }, { "epoch": 60.26, "learning_rate": 9.452112311586247e-05, "loss": 0.001, "step": 14100 }, { "epoch": 60.27, "learning_rate": 9.451581762491124e-05, "loss": 0.0013, "step": 14104 }, { "epoch": 60.29, "learning_rate": 9.451050971545447e-05, "loss": 0.0014, "step": 14108 }, { "epoch": 60.31, "learning_rate": 9.450519938778056e-05, "loss": 0.0011, "step": 14112 }, { "epoch": 60.32, "learning_rate": 9.449988664217799e-05, "loss": 0.0006, "step": 14116 }, { "epoch": 60.34, "learning_rate": 9.449457147893543e-05, "loss": 0.001, "step": 14120 }, { "epoch": 60.36, "learning_rate": 9.448925389834165e-05, "loss": 0.0007, "step": 14124 }, { "epoch": 60.38, "learning_rate": 9.448393390068553e-05, "loss": 0.001, "step": 14128 }, { "epoch": 60.39, "learning_rate": 9.447861148625609e-05, "loss": 0.0005, "step": 14132 }, { "epoch": 60.41, "learning_rate": 9.447328665534253e-05, "loss": 0.0002, "step": 14136 }, { "epoch": 60.43, "learning_rate": 9.446795940823411e-05, "loss": 0.002, "step": 14140 }, { "epoch": 60.44, "learning_rate": 9.446262974522026e-05, "loss": 0.0011, "step": 14144 }, { "epoch": 60.46, "learning_rate": 9.445729766659055e-05, "loss": 0.0004, "step": 14148 }, { "epoch": 60.48, "learning_rate": 9.445196317263467e-05, "loss": 0.0003, "step": 14152 }, { "epoch": 60.5, "learning_rate": 9.44466262636424e-05, "loss": 0.0003, "step": 14156 }, { "epoch": 60.51, "learning_rate": 9.444128693990375e-05, "loss": 0.0001, "step": 14160 }, { "epoch": 60.53, "learning_rate": 9.443594520170876e-05, "loss": 0.0003, "step": 14164 }, { "epoch": 60.55, "learning_rate": 9.443060104934762e-05, "loss": 0.0013, "step": 14168 }, { "epoch": 60.56, "learning_rate": 9.442525448311072e-05, "loss": 0.0007, "step": 14172 }, { "epoch": 60.58, "learning_rate": 9.441990550328854e-05, "loss": 0.0001, "step": 14176 }, { "epoch": 60.6, "learning_rate": 9.441455411017164e-05, "loss": 0.0003, "step": 14180 }, { "epoch": 60.62, "learning_rate": 9.440920030405078e-05, "loss": 0.0002, "step": 14184 }, { "epoch": 60.63, "learning_rate": 9.440384408521684e-05, "loss": 0.0001, "step": 14188 }, { "epoch": 60.65, "learning_rate": 9.439848545396079e-05, "loss": 0.0005, "step": 14192 }, { "epoch": 60.67, "learning_rate": 9.439312441057379e-05, "loss": 0.0001, "step": 14196 }, { "epoch": 60.68, "learning_rate": 9.438776095534708e-05, "loss": 0.0004, "step": 14200 }, { "epoch": 60.7, "learning_rate": 9.438239508857207e-05, "loss": 0.0016, "step": 14204 }, { "epoch": 60.72, "learning_rate": 9.437702681054025e-05, "loss": 0.0005, "step": 14208 }, { "epoch": 60.74, "learning_rate": 9.437165612154331e-05, "loss": 0.0004, "step": 14212 }, { "epoch": 60.75, "learning_rate": 9.4366283021873e-05, "loss": 0.001, "step": 14216 }, { "epoch": 60.77, "learning_rate": 9.436090751182128e-05, "loss": 0.0016, "step": 14220 }, { "epoch": 60.79, "learning_rate": 9.435552959168016e-05, "loss": 0.0014, "step": 14224 }, { "epoch": 60.8, "learning_rate": 9.435014926174183e-05, "loss": 0.0003, "step": 14228 }, { "epoch": 60.82, "learning_rate": 9.43447665222986e-05, "loss": 0.0003, "step": 14232 }, { "epoch": 60.84, "learning_rate": 9.43393813736429e-05, "loss": 0.0004, "step": 14236 }, { "epoch": 60.85, "learning_rate": 9.433399381606733e-05, "loss": 0.0003, "step": 14240 }, { "epoch": 60.87, "learning_rate": 9.432860384986456e-05, "loss": 0.0008, "step": 14244 }, { "epoch": 60.89, "learning_rate": 9.432321147532743e-05, "loss": 0.0015, "step": 14248 }, { "epoch": 60.91, "learning_rate": 9.431781669274891e-05, "loss": 0.0003, "step": 14252 }, { "epoch": 60.92, "learning_rate": 9.431241950242207e-05, "loss": 0.0002, "step": 14256 }, { "epoch": 60.94, "learning_rate": 9.430701990464017e-05, "loss": 0.0003, "step": 14260 }, { "epoch": 60.96, "learning_rate": 9.430161789969654e-05, "loss": 0.0011, "step": 14264 }, { "epoch": 60.97, "learning_rate": 9.429621348788468e-05, "loss": 0.0001, "step": 14268 }, { "epoch": 60.99, "learning_rate": 9.429080666949821e-05, "loss": 0.0009, "step": 14272 }, { "epoch": 61.01, "learning_rate": 9.428539744483085e-05, "loss": 0.0005, "step": 14276 }, { "epoch": 61.03, "learning_rate": 9.427998581417652e-05, "loss": 0.001, "step": 14280 }, { "epoch": 61.04, "learning_rate": 9.42745717778292e-05, "loss": 0.0003, "step": 14284 }, { "epoch": 61.06, "learning_rate": 9.426915533608304e-05, "loss": 0.0011, "step": 14288 }, { "epoch": 61.08, "learning_rate": 9.42637364892323e-05, "loss": 0.0006, "step": 14292 }, { "epoch": 61.09, "learning_rate": 9.425831523757139e-05, "loss": 0.0002, "step": 14296 }, { "epoch": 61.11, "learning_rate": 9.425289158139486e-05, "loss": 0.0001, "step": 14300 }, { "epoch": 61.13, "learning_rate": 9.424746552099733e-05, "loss": 0.0002, "step": 14304 }, { "epoch": 61.15, "learning_rate": 9.424203705667363e-05, "loss": 0.0003, "step": 14308 }, { "epoch": 61.16, "learning_rate": 9.423660618871866e-05, "loss": 0.0001, "step": 14312 }, { "epoch": 61.18, "learning_rate": 9.423117291742749e-05, "loss": 0.0013, "step": 14316 }, { "epoch": 61.2, "learning_rate": 9.42257372430953e-05, "loss": 0.0006, "step": 14320 }, { "epoch": 61.21, "learning_rate": 9.42202991660174e-05, "loss": 0.0003, "step": 14324 }, { "epoch": 61.23, "learning_rate": 9.421485868648925e-05, "loss": 0.0002, "step": 14328 }, { "epoch": 61.25, "learning_rate": 9.420941580480642e-05, "loss": 0.0009, "step": 14332 }, { "epoch": 61.26, "learning_rate": 9.42039705212646e-05, "loss": 0.0004, "step": 14336 }, { "epoch": 61.28, "learning_rate": 9.419852283615964e-05, "loss": 0.0001, "step": 14340 }, { "epoch": 61.3, "learning_rate": 9.419307274978753e-05, "loss": 0.0004, "step": 14344 }, { "epoch": 61.32, "learning_rate": 9.418762026244434e-05, "loss": 0.0002, "step": 14348 }, { "epoch": 61.33, "learning_rate": 9.41821653744263e-05, "loss": 0.0008, "step": 14352 }, { "epoch": 61.35, "learning_rate": 9.417670808602978e-05, "loss": 0.0034, "step": 14356 }, { "epoch": 61.37, "learning_rate": 9.417124839755127e-05, "loss": 0.0001, "step": 14360 }, { "epoch": 61.38, "learning_rate": 9.416578630928738e-05, "loss": 0.0018, "step": 14364 }, { "epoch": 61.4, "learning_rate": 9.416032182153488e-05, "loss": 0.0002, "step": 14368 }, { "epoch": 61.42, "learning_rate": 9.415485493459063e-05, "loss": 0.0002, "step": 14372 }, { "epoch": 61.44, "learning_rate": 9.414938564875164e-05, "loss": 0.001, "step": 14376 }, { "epoch": 61.45, "learning_rate": 9.414391396431507e-05, "loss": 0.0015, "step": 14380 }, { "epoch": 61.47, "learning_rate": 9.41384398815782e-05, "loss": 0.0003, "step": 14384 }, { "epoch": 61.49, "learning_rate": 9.41329634008384e-05, "loss": 0.0021, "step": 14388 }, { "epoch": 61.5, "learning_rate": 9.412748452239321e-05, "loss": 0.0008, "step": 14392 }, { "epoch": 61.52, "learning_rate": 9.41220032465403e-05, "loss": 0.0006, "step": 14396 }, { "epoch": 61.54, "learning_rate": 9.411651957357746e-05, "loss": 0.0003, "step": 14400 }, { "epoch": 61.56, "learning_rate": 9.411103350380263e-05, "loss": 0.0031, "step": 14404 }, { "epoch": 61.57, "learning_rate": 9.410554503751383e-05, "loss": 0.0003, "step": 14408 }, { "epoch": 61.59, "learning_rate": 9.410005417500928e-05, "loss": 0.0004, "step": 14412 }, { "epoch": 61.61, "learning_rate": 9.409456091658726e-05, "loss": 0.0019, "step": 14416 }, { "epoch": 61.62, "learning_rate": 9.408906526254624e-05, "loss": 0.0001, "step": 14420 }, { "epoch": 61.64, "learning_rate": 9.408356721318479e-05, "loss": 0.0002, "step": 14424 }, { "epoch": 61.66, "learning_rate": 9.40780667688016e-05, "loss": 0.0003, "step": 14428 }, { "epoch": 61.68, "learning_rate": 9.407256392969552e-05, "loss": 0.0002, "step": 14432 }, { "epoch": 61.69, "learning_rate": 9.40670586961655e-05, "loss": 0.0013, "step": 14436 }, { "epoch": 61.71, "learning_rate": 9.406155106851064e-05, "loss": 0.0003, "step": 14440 }, { "epoch": 61.73, "learning_rate": 9.405604104703019e-05, "loss": 0.0003, "step": 14444 }, { "epoch": 61.74, "learning_rate": 9.405052863202346e-05, "loss": 0.0003, "step": 14448 }, { "epoch": 61.76, "learning_rate": 9.404501382378996e-05, "loss": 0.0004, "step": 14452 }, { "epoch": 61.78, "learning_rate": 9.40394966226293e-05, "loss": 0.0019, "step": 14456 }, { "epoch": 61.79, "learning_rate": 9.403397702884123e-05, "loss": 0.0004, "step": 14460 }, { "epoch": 61.81, "learning_rate": 9.40284550427256e-05, "loss": 0.0008, "step": 14464 }, { "epoch": 61.83, "learning_rate": 9.402293066458246e-05, "loss": 0.0001, "step": 14468 }, { "epoch": 61.85, "learning_rate": 9.401740389471191e-05, "loss": 0.0005, "step": 14472 }, { "epoch": 61.86, "learning_rate": 9.401187473341422e-05, "loss": 0.0002, "step": 14476 }, { "epoch": 61.88, "learning_rate": 9.400634318098979e-05, "loss": 0.0002, "step": 14480 }, { "epoch": 61.9, "learning_rate": 9.400080923773915e-05, "loss": 0.0005, "step": 14484 }, { "epoch": 61.91, "learning_rate": 9.399527290396293e-05, "loss": 0.0002, "step": 14488 }, { "epoch": 61.93, "learning_rate": 9.398973417996195e-05, "loss": 0.0007, "step": 14492 }, { "epoch": 61.95, "learning_rate": 9.39841930660371e-05, "loss": 0.0002, "step": 14496 }, { "epoch": 61.97, "learning_rate": 9.397864956248942e-05, "loss": 0.001, "step": 14500 }, { "epoch": 61.98, "learning_rate": 9.39731036696201e-05, "loss": 0.0002, "step": 14504 }, { "epoch": 62.0, "learning_rate": 9.396755538773045e-05, "loss": 0.0014, "step": 14508 }, { "epoch": 62.02, "learning_rate": 9.396200471712188e-05, "loss": 0.0001, "step": 14512 }, { "epoch": 62.03, "learning_rate": 9.395645165809597e-05, "loss": 0.0004, "step": 14516 }, { "epoch": 62.05, "learning_rate": 9.395089621095439e-05, "loss": 0.0003, "step": 14520 }, { "epoch": 62.07, "learning_rate": 9.3945338375999e-05, "loss": 0.0008, "step": 14524 }, { "epoch": 62.09, "learning_rate": 9.393977815353169e-05, "loss": 0.0001, "step": 14528 }, { "epoch": 62.1, "learning_rate": 9.393421554385461e-05, "loss": 0.0003, "step": 14532 }, { "epoch": 62.12, "learning_rate": 9.392865054726996e-05, "loss": 0.0004, "step": 14536 }, { "epoch": 62.14, "learning_rate": 9.392308316408005e-05, "loss": 0.0001, "step": 14540 }, { "epoch": 62.15, "learning_rate": 9.391751339458737e-05, "loss": 0.0014, "step": 14544 }, { "epoch": 62.17, "learning_rate": 9.391194123909451e-05, "loss": 0.0002, "step": 14548 }, { "epoch": 62.19, "learning_rate": 9.390636669790422e-05, "loss": 0.0001, "step": 14552 }, { "epoch": 62.21, "learning_rate": 9.390078977131933e-05, "loss": 0.0005, "step": 14556 }, { "epoch": 62.22, "learning_rate": 9.389521045964287e-05, "loss": 0.0005, "step": 14560 }, { "epoch": 62.24, "learning_rate": 9.388962876317793e-05, "loss": 0.0003, "step": 14564 }, { "epoch": 62.26, "learning_rate": 9.388404468222775e-05, "loss": 0.0002, "step": 14568 }, { "epoch": 62.27, "learning_rate": 9.387845821709573e-05, "loss": 0.0003, "step": 14572 }, { "epoch": 62.29, "learning_rate": 9.387286936808539e-05, "loss": 0.0001, "step": 14576 }, { "epoch": 62.31, "learning_rate": 9.386727813550034e-05, "loss": 0.001, "step": 14580 }, { "epoch": 62.32, "learning_rate": 9.386168451964434e-05, "loss": 0.0002, "step": 14584 }, { "epoch": 62.34, "learning_rate": 9.38560885208213e-05, "loss": 0.0004, "step": 14588 }, { "epoch": 62.36, "learning_rate": 9.385049013933526e-05, "loss": 0.0005, "step": 14592 }, { "epoch": 62.38, "learning_rate": 9.384488937549036e-05, "loss": 0.0004, "step": 14596 }, { "epoch": 62.39, "learning_rate": 9.383928622959091e-05, "loss": 0.0003, "step": 14600 }, { "epoch": 62.41, "learning_rate": 9.383368070194127e-05, "loss": 0.0001, "step": 14604 }, { "epoch": 62.43, "learning_rate": 9.382807279284602e-05, "loss": 0.0005, "step": 14608 }, { "epoch": 62.44, "learning_rate": 9.382246250260984e-05, "loss": 0.0026, "step": 14612 }, { "epoch": 62.46, "learning_rate": 9.381684983153751e-05, "loss": 0.0001, "step": 14616 }, { "epoch": 62.48, "learning_rate": 9.381123477993398e-05, "loss": 0.0002, "step": 14620 }, { "epoch": 62.5, "learning_rate": 9.380561734810429e-05, "loss": 0.0002, "step": 14624 }, { "epoch": 62.51, "learning_rate": 9.379999753635365e-05, "loss": 0.0001, "step": 14628 }, { "epoch": 62.53, "learning_rate": 9.379437534498737e-05, "loss": 0.0002, "step": 14632 }, { "epoch": 62.55, "learning_rate": 9.37887507743109e-05, "loss": 0.0002, "step": 14636 }, { "epoch": 62.56, "learning_rate": 9.378312382462981e-05, "loss": 0.0022, "step": 14640 }, { "epoch": 62.58, "learning_rate": 9.377749449624984e-05, "loss": 0.0001, "step": 14644 }, { "epoch": 62.6, "learning_rate": 9.377186278947678e-05, "loss": 0.002, "step": 14648 }, { "epoch": 62.62, "learning_rate": 9.376622870461663e-05, "loss": 0.0005, "step": 14652 }, { "epoch": 62.63, "learning_rate": 9.376059224197547e-05, "loss": 0.0003, "step": 14656 }, { "epoch": 62.65, "learning_rate": 9.375495340185952e-05, "loss": 0.0012, "step": 14660 }, { "epoch": 62.67, "learning_rate": 9.374931218457515e-05, "loss": 0.0001, "step": 14664 }, { "epoch": 62.68, "learning_rate": 9.374366859042883e-05, "loss": 0.0001, "step": 14668 }, { "epoch": 62.7, "learning_rate": 9.373802261972717e-05, "loss": 0.0002, "step": 14672 }, { "epoch": 62.72, "learning_rate": 9.373237427277692e-05, "loss": 0.0001, "step": 14676 }, { "epoch": 62.74, "learning_rate": 9.372672354988494e-05, "loss": 0.0003, "step": 14680 }, { "epoch": 62.75, "learning_rate": 9.372107045135823e-05, "loss": 0.0001, "step": 14684 }, { "epoch": 62.77, "learning_rate": 9.371541497750393e-05, "loss": 0.0016, "step": 14688 }, { "epoch": 62.79, "learning_rate": 9.370975712862929e-05, "loss": 0.0001, "step": 14692 }, { "epoch": 62.8, "learning_rate": 9.370409690504169e-05, "loss": 0.0005, "step": 14696 }, { "epoch": 62.82, "learning_rate": 9.369843430704864e-05, "loss": 0.0001, "step": 14700 }, { "epoch": 62.84, "learning_rate": 9.369276933495781e-05, "loss": 0.0009, "step": 14704 }, { "epoch": 62.85, "learning_rate": 9.368710198907695e-05, "loss": 0.0001, "step": 14708 }, { "epoch": 62.87, "learning_rate": 9.368143226971395e-05, "loss": 0.0001, "step": 14712 }, { "epoch": 62.89, "learning_rate": 9.367576017717689e-05, "loss": 0.0002, "step": 14716 }, { "epoch": 62.91, "learning_rate": 9.367008571177388e-05, "loss": 0.0023, "step": 14720 }, { "epoch": 62.92, "learning_rate": 9.366440887381324e-05, "loss": 0.0011, "step": 14724 }, { "epoch": 62.94, "learning_rate": 9.365872966360336e-05, "loss": 0.0002, "step": 14728 }, { "epoch": 62.96, "learning_rate": 9.36530480814528e-05, "loss": 0.0006, "step": 14732 }, { "epoch": 62.97, "learning_rate": 9.364736412767026e-05, "loss": 0.0002, "step": 14736 }, { "epoch": 62.99, "learning_rate": 9.364167780256452e-05, "loss": 0.0006, "step": 14740 }, { "epoch": 63.01, "learning_rate": 9.363598910644449e-05, "loss": 0.0007, "step": 14744 }, { "epoch": 63.03, "learning_rate": 9.363029803961927e-05, "loss": 0.0003, "step": 14748 }, { "epoch": 63.04, "learning_rate": 9.362460460239804e-05, "loss": 0.0001, "step": 14752 }, { "epoch": 63.06, "learning_rate": 9.361890879509012e-05, "loss": 0.0001, "step": 14756 }, { "epoch": 63.08, "learning_rate": 9.361321061800494e-05, "loss": 0.0003, "step": 14760 }, { "epoch": 63.09, "learning_rate": 9.36075100714521e-05, "loss": 0.0003, "step": 14764 }, { "epoch": 63.11, "learning_rate": 9.36018071557413e-05, "loss": 0.0007, "step": 14768 }, { "epoch": 63.13, "learning_rate": 9.359610187118238e-05, "loss": 0.0001, "step": 14772 }, { "epoch": 63.15, "learning_rate": 9.359039421808529e-05, "loss": 0.0002, "step": 14776 }, { "epoch": 63.16, "learning_rate": 9.358468419676012e-05, "loss": 0.0013, "step": 14780 }, { "epoch": 63.18, "learning_rate": 9.357897180751709e-05, "loss": 0.0001, "step": 14784 }, { "epoch": 63.2, "learning_rate": 9.357325705066658e-05, "loss": 0.0001, "step": 14788 }, { "epoch": 63.21, "learning_rate": 9.356753992651903e-05, "loss": 0.0008, "step": 14792 }, { "epoch": 63.23, "learning_rate": 9.356182043538506e-05, "loss": 0.0001, "step": 14796 }, { "epoch": 63.25, "learning_rate": 9.355609857757541e-05, "loss": 0.0001, "step": 14800 }, { "epoch": 63.26, "learning_rate": 9.355037435340095e-05, "loss": 0.0002, "step": 14804 }, { "epoch": 63.28, "learning_rate": 9.354464776317265e-05, "loss": 0.0002, "step": 14808 }, { "epoch": 63.3, "learning_rate": 9.353891880720163e-05, "loss": 0.0002, "step": 14812 }, { "epoch": 63.32, "learning_rate": 9.353318748579916e-05, "loss": 0.0008, "step": 14816 }, { "epoch": 63.33, "learning_rate": 9.352745379927661e-05, "loss": 0.0008, "step": 14820 }, { "epoch": 63.35, "learning_rate": 9.35217177479455e-05, "loss": 0.0002, "step": 14824 }, { "epoch": 63.37, "learning_rate": 9.351597933211742e-05, "loss": 0.0002, "step": 14828 }, { "epoch": 63.38, "learning_rate": 9.35102385521042e-05, "loss": 0.0001, "step": 14832 }, { "epoch": 63.4, "learning_rate": 9.350449540821767e-05, "loss": 0.0002, "step": 14836 }, { "epoch": 63.42, "learning_rate": 9.349874990076988e-05, "loss": 0.0002, "step": 14840 }, { "epoch": 63.44, "learning_rate": 9.349300203007296e-05, "loss": 0.0021, "step": 14844 }, { "epoch": 63.45, "learning_rate": 9.348725179643922e-05, "loss": 0.0005, "step": 14848 }, { "epoch": 63.47, "learning_rate": 9.348149920018104e-05, "loss": 0.0001, "step": 14852 }, { "epoch": 63.49, "learning_rate": 9.347574424161093e-05, "loss": 0.0002, "step": 14856 }, { "epoch": 63.5, "learning_rate": 9.346998692104162e-05, "loss": 0.0009, "step": 14860 }, { "epoch": 63.52, "learning_rate": 9.346422723878585e-05, "loss": 0.0012, "step": 14864 }, { "epoch": 63.54, "learning_rate": 9.345846519515654e-05, "loss": 0.0001, "step": 14868 }, { "epoch": 63.56, "learning_rate": 9.345270079046675e-05, "loss": 0.001, "step": 14872 }, { "epoch": 63.57, "learning_rate": 9.344693402502964e-05, "loss": 0.0013, "step": 14876 }, { "epoch": 63.59, "learning_rate": 9.344116489915855e-05, "loss": 0.0005, "step": 14880 }, { "epoch": 63.61, "learning_rate": 9.343539341316687e-05, "loss": 0.0007, "step": 14884 }, { "epoch": 63.62, "learning_rate": 9.342961956736817e-05, "loss": 0.0009, "step": 14888 }, { "epoch": 63.64, "learning_rate": 9.342384336207614e-05, "loss": 0.0002, "step": 14892 }, { "epoch": 63.66, "learning_rate": 9.341806479760463e-05, "loss": 0.0014, "step": 14896 }, { "epoch": 63.68, "learning_rate": 9.341228387426752e-05, "loss": 0.0001, "step": 14900 }, { "epoch": 63.69, "learning_rate": 9.340650059237893e-05, "loss": 0.0002, "step": 14904 }, { "epoch": 63.71, "learning_rate": 9.340071495225305e-05, "loss": 0.0008, "step": 14908 }, { "epoch": 63.73, "learning_rate": 9.33949269542042e-05, "loss": 0.001, "step": 14912 }, { "epoch": 63.74, "learning_rate": 9.338913659854684e-05, "loss": 0.0001, "step": 14916 }, { "epoch": 63.76, "learning_rate": 9.338334388559557e-05, "loss": 0.0004, "step": 14920 }, { "epoch": 63.78, "learning_rate": 9.33775488156651e-05, "loss": 0.0003, "step": 14924 }, { "epoch": 63.79, "learning_rate": 9.337175138907024e-05, "loss": 0.0007, "step": 14928 }, { "epoch": 63.81, "learning_rate": 9.336595160612598e-05, "loss": 0.0004, "step": 14932 }, { "epoch": 63.83, "learning_rate": 9.336014946714743e-05, "loss": 0.0002, "step": 14936 }, { "epoch": 63.85, "learning_rate": 9.335434497244979e-05, "loss": 0.0003, "step": 14940 }, { "epoch": 63.86, "learning_rate": 9.334853812234845e-05, "loss": 0.0003, "step": 14944 }, { "epoch": 63.88, "learning_rate": 9.334272891715885e-05, "loss": 0.0005, "step": 14948 }, { "epoch": 63.9, "learning_rate": 9.333691735719662e-05, "loss": 0.0001, "step": 14952 }, { "epoch": 63.91, "learning_rate": 9.333110344277749e-05, "loss": 0.0002, "step": 14956 }, { "epoch": 63.93, "learning_rate": 9.332528717421734e-05, "loss": 0.001, "step": 14960 }, { "epoch": 63.95, "learning_rate": 9.331946855183215e-05, "loss": 0.0001, "step": 14964 }, { "epoch": 63.97, "learning_rate": 9.331364757593803e-05, "loss": 0.0008, "step": 14968 }, { "epoch": 63.98, "learning_rate": 9.330782424685125e-05, "loss": 0.0002, "step": 14972 }, { "epoch": 64.0, "learning_rate": 9.330199856488817e-05, "loss": 0.0005, "step": 14976 }, { "epoch": 64.02, "learning_rate": 9.329617053036531e-05, "loss": 0.0001, "step": 14980 }, { "epoch": 64.03, "learning_rate": 9.329034014359929e-05, "loss": 0.0002, "step": 14984 }, { "epoch": 64.05, "learning_rate": 9.328450740490687e-05, "loss": 0.0001, "step": 14988 }, { "epoch": 64.07, "learning_rate": 9.327867231460495e-05, "loss": 0.0004, "step": 14992 }, { "epoch": 64.09, "learning_rate": 9.327283487301052e-05, "loss": 0.0003, "step": 14996 }, { "epoch": 64.1, "learning_rate": 9.326699508044075e-05, "loss": 0.0028, "step": 15000 }, { "epoch": 64.1, "eval_exact_match": 0.501039501039501, "eval_loss": 0.8530089259147644, "eval_runtime": 139.8575, "eval_samples_per_second": 6.878, "step": 15000 }, { "epoch": 64.12, "learning_rate": 9.326115293721289e-05, "loss": 0.0002, "step": 15004 }, { "epoch": 64.14, "learning_rate": 9.325530844364437e-05, "loss": 0.0003, "step": 15008 }, { "epoch": 64.15, "learning_rate": 9.324946160005268e-05, "loss": 0.0002, "step": 15012 }, { "epoch": 64.17, "learning_rate": 9.324361240675548e-05, "loss": 0.0002, "step": 15016 }, { "epoch": 64.19, "learning_rate": 9.323776086407058e-05, "loss": 0.0002, "step": 15020 }, { "epoch": 64.21, "learning_rate": 9.323190697231586e-05, "loss": 0.0001, "step": 15024 }, { "epoch": 64.22, "learning_rate": 9.322605073180937e-05, "loss": 0.0003, "step": 15028 }, { "epoch": 64.24, "learning_rate": 9.322019214286926e-05, "loss": 0.0001, "step": 15032 }, { "epoch": 64.26, "learning_rate": 9.321433120581384e-05, "loss": 0.0007, "step": 15036 }, { "epoch": 64.27, "learning_rate": 9.320846792096152e-05, "loss": 0.0002, "step": 15040 }, { "epoch": 64.29, "learning_rate": 9.320260228863084e-05, "loss": 0.0002, "step": 15044 }, { "epoch": 64.31, "learning_rate": 9.31967343091405e-05, "loss": 0.0007, "step": 15048 }, { "epoch": 64.32, "learning_rate": 9.319086398280928e-05, "loss": 0.0001, "step": 15052 }, { "epoch": 64.34, "learning_rate": 9.318499130995613e-05, "loss": 0.0007, "step": 15056 }, { "epoch": 64.36, "learning_rate": 9.317911629090007e-05, "loss": 0.0001, "step": 15060 }, { "epoch": 64.38, "learning_rate": 9.317323892596033e-05, "loss": 0.0003, "step": 15064 }, { "epoch": 64.39, "learning_rate": 9.316735921545618e-05, "loss": 0.0001, "step": 15068 }, { "epoch": 64.41, "learning_rate": 9.316147715970709e-05, "loss": 0.0001, "step": 15072 }, { "epoch": 64.43, "learning_rate": 9.315559275903261e-05, "loss": 0.0005, "step": 15076 }, { "epoch": 64.44, "learning_rate": 9.314970601375245e-05, "loss": 0.0005, "step": 15080 }, { "epoch": 64.46, "learning_rate": 9.314381692418642e-05, "loss": 0.0001, "step": 15084 }, { "epoch": 64.48, "learning_rate": 9.313792549065446e-05, "loss": 0.0003, "step": 15088 }, { "epoch": 64.5, "learning_rate": 9.313203171347667e-05, "loss": 0.0002, "step": 15092 }, { "epoch": 64.51, "learning_rate": 9.312613559297325e-05, "loss": 0.002, "step": 15096 }, { "epoch": 64.53, "learning_rate": 9.312023712946452e-05, "loss": 0.0001, "step": 15100 }, { "epoch": 64.55, "learning_rate": 9.311433632327093e-05, "loss": 0.0001, "step": 15104 }, { "epoch": 64.56, "learning_rate": 9.310843317471308e-05, "loss": 0.0005, "step": 15108 }, { "epoch": 64.58, "learning_rate": 9.310252768411167e-05, "loss": 0.0017, "step": 15112 }, { "epoch": 64.6, "learning_rate": 9.309661985178756e-05, "loss": 0.0002, "step": 15116 }, { "epoch": 64.62, "learning_rate": 9.309070967806171e-05, "loss": 0.0001, "step": 15120 }, { "epoch": 64.63, "learning_rate": 9.308479716325522e-05, "loss": 0.0001, "step": 15124 }, { "epoch": 64.65, "learning_rate": 9.307888230768929e-05, "loss": 0.0001, "step": 15128 }, { "epoch": 64.67, "learning_rate": 9.307296511168528e-05, "loss": 0.0001, "step": 15132 }, { "epoch": 64.68, "learning_rate": 9.306704557556468e-05, "loss": 0.0014, "step": 15136 }, { "epoch": 64.7, "learning_rate": 9.306112369964908e-05, "loss": 0.0001, "step": 15140 }, { "epoch": 64.72, "learning_rate": 9.305519948426022e-05, "loss": 0.0001, "step": 15144 }, { "epoch": 64.74, "learning_rate": 9.304927292971994e-05, "loss": 0.0002, "step": 15148 }, { "epoch": 64.75, "learning_rate": 9.304334403635023e-05, "loss": 0.0003, "step": 15152 }, { "epoch": 64.77, "learning_rate": 9.303741280447322e-05, "loss": 0.0007, "step": 15156 }, { "epoch": 64.79, "learning_rate": 9.303147923441112e-05, "loss": 0.0001, "step": 15160 }, { "epoch": 64.8, "learning_rate": 9.302554332648633e-05, "loss": 0.0001, "step": 15164 }, { "epoch": 64.82, "learning_rate": 9.30196050810213e-05, "loss": 0.0003, "step": 15168 }, { "epoch": 64.84, "learning_rate": 9.301366449833868e-05, "loss": 0.0007, "step": 15172 }, { "epoch": 64.85, "learning_rate": 9.30077215787612e-05, "loss": 0.0002, "step": 15176 }, { "epoch": 64.87, "learning_rate": 9.300177632261176e-05, "loss": 0.0001, "step": 15180 }, { "epoch": 64.89, "learning_rate": 9.299582873021333e-05, "loss": 0.0021, "step": 15184 }, { "epoch": 64.91, "learning_rate": 9.298987880188907e-05, "loss": 0.0002, "step": 15188 }, { "epoch": 64.92, "learning_rate": 9.29839265379622e-05, "loss": 0.0003, "step": 15192 }, { "epoch": 64.94, "learning_rate": 9.297797193875612e-05, "loss": 0.0003, "step": 15196 }, { "epoch": 64.96, "learning_rate": 9.297201500459433e-05, "loss": 0.0001, "step": 15200 }, { "epoch": 64.97, "learning_rate": 9.296605573580049e-05, "loss": 0.0009, "step": 15204 }, { "epoch": 64.99, "learning_rate": 9.296009413269833e-05, "loss": 0.0003, "step": 15208 }, { "epoch": 65.01, "learning_rate": 9.295413019561174e-05, "loss": 0.0018, "step": 15212 }, { "epoch": 65.03, "learning_rate": 9.294816392486476e-05, "loss": 0.0001, "step": 15216 }, { "epoch": 65.04, "learning_rate": 9.294219532078151e-05, "loss": 0.0002, "step": 15220 }, { "epoch": 65.06, "learning_rate": 9.293622438368628e-05, "loss": 0.0002, "step": 15224 }, { "epoch": 65.08, "learning_rate": 9.293025111390343e-05, "loss": 0.0001, "step": 15228 }, { "epoch": 65.09, "learning_rate": 9.292427551175753e-05, "loss": 0.0001, "step": 15232 }, { "epoch": 65.11, "learning_rate": 9.291829757757319e-05, "loss": 0.0003, "step": 15236 }, { "epoch": 65.13, "learning_rate": 9.291231731167521e-05, "loss": 0.0008, "step": 15240 }, { "epoch": 65.15, "learning_rate": 9.290633471438847e-05, "loss": 0.0004, "step": 15244 }, { "epoch": 65.16, "learning_rate": 9.290034978603803e-05, "loss": 0.0008, "step": 15248 }, { "epoch": 65.18, "learning_rate": 9.289436252694902e-05, "loss": 0.0009, "step": 15252 }, { "epoch": 65.2, "learning_rate": 9.288837293744673e-05, "loss": 0.0004, "step": 15256 }, { "epoch": 65.21, "learning_rate": 9.288238101785658e-05, "loss": 0.0007, "step": 15260 }, { "epoch": 65.23, "learning_rate": 9.287638676850409e-05, "loss": 0.0002, "step": 15264 }, { "epoch": 65.25, "learning_rate": 9.287039018971493e-05, "loss": 0.0006, "step": 15268 }, { "epoch": 65.26, "learning_rate": 9.286439128181488e-05, "loss": 0.0001, "step": 15272 }, { "epoch": 65.28, "learning_rate": 9.285839004512986e-05, "loss": 0.0006, "step": 15276 }, { "epoch": 65.3, "learning_rate": 9.285238647998592e-05, "loss": 0.0001, "step": 15280 }, { "epoch": 65.32, "learning_rate": 9.284638058670924e-05, "loss": 0.0003, "step": 15284 }, { "epoch": 65.33, "learning_rate": 9.284037236562608e-05, "loss": 0.0002, "step": 15288 }, { "epoch": 65.35, "learning_rate": 9.283436181706288e-05, "loss": 0.0003, "step": 15292 }, { "epoch": 65.37, "learning_rate": 9.28283489413462e-05, "loss": 0.0007, "step": 15296 }, { "epoch": 65.38, "learning_rate": 9.28223337388027e-05, "loss": 0.0004, "step": 15300 }, { "epoch": 65.4, "learning_rate": 9.281631620975917e-05, "loss": 0.0001, "step": 15304 }, { "epoch": 65.42, "learning_rate": 9.281029635454256e-05, "loss": 0.0004, "step": 15308 }, { "epoch": 65.44, "learning_rate": 9.280427417347991e-05, "loss": 0.0004, "step": 15312 }, { "epoch": 65.45, "learning_rate": 9.27982496668984e-05, "loss": 0.0006, "step": 15316 }, { "epoch": 65.47, "learning_rate": 9.279222283512534e-05, "loss": 0.0002, "step": 15320 }, { "epoch": 65.49, "learning_rate": 9.278619367848818e-05, "loss": 0.0008, "step": 15324 }, { "epoch": 65.5, "learning_rate": 9.278016219731442e-05, "loss": 0.0005, "step": 15328 }, { "epoch": 65.52, "learning_rate": 9.277412839193183e-05, "loss": 0.0011, "step": 15332 }, { "epoch": 65.54, "learning_rate": 9.276809226266814e-05, "loss": 0.0004, "step": 15336 }, { "epoch": 65.56, "learning_rate": 9.276205380985134e-05, "loss": 0.0008, "step": 15340 }, { "epoch": 65.57, "learning_rate": 9.27560130338095e-05, "loss": 0.0004, "step": 15344 }, { "epoch": 65.59, "learning_rate": 9.274996993487074e-05, "loss": 0.0003, "step": 15348 }, { "epoch": 65.61, "learning_rate": 9.274392451336346e-05, "loss": 0.0013, "step": 15352 }, { "epoch": 65.62, "learning_rate": 9.273787676961607e-05, "loss": 0.0002, "step": 15356 }, { "epoch": 65.64, "learning_rate": 9.273182670395713e-05, "loss": 0.0005, "step": 15360 }, { "epoch": 65.66, "learning_rate": 9.272577431671534e-05, "loss": 0.0003, "step": 15364 }, { "epoch": 65.68, "learning_rate": 9.271971960821952e-05, "loss": 0.0006, "step": 15368 }, { "epoch": 65.69, "learning_rate": 9.271366257879861e-05, "loss": 0.0004, "step": 15372 }, { "epoch": 65.71, "learning_rate": 9.27076032287817e-05, "loss": 0.0003, "step": 15376 }, { "epoch": 65.73, "learning_rate": 9.270154155849799e-05, "loss": 0.0013, "step": 15380 }, { "epoch": 65.74, "learning_rate": 9.269547756827679e-05, "loss": 0.001, "step": 15384 }, { "epoch": 65.76, "learning_rate": 9.268941125844755e-05, "loss": 0.0001, "step": 15388 }, { "epoch": 65.78, "learning_rate": 9.268334262933986e-05, "loss": 0.0002, "step": 15392 }, { "epoch": 65.79, "learning_rate": 9.267727168128342e-05, "loss": 0.0002, "step": 15396 }, { "epoch": 65.81, "learning_rate": 9.267119841460805e-05, "loss": 0.0002, "step": 15400 }, { "epoch": 65.83, "learning_rate": 9.266512282964372e-05, "loss": 0.0006, "step": 15404 }, { "epoch": 65.85, "learning_rate": 9.26590449267205e-05, "loss": 0.0001, "step": 15408 }, { "epoch": 65.86, "learning_rate": 9.265296470616861e-05, "loss": 0.0001, "step": 15412 }, { "epoch": 65.88, "learning_rate": 9.264688216831836e-05, "loss": 0.0002, "step": 15416 }, { "epoch": 65.9, "learning_rate": 9.264079731350024e-05, "loss": 0.0001, "step": 15420 }, { "epoch": 65.91, "learning_rate": 9.263471014204481e-05, "loss": 0.0001, "step": 15424 }, { "epoch": 65.93, "learning_rate": 9.262862065428278e-05, "loss": 0.0001, "step": 15428 }, { "epoch": 65.95, "learning_rate": 9.2622528850545e-05, "loss": 0.0001, "step": 15432 }, { "epoch": 65.97, "learning_rate": 9.261643473116244e-05, "loss": 0.0003, "step": 15436 }, { "epoch": 65.98, "learning_rate": 9.261033829646617e-05, "loss": 0.0002, "step": 15440 }, { "epoch": 66.0, "learning_rate": 9.26042395467874e-05, "loss": 0.0001, "step": 15444 }, { "epoch": 66.02, "learning_rate": 9.259813848245749e-05, "loss": 0.0004, "step": 15448 }, { "epoch": 66.03, "learning_rate": 9.259203510380789e-05, "loss": 0.0001, "step": 15452 }, { "epoch": 66.05, "learning_rate": 9.25859294111702e-05, "loss": 0.0008, "step": 15456 }, { "epoch": 66.07, "learning_rate": 9.257982140487613e-05, "loss": 0.0002, "step": 15460 }, { "epoch": 66.09, "learning_rate": 9.257371108525753e-05, "loss": 0.0001, "step": 15464 }, { "epoch": 66.1, "learning_rate": 9.256759845264636e-05, "loss": 0.0004, "step": 15468 }, { "epoch": 66.12, "learning_rate": 9.256148350737472e-05, "loss": 0.0002, "step": 15472 }, { "epoch": 66.14, "learning_rate": 9.255536624977484e-05, "loss": 0.0001, "step": 15476 }, { "epoch": 66.15, "learning_rate": 9.254924668017903e-05, "loss": 0.0004, "step": 15480 }, { "epoch": 66.17, "learning_rate": 9.25431247989198e-05, "loss": 0.0001, "step": 15484 }, { "epoch": 66.19, "learning_rate": 9.253700060632972e-05, "loss": 0.0005, "step": 15488 }, { "epoch": 66.21, "learning_rate": 9.253087410274153e-05, "loss": 0.0001, "step": 15492 }, { "epoch": 66.22, "learning_rate": 9.252474528848807e-05, "loss": 0.0001, "step": 15496 }, { "epoch": 66.24, "learning_rate": 9.251861416390232e-05, "loss": 0.0002, "step": 15500 }, { "epoch": 66.26, "learning_rate": 9.251248072931737e-05, "loss": 0.0005, "step": 15504 }, { "epoch": 66.27, "learning_rate": 9.250634498506643e-05, "loss": 0.0001, "step": 15508 }, { "epoch": 66.29, "learning_rate": 9.25002069314829e-05, "loss": 0.0011, "step": 15512 }, { "epoch": 66.31, "learning_rate": 9.249406656890019e-05, "loss": 0.0002, "step": 15516 }, { "epoch": 66.32, "learning_rate": 9.248792389765193e-05, "loss": 0.0, "step": 15520 }, { "epoch": 66.34, "learning_rate": 9.248177891807186e-05, "loss": 0.0002, "step": 15524 }, { "epoch": 66.36, "learning_rate": 9.247563163049381e-05, "loss": 0.0012, "step": 15528 }, { "epoch": 66.38, "learning_rate": 9.246948203525176e-05, "loss": 0.0001, "step": 15532 }, { "epoch": 66.39, "learning_rate": 9.246333013267983e-05, "loss": 0.0001, "step": 15536 }, { "epoch": 66.41, "learning_rate": 9.245717592311225e-05, "loss": 0.0003, "step": 15540 }, { "epoch": 66.43, "learning_rate": 9.245101940688333e-05, "loss": 0.0001, "step": 15544 }, { "epoch": 66.44, "learning_rate": 9.24448605843276e-05, "loss": 0.0014, "step": 15548 }, { "epoch": 66.46, "learning_rate": 9.243869945577962e-05, "loss": 0.0013, "step": 15552 }, { "epoch": 66.48, "learning_rate": 9.243253602157413e-05, "loss": 0.0001, "step": 15556 }, { "epoch": 66.5, "learning_rate": 9.242637028204602e-05, "loss": 0.0004, "step": 15560 }, { "epoch": 66.51, "learning_rate": 9.242020223753023e-05, "loss": 0.0001, "step": 15564 }, { "epoch": 66.53, "learning_rate": 9.241403188836188e-05, "loss": 0.0011, "step": 15568 }, { "epoch": 66.55, "learning_rate": 9.240785923487619e-05, "loss": 0.0002, "step": 15572 }, { "epoch": 66.56, "learning_rate": 9.240168427740852e-05, "loss": 0.0004, "step": 15576 }, { "epoch": 66.58, "learning_rate": 9.239550701629436e-05, "loss": 0.0001, "step": 15580 }, { "epoch": 66.6, "learning_rate": 9.23893274518693e-05, "loss": 0.0001, "step": 15584 }, { "epoch": 66.62, "learning_rate": 9.238314558446907e-05, "loss": 0.0012, "step": 15588 }, { "epoch": 66.63, "learning_rate": 9.237696141442952e-05, "loss": 0.0015, "step": 15592 }, { "epoch": 66.65, "learning_rate": 9.237077494208668e-05, "loss": 0.0014, "step": 15596 }, { "epoch": 66.67, "learning_rate": 9.236458616777659e-05, "loss": 0.0001, "step": 15600 }, { "epoch": 66.68, "learning_rate": 9.235839509183551e-05, "loss": 0.0001, "step": 15604 }, { "epoch": 66.7, "learning_rate": 9.23522017145998e-05, "loss": 0.0002, "step": 15608 }, { "epoch": 66.72, "learning_rate": 9.234600603640594e-05, "loss": 0.0013, "step": 15612 }, { "epoch": 66.74, "learning_rate": 9.233980805759052e-05, "loss": 0.0002, "step": 15616 }, { "epoch": 66.75, "learning_rate": 9.23336077784903e-05, "loss": 0.0004, "step": 15620 }, { "epoch": 66.77, "learning_rate": 9.23274051994421e-05, "loss": 0.0002, "step": 15624 }, { "epoch": 66.79, "learning_rate": 9.232120032078292e-05, "loss": 0.0003, "step": 15628 }, { "epoch": 66.8, "learning_rate": 9.231499314284987e-05, "loss": 0.0001, "step": 15632 }, { "epoch": 66.82, "learning_rate": 9.230878366598017e-05, "loss": 0.0001, "step": 15636 }, { "epoch": 66.84, "learning_rate": 9.230257189051118e-05, "loss": 0.0003, "step": 15640 }, { "epoch": 66.85, "learning_rate": 9.229635781678038e-05, "loss": 0.0003, "step": 15644 }, { "epoch": 66.87, "learning_rate": 9.229014144512537e-05, "loss": 0.0002, "step": 15648 }, { "epoch": 66.89, "learning_rate": 9.22839227758839e-05, "loss": 0.0004, "step": 15652 }, { "epoch": 66.91, "learning_rate": 9.22777018093938e-05, "loss": 0.0004, "step": 15656 }, { "epoch": 66.92, "learning_rate": 9.227147854599306e-05, "loss": 0.0017, "step": 15660 }, { "epoch": 66.94, "learning_rate": 9.226525298601979e-05, "loss": 0.0001, "step": 15664 }, { "epoch": 66.96, "learning_rate": 9.225902512981221e-05, "loss": 0.0003, "step": 15668 }, { "epoch": 66.97, "learning_rate": 9.225279497770867e-05, "loss": 0.0001, "step": 15672 }, { "epoch": 66.99, "learning_rate": 9.224656253004766e-05, "loss": 0.0015, "step": 15676 }, { "epoch": 67.01, "learning_rate": 9.224032778716781e-05, "loss": 0.0005, "step": 15680 }, { "epoch": 67.03, "learning_rate": 9.223409074940777e-05, "loss": 0.0001, "step": 15684 }, { "epoch": 67.04, "learning_rate": 9.222785141710646e-05, "loss": 0.0002, "step": 15688 }, { "epoch": 67.06, "learning_rate": 9.222160979060286e-05, "loss": 0.0003, "step": 15692 }, { "epoch": 67.08, "learning_rate": 9.221536587023603e-05, "loss": 0.0005, "step": 15696 }, { "epoch": 67.09, "learning_rate": 9.220911965634521e-05, "loss": 0.0008, "step": 15700 }, { "epoch": 67.11, "learning_rate": 9.220287114926978e-05, "loss": 0.0001, "step": 15704 }, { "epoch": 67.13, "learning_rate": 9.219662034934917e-05, "loss": 0.0, "step": 15708 }, { "epoch": 67.15, "learning_rate": 9.219036725692302e-05, "loss": 0.0002, "step": 15712 }, { "epoch": 67.16, "learning_rate": 9.218411187233103e-05, "loss": 0.0, "step": 15716 }, { "epoch": 67.18, "learning_rate": 9.217785419591307e-05, "loss": 0.0012, "step": 15720 }, { "epoch": 67.2, "learning_rate": 9.217159422800909e-05, "loss": 0.0002, "step": 15724 }, { "epoch": 67.21, "learning_rate": 9.216533196895922e-05, "loss": 0.0004, "step": 15728 }, { "epoch": 67.23, "learning_rate": 9.215906741910365e-05, "loss": 0.0005, "step": 15732 }, { "epoch": 67.25, "learning_rate": 9.215280057878276e-05, "loss": 0.0001, "step": 15736 }, { "epoch": 67.26, "learning_rate": 9.214653144833699e-05, "loss": 0.0001, "step": 15740 }, { "epoch": 67.28, "learning_rate": 9.214026002810697e-05, "loss": 0.0001, "step": 15744 }, { "epoch": 67.3, "learning_rate": 9.213398631843337e-05, "loss": 0.0005, "step": 15748 }, { "epoch": 67.32, "learning_rate": 9.212771031965709e-05, "loss": 0.0008, "step": 15752 }, { "epoch": 67.33, "learning_rate": 9.212143203211907e-05, "loss": 0.0003, "step": 15756 }, { "epoch": 67.35, "learning_rate": 9.21151514561604e-05, "loss": 0.0001, "step": 15760 }, { "epoch": 67.37, "learning_rate": 9.210886859212233e-05, "loss": 0.0001, "step": 15764 }, { "epoch": 67.38, "learning_rate": 9.210258344034617e-05, "loss": 0.0001, "step": 15768 }, { "epoch": 67.4, "learning_rate": 9.209629600117341e-05, "loss": 0.0001, "step": 15772 }, { "epoch": 67.42, "learning_rate": 9.209000627494562e-05, "loss": 0.0001, "step": 15776 }, { "epoch": 67.44, "learning_rate": 9.208371426200453e-05, "loss": 0.0002, "step": 15780 }, { "epoch": 67.45, "learning_rate": 9.207741996269197e-05, "loss": 0.0001, "step": 15784 }, { "epoch": 67.47, "learning_rate": 9.207112337734989e-05, "loss": 0.0014, "step": 15788 }, { "epoch": 67.49, "learning_rate": 9.206482450632041e-05, "loss": 0.0001, "step": 15792 }, { "epoch": 67.5, "learning_rate": 9.205852334994574e-05, "loss": 0.0001, "step": 15796 }, { "epoch": 67.52, "learning_rate": 9.20522199085682e-05, "loss": 0.0005, "step": 15800 }, { "epoch": 67.54, "learning_rate": 9.204591418253023e-05, "loss": 0.0004, "step": 15804 }, { "epoch": 67.56, "learning_rate": 9.203960617217446e-05, "loss": 0.0001, "step": 15808 }, { "epoch": 67.57, "learning_rate": 9.203329587784359e-05, "loss": 0.0003, "step": 15812 }, { "epoch": 67.59, "learning_rate": 9.202698329988042e-05, "loss": 0.0002, "step": 15816 }, { "epoch": 67.61, "learning_rate": 9.202066843862794e-05, "loss": 0.0003, "step": 15820 }, { "epoch": 67.62, "learning_rate": 9.201435129442922e-05, "loss": 0.0006, "step": 15824 }, { "epoch": 67.64, "learning_rate": 9.200803186762745e-05, "loss": 0.0001, "step": 15828 }, { "epoch": 67.66, "learning_rate": 9.200171015856597e-05, "loss": 0.0001, "step": 15832 }, { "epoch": 67.68, "learning_rate": 9.199538616758827e-05, "loss": 0.0003, "step": 15836 }, { "epoch": 67.69, "learning_rate": 9.198905989503785e-05, "loss": 0.0001, "step": 15840 }, { "epoch": 67.71, "learning_rate": 9.198273134125848e-05, "loss": 0.0005, "step": 15844 }, { "epoch": 67.73, "learning_rate": 9.197640050659396e-05, "loss": 0.0006, "step": 15848 }, { "epoch": 67.74, "learning_rate": 9.197006739138822e-05, "loss": 0.0003, "step": 15852 }, { "epoch": 67.76, "learning_rate": 9.196373199598536e-05, "loss": 0.0006, "step": 15856 }, { "epoch": 67.78, "learning_rate": 9.195739432072955e-05, "loss": 0.0002, "step": 15860 }, { "epoch": 67.79, "learning_rate": 9.195105436596514e-05, "loss": 0.0003, "step": 15864 }, { "epoch": 67.81, "learning_rate": 9.194471213203655e-05, "loss": 0.0001, "step": 15868 }, { "epoch": 67.83, "learning_rate": 9.193836761928838e-05, "loss": 0.0001, "step": 15872 }, { "epoch": 67.85, "learning_rate": 9.193202082806528e-05, "loss": 0.0003, "step": 15876 }, { "epoch": 67.86, "learning_rate": 9.192567175871209e-05, "loss": 0.0001, "step": 15880 }, { "epoch": 67.88, "learning_rate": 9.191932041157375e-05, "loss": 0.0001, "step": 15884 }, { "epoch": 67.9, "learning_rate": 9.191296678699532e-05, "loss": 0.0005, "step": 15888 }, { "epoch": 67.91, "learning_rate": 9.190661088532198e-05, "loss": 0.0006, "step": 15892 }, { "epoch": 67.93, "learning_rate": 9.190025270689905e-05, "loss": 0.0001, "step": 15896 }, { "epoch": 67.95, "learning_rate": 9.189389225207196e-05, "loss": 0.0001, "step": 15900 }, { "epoch": 67.97, "learning_rate": 9.188752952118627e-05, "loss": 0.0002, "step": 15904 }, { "epoch": 67.98, "learning_rate": 9.188116451458765e-05, "loss": 0.0024, "step": 15908 }, { "epoch": 68.0, "learning_rate": 9.187479723262193e-05, "loss": 0.0001, "step": 15912 }, { "epoch": 68.02, "learning_rate": 9.186842767563501e-05, "loss": 0.0005, "step": 15916 }, { "epoch": 68.03, "learning_rate": 9.186205584397296e-05, "loss": 0.0006, "step": 15920 }, { "epoch": 68.05, "learning_rate": 9.185568173798195e-05, "loss": 0.0001, "step": 15924 }, { "epoch": 68.07, "learning_rate": 9.184930535800828e-05, "loss": 0.0001, "step": 15928 }, { "epoch": 68.09, "learning_rate": 9.184292670439839e-05, "loss": 0.0004, "step": 15932 }, { "epoch": 68.1, "learning_rate": 9.18365457774988e-05, "loss": 0.0004, "step": 15936 }, { "epoch": 68.12, "learning_rate": 9.183016257765619e-05, "loss": 0.0001, "step": 15940 }, { "epoch": 68.14, "learning_rate": 9.182377710521737e-05, "loss": 0.0001, "step": 15944 }, { "epoch": 68.15, "learning_rate": 9.181738936052924e-05, "loss": 0.0001, "step": 15948 }, { "epoch": 68.17, "learning_rate": 9.181099934393884e-05, "loss": 0.0005, "step": 15952 }, { "epoch": 68.19, "learning_rate": 9.180460705579333e-05, "loss": 0.0003, "step": 15956 }, { "epoch": 68.21, "learning_rate": 9.179821249644e-05, "loss": 0.0001, "step": 15960 }, { "epoch": 68.22, "learning_rate": 9.179181566622628e-05, "loss": 0.0001, "step": 15964 }, { "epoch": 68.24, "learning_rate": 9.178541656549971e-05, "loss": 0.0001, "step": 15968 }, { "epoch": 68.26, "learning_rate": 9.17790151946079e-05, "loss": 0.0004, "step": 15972 }, { "epoch": 68.27, "learning_rate": 9.177261155389864e-05, "loss": 0.0006, "step": 15976 }, { "epoch": 68.29, "learning_rate": 9.17662056437199e-05, "loss": 0.001, "step": 15980 }, { "epoch": 68.31, "learning_rate": 9.175979746441963e-05, "loss": 0.0002, "step": 15984 }, { "epoch": 68.32, "learning_rate": 9.175338701634601e-05, "loss": 0.0002, "step": 15988 }, { "epoch": 68.34, "learning_rate": 9.174697429984732e-05, "loss": 0.0001, "step": 15992 }, { "epoch": 68.36, "learning_rate": 9.174055931527197e-05, "loss": 0.0008, "step": 15996 }, { "epoch": 68.38, "learning_rate": 9.173414206296844e-05, "loss": 0.0014, "step": 16000 }, { "epoch": 68.38, "eval_exact_match": 0.5135135135135135, "eval_loss": 0.9037817120552063, "eval_runtime": 142.7035, "eval_samples_per_second": 6.741, "step": 16000 }, { "epoch": 68.39, "learning_rate": 9.17277225432854e-05, "loss": 0.0002, "step": 16004 }, { "epoch": 68.41, "learning_rate": 9.172130075657161e-05, "loss": 0.0001, "step": 16008 }, { "epoch": 68.43, "learning_rate": 9.171487670317598e-05, "loss": 0.0015, "step": 16012 }, { "epoch": 68.44, "learning_rate": 9.170845038344749e-05, "loss": 0.0004, "step": 16016 }, { "epoch": 68.46, "learning_rate": 9.17020217977353e-05, "loss": 0.0005, "step": 16020 }, { "epoch": 68.48, "learning_rate": 9.169559094638866e-05, "loss": 0.0002, "step": 16024 }, { "epoch": 68.5, "learning_rate": 9.168915782975696e-05, "loss": 0.0001, "step": 16028 }, { "epoch": 68.51, "learning_rate": 9.168272244818971e-05, "loss": 0.0002, "step": 16032 }, { "epoch": 68.53, "learning_rate": 9.167628480203652e-05, "loss": 0.0009, "step": 16036 }, { "epoch": 68.55, "learning_rate": 9.166984489164714e-05, "loss": 0.0, "step": 16040 }, { "epoch": 68.56, "learning_rate": 9.166340271737146e-05, "loss": 0.0012, "step": 16044 }, { "epoch": 68.58, "learning_rate": 9.16569582795595e-05, "loss": 0.0001, "step": 16048 }, { "epoch": 68.6, "learning_rate": 9.165051157856132e-05, "loss": 0.0002, "step": 16052 }, { "epoch": 68.62, "learning_rate": 9.164406261472721e-05, "loss": 0.0001, "step": 16056 }, { "epoch": 68.63, "learning_rate": 9.163761138840752e-05, "loss": 0.0002, "step": 16060 }, { "epoch": 68.65, "learning_rate": 9.163115789995276e-05, "loss": 0.0002, "step": 16064 }, { "epoch": 68.67, "learning_rate": 9.162470214971351e-05, "loss": 0.002, "step": 16068 }, { "epoch": 68.68, "learning_rate": 9.161824413804052e-05, "loss": 0.0009, "step": 16072 }, { "epoch": 68.7, "learning_rate": 9.161178386528467e-05, "loss": 0.0001, "step": 16076 }, { "epoch": 68.72, "learning_rate": 9.160532133179689e-05, "loss": 0.0012, "step": 16080 }, { "epoch": 68.74, "learning_rate": 9.159885653792834e-05, "loss": 0.0001, "step": 16084 }, { "epoch": 68.75, "learning_rate": 9.159238948403022e-05, "loss": 0.0001, "step": 16088 }, { "epoch": 68.77, "learning_rate": 9.158592017045387e-05, "loss": 0.0002, "step": 16092 }, { "epoch": 68.79, "learning_rate": 9.157944859755076e-05, "loss": 0.0003, "step": 16096 }, { "epoch": 68.8, "learning_rate": 9.157297476567253e-05, "loss": 0.0002, "step": 16100 }, { "epoch": 68.82, "learning_rate": 9.156649867517084e-05, "loss": 0.0002, "step": 16104 }, { "epoch": 68.84, "learning_rate": 9.156002032639757e-05, "loss": 0.0007, "step": 16108 }, { "epoch": 68.85, "learning_rate": 9.155353971970466e-05, "loss": 0.0001, "step": 16112 }, { "epoch": 68.87, "learning_rate": 9.15470568554442e-05, "loss": 0.0042, "step": 16116 }, { "epoch": 68.89, "learning_rate": 9.154057173396842e-05, "loss": 0.0009, "step": 16120 }, { "epoch": 68.91, "learning_rate": 9.153408435562961e-05, "loss": 0.0009, "step": 16124 }, { "epoch": 68.92, "learning_rate": 9.152759472078027e-05, "loss": 0.0022, "step": 16128 }, { "epoch": 68.94, "learning_rate": 9.152110282977293e-05, "loss": 0.0005, "step": 16132 }, { "epoch": 68.96, "learning_rate": 9.151460868296034e-05, "loss": 0.0013, "step": 16136 }, { "epoch": 68.97, "learning_rate": 9.150811228069527e-05, "loss": 0.0002, "step": 16140 }, { "epoch": 68.99, "learning_rate": 9.15016136233307e-05, "loss": 0.0009, "step": 16144 }, { "epoch": 69.01, "learning_rate": 9.149511271121967e-05, "loss": 0.0008, "step": 16148 }, { "epoch": 69.03, "learning_rate": 9.14886095447154e-05, "loss": 0.0001, "step": 16152 }, { "epoch": 69.04, "learning_rate": 9.148210412417117e-05, "loss": 0.0003, "step": 16156 }, { "epoch": 69.06, "learning_rate": 9.147559644994044e-05, "loss": 0.0031, "step": 16160 }, { "epoch": 69.08, "learning_rate": 9.146908652237676e-05, "loss": 0.0001, "step": 16164 }, { "epoch": 69.09, "learning_rate": 9.146257434183379e-05, "loss": 0.0004, "step": 16168 }, { "epoch": 69.11, "learning_rate": 9.145605990866534e-05, "loss": 0.0001, "step": 16172 }, { "epoch": 69.13, "learning_rate": 9.144954322322534e-05, "loss": 0.0001, "step": 16176 }, { "epoch": 69.15, "learning_rate": 9.144302428586783e-05, "loss": 0.0004, "step": 16180 }, { "epoch": 69.16, "learning_rate": 9.143650309694699e-05, "loss": 0.0001, "step": 16184 }, { "epoch": 69.18, "learning_rate": 9.14299796568171e-05, "loss": 0.0002, "step": 16188 }, { "epoch": 69.2, "learning_rate": 9.142345396583257e-05, "loss": 0.0007, "step": 16192 }, { "epoch": 69.21, "learning_rate": 9.141692602434795e-05, "loss": 0.0001, "step": 16196 }, { "epoch": 69.23, "learning_rate": 9.141039583271787e-05, "loss": 0.0001, "step": 16200 }, { "epoch": 69.25, "learning_rate": 9.140386339129714e-05, "loss": 0.0018, "step": 16204 }, { "epoch": 69.26, "learning_rate": 9.139732870044064e-05, "loss": 0.0001, "step": 16208 }, { "epoch": 69.28, "learning_rate": 9.139079176050341e-05, "loss": 0.0004, "step": 16212 }, { "epoch": 69.3, "learning_rate": 9.138425257184059e-05, "loss": 0.0019, "step": 16216 }, { "epoch": 69.32, "learning_rate": 9.137771113480744e-05, "loss": 0.0013, "step": 16220 }, { "epoch": 69.33, "learning_rate": 9.137116744975937e-05, "loss": 0.0011, "step": 16224 }, { "epoch": 69.35, "learning_rate": 9.136462151705187e-05, "loss": 0.0002, "step": 16228 }, { "epoch": 69.37, "learning_rate": 9.135807333704058e-05, "loss": 0.0001, "step": 16232 }, { "epoch": 69.38, "learning_rate": 9.135152291008128e-05, "loss": 0.0001, "step": 16236 }, { "epoch": 69.4, "learning_rate": 9.134497023652982e-05, "loss": 0.0002, "step": 16240 }, { "epoch": 69.42, "learning_rate": 9.13384153167422e-05, "loss": 0.0001, "step": 16244 }, { "epoch": 69.44, "learning_rate": 9.133185815107458e-05, "loss": 0.0001, "step": 16248 }, { "epoch": 69.45, "learning_rate": 9.132529873988317e-05, "loss": 0.0, "step": 16252 }, { "epoch": 69.47, "learning_rate": 9.131873708352434e-05, "loss": 0.0017, "step": 16256 }, { "epoch": 69.49, "learning_rate": 9.13121731823546e-05, "loss": 0.0003, "step": 16260 }, { "epoch": 69.5, "learning_rate": 9.130560703673054e-05, "loss": 0.0002, "step": 16264 }, { "epoch": 69.52, "learning_rate": 9.12990386470089e-05, "loss": 0.0001, "step": 16268 }, { "epoch": 69.54, "learning_rate": 9.129246801354654e-05, "loss": 0.0002, "step": 16272 }, { "epoch": 69.56, "learning_rate": 9.128589513670043e-05, "loss": 0.0001, "step": 16276 }, { "epoch": 69.57, "learning_rate": 9.127932001682766e-05, "loss": 0.0003, "step": 16280 }, { "epoch": 69.59, "learning_rate": 9.127274265428548e-05, "loss": 0.0001, "step": 16284 }, { "epoch": 69.61, "learning_rate": 9.12661630494312e-05, "loss": 0.0004, "step": 16288 }, { "epoch": 69.62, "learning_rate": 9.12595812026223e-05, "loss": 0.0006, "step": 16292 }, { "epoch": 69.64, "learning_rate": 9.125299711421638e-05, "loss": 0.0001, "step": 16296 }, { "epoch": 69.66, "learning_rate": 9.12464107845711e-05, "loss": 0.0002, "step": 16300 }, { "epoch": 69.68, "learning_rate": 9.123982221404435e-05, "loss": 0.0001, "step": 16304 }, { "epoch": 69.69, "learning_rate": 9.123323140299404e-05, "loss": 0.0002, "step": 16308 }, { "epoch": 69.71, "learning_rate": 9.122663835177825e-05, "loss": 0.001, "step": 16312 }, { "epoch": 69.73, "learning_rate": 9.12200430607552e-05, "loss": 0.0004, "step": 16316 }, { "epoch": 69.74, "learning_rate": 9.121344553028317e-05, "loss": 0.0001, "step": 16320 }, { "epoch": 69.76, "learning_rate": 9.120684576072062e-05, "loss": 0.0017, "step": 16324 }, { "epoch": 69.78, "learning_rate": 9.12002437524261e-05, "loss": 0.0008, "step": 16328 }, { "epoch": 69.79, "learning_rate": 9.11936395057583e-05, "loss": 0.0001, "step": 16332 }, { "epoch": 69.81, "learning_rate": 9.118703302107603e-05, "loss": 0.0006, "step": 16336 }, { "epoch": 69.83, "learning_rate": 9.118042429873818e-05, "loss": 0.0005, "step": 16340 }, { "epoch": 69.85, "learning_rate": 9.117381333910385e-05, "loss": 0.0001, "step": 16344 }, { "epoch": 69.86, "learning_rate": 9.116720014253215e-05, "loss": 0.0001, "step": 16348 }, { "epoch": 69.88, "learning_rate": 9.116058470938241e-05, "loss": 0.0021, "step": 16352 }, { "epoch": 69.9, "learning_rate": 9.115396704001401e-05, "loss": 0.0004, "step": 16356 }, { "epoch": 69.91, "learning_rate": 9.11473471347865e-05, "loss": 0.0001, "step": 16360 }, { "epoch": 69.93, "learning_rate": 9.114072499405955e-05, "loss": 0.0002, "step": 16364 }, { "epoch": 69.95, "learning_rate": 9.113410061819292e-05, "loss": 0.0001, "step": 16368 }, { "epoch": 69.97, "learning_rate": 9.112747400754648e-05, "loss": 0.0001, "step": 16372 }, { "epoch": 69.98, "learning_rate": 9.112084516248029e-05, "loss": 0.0002, "step": 16376 }, { "epoch": 70.0, "learning_rate": 9.111421408335447e-05, "loss": 0.0002, "step": 16380 }, { "epoch": 70.02, "learning_rate": 9.11075807705293e-05, "loss": 0.0007, "step": 16384 }, { "epoch": 70.03, "learning_rate": 9.110094522436512e-05, "loss": 0.0002, "step": 16388 }, { "epoch": 70.05, "learning_rate": 9.109430744522247e-05, "loss": 0.0017, "step": 16392 }, { "epoch": 70.07, "learning_rate": 9.108766743346195e-05, "loss": 0.0003, "step": 16396 }, { "epoch": 70.09, "learning_rate": 9.108102518944433e-05, "loss": 0.0001, "step": 16400 }, { "epoch": 70.1, "learning_rate": 9.107438071353047e-05, "loss": 0.0004, "step": 16404 }, { "epoch": 70.12, "learning_rate": 9.106773400608135e-05, "loss": 0.0003, "step": 16408 }, { "epoch": 70.14, "learning_rate": 9.106108506745807e-05, "loss": 0.0002, "step": 16412 }, { "epoch": 70.15, "learning_rate": 9.105443389802191e-05, "loss": 0.0003, "step": 16416 }, { "epoch": 70.17, "learning_rate": 9.104778049813417e-05, "loss": 0.0001, "step": 16420 }, { "epoch": 70.19, "learning_rate": 9.104112486815633e-05, "loss": 0.0019, "step": 16424 }, { "epoch": 70.21, "learning_rate": 9.103446700845e-05, "loss": 0.0001, "step": 16428 }, { "epoch": 70.22, "learning_rate": 9.102780691937689e-05, "loss": 0.0015, "step": 16432 }, { "epoch": 70.24, "learning_rate": 9.102114460129885e-05, "loss": 0.0001, "step": 16436 }, { "epoch": 70.26, "learning_rate": 9.101448005457783e-05, "loss": 0.0002, "step": 16440 }, { "epoch": 70.27, "learning_rate": 9.100781327957588e-05, "loss": 0.0001, "step": 16444 }, { "epoch": 70.29, "learning_rate": 9.100114427665524e-05, "loss": 0.0001, "step": 16448 }, { "epoch": 70.31, "learning_rate": 9.099447304617823e-05, "loss": 0.0001, "step": 16452 }, { "epoch": 70.32, "learning_rate": 9.098779958850728e-05, "loss": 0.0001, "step": 16456 }, { "epoch": 70.34, "learning_rate": 9.098112390400492e-05, "loss": 0.0012, "step": 16460 }, { "epoch": 70.36, "learning_rate": 9.097444599303389e-05, "loss": 0.0002, "step": 16464 }, { "epoch": 70.38, "learning_rate": 9.096776585595697e-05, "loss": 0.0001, "step": 16468 }, { "epoch": 70.39, "learning_rate": 9.096108349313707e-05, "loss": 0.0006, "step": 16472 }, { "epoch": 70.41, "learning_rate": 9.095439890493728e-05, "loss": 0.0002, "step": 16476 }, { "epoch": 70.43, "learning_rate": 9.094771209172072e-05, "loss": 0.0003, "step": 16480 }, { "epoch": 70.44, "learning_rate": 9.094102305385072e-05, "loss": 0.0003, "step": 16484 }, { "epoch": 70.46, "learning_rate": 9.093433179169065e-05, "loss": 0.0002, "step": 16488 }, { "epoch": 70.48, "learning_rate": 9.092763830560407e-05, "loss": 0.0002, "step": 16492 }, { "epoch": 70.5, "learning_rate": 9.092094259595462e-05, "loss": 0.0001, "step": 16496 }, { "epoch": 70.51, "learning_rate": 9.091424466310608e-05, "loss": 0.0002, "step": 16500 }, { "epoch": 70.53, "learning_rate": 9.090754450742233e-05, "loss": 0.0001, "step": 16504 }, { "epoch": 70.55, "learning_rate": 9.090084212926739e-05, "loss": 0.0001, "step": 16508 }, { "epoch": 70.56, "learning_rate": 9.08941375290054e-05, "loss": 0.0002, "step": 16512 }, { "epoch": 70.58, "learning_rate": 9.088743070700061e-05, "loss": 0.0001, "step": 16516 }, { "epoch": 70.6, "learning_rate": 9.088072166361738e-05, "loss": 0.0001, "step": 16520 }, { "epoch": 70.62, "learning_rate": 9.087401039922025e-05, "loss": 0.0002, "step": 16524 }, { "epoch": 70.63, "learning_rate": 9.08672969141738e-05, "loss": 0.0001, "step": 16528 }, { "epoch": 70.65, "learning_rate": 9.086058120884275e-05, "loss": 0.0005, "step": 16532 }, { "epoch": 70.67, "learning_rate": 9.085386328359202e-05, "loss": 0.0001, "step": 16536 }, { "epoch": 70.68, "learning_rate": 9.084714313878653e-05, "loss": 0.0007, "step": 16540 }, { "epoch": 70.7, "learning_rate": 9.084042077479142e-05, "loss": 0.0003, "step": 16544 }, { "epoch": 70.72, "learning_rate": 9.083369619197189e-05, "loss": 0.0, "step": 16548 }, { "epoch": 70.74, "learning_rate": 9.082696939069329e-05, "loss": 0.0001, "step": 16552 }, { "epoch": 70.75, "learning_rate": 9.082024037132106e-05, "loss": 0.0005, "step": 16556 }, { "epoch": 70.77, "learning_rate": 9.08135091342208e-05, "loss": 0.0002, "step": 16560 }, { "epoch": 70.79, "learning_rate": 9.080677567975823e-05, "loss": 0.0002, "step": 16564 }, { "epoch": 70.8, "learning_rate": 9.080004000829913e-05, "loss": 0.0002, "step": 16568 }, { "epoch": 70.82, "learning_rate": 9.079330212020949e-05, "loss": 0.0001, "step": 16572 }, { "epoch": 70.84, "learning_rate": 9.078656201585533e-05, "loss": 0.0001, "step": 16576 }, { "epoch": 70.85, "learning_rate": 9.077981969560285e-05, "loss": 0.0012, "step": 16580 }, { "epoch": 70.87, "learning_rate": 9.077307515981837e-05, "loss": 0.0005, "step": 16584 }, { "epoch": 70.89, "learning_rate": 9.07663284088683e-05, "loss": 0.0001, "step": 16588 }, { "epoch": 70.91, "learning_rate": 9.07595794431192e-05, "loss": 0.0007, "step": 16592 }, { "epoch": 70.92, "learning_rate": 9.075282826293769e-05, "loss": 0.0006, "step": 16596 }, { "epoch": 70.94, "learning_rate": 9.074607486869062e-05, "loss": 0.0001, "step": 16600 }, { "epoch": 70.96, "learning_rate": 9.073931926074486e-05, "loss": 0.0003, "step": 16604 }, { "epoch": 70.97, "learning_rate": 9.073256143946742e-05, "loss": 0.0008, "step": 16608 }, { "epoch": 70.99, "learning_rate": 9.072580140522549e-05, "loss": 0.0003, "step": 16612 }, { "epoch": 71.01, "learning_rate": 9.071903915838631e-05, "loss": 0.0003, "step": 16616 }, { "epoch": 71.03, "learning_rate": 9.071227469931727e-05, "loss": 0.0003, "step": 16620 }, { "epoch": 71.04, "learning_rate": 9.070550802838587e-05, "loss": 0.0001, "step": 16624 }, { "epoch": 71.06, "learning_rate": 9.069873914595975e-05, "loss": 0.0001, "step": 16628 }, { "epoch": 71.08, "learning_rate": 9.069196805240666e-05, "loss": 0.0016, "step": 16632 }, { "epoch": 71.09, "learning_rate": 9.068519474809446e-05, "loss": 0.0001, "step": 16636 }, { "epoch": 71.11, "learning_rate": 9.067841923339113e-05, "loss": 0.0001, "step": 16640 }, { "epoch": 71.13, "learning_rate": 9.06716415086648e-05, "loss": 0.0001, "step": 16644 }, { "epoch": 71.15, "learning_rate": 9.066486157428368e-05, "loss": 0.0003, "step": 16648 }, { "epoch": 71.16, "learning_rate": 9.065807943061612e-05, "loss": 0.0004, "step": 16652 }, { "epoch": 71.18, "learning_rate": 9.065129507803058e-05, "loss": 0.0003, "step": 16656 }, { "epoch": 71.2, "learning_rate": 9.064450851689567e-05, "loss": 0.0007, "step": 16660 }, { "epoch": 71.21, "learning_rate": 9.063771974758009e-05, "loss": 0.0005, "step": 16664 }, { "epoch": 71.23, "learning_rate": 9.063092877045265e-05, "loss": 0.0034, "step": 16668 }, { "epoch": 71.25, "learning_rate": 9.062413558588232e-05, "loss": 0.0002, "step": 16672 }, { "epoch": 71.26, "learning_rate": 9.061734019423817e-05, "loss": 0.0001, "step": 16676 }, { "epoch": 71.28, "learning_rate": 9.061054259588936e-05, "loss": 0.0001, "step": 16680 }, { "epoch": 71.3, "learning_rate": 9.060374279120523e-05, "loss": 0.0001, "step": 16684 }, { "epoch": 71.32, "learning_rate": 9.059694078055517e-05, "loss": 0.0001, "step": 16688 }, { "epoch": 71.33, "learning_rate": 9.059013656430877e-05, "loss": 0.0001, "step": 16692 }, { "epoch": 71.35, "learning_rate": 9.058333014283566e-05, "loss": 0.0001, "step": 16696 }, { "epoch": 71.37, "learning_rate": 9.057652151650566e-05, "loss": 0.0013, "step": 16700 }, { "epoch": 71.38, "learning_rate": 9.056971068568866e-05, "loss": 0.0001, "step": 16704 }, { "epoch": 71.4, "learning_rate": 9.056289765075468e-05, "loss": 0.0003, "step": 16708 }, { "epoch": 71.42, "learning_rate": 9.055608241207387e-05, "loss": 0.0003, "step": 16712 }, { "epoch": 71.44, "learning_rate": 9.054926497001651e-05, "loss": 0.0001, "step": 16716 }, { "epoch": 71.45, "learning_rate": 9.054244532495297e-05, "loss": 0.0001, "step": 16720 }, { "epoch": 71.47, "learning_rate": 9.053562347725377e-05, "loss": 0.0003, "step": 16724 }, { "epoch": 71.49, "learning_rate": 9.05287994272895e-05, "loss": 0.0001, "step": 16728 }, { "epoch": 71.5, "learning_rate": 9.052197317543096e-05, "loss": 0.0008, "step": 16732 }, { "epoch": 71.52, "learning_rate": 9.051514472204896e-05, "loss": 0.002, "step": 16736 }, { "epoch": 71.54, "learning_rate": 9.050831406751452e-05, "loss": 0.0001, "step": 16740 }, { "epoch": 71.56, "learning_rate": 9.050148121219873e-05, "loss": 0.0001, "step": 16744 }, { "epoch": 71.57, "learning_rate": 9.04946461564728e-05, "loss": 0.0002, "step": 16748 }, { "epoch": 71.59, "learning_rate": 9.048780890070811e-05, "loss": 0.0007, "step": 16752 }, { "epoch": 71.61, "learning_rate": 9.048096944527609e-05, "loss": 0.0001, "step": 16756 }, { "epoch": 71.62, "learning_rate": 9.047412779054834e-05, "loss": 0.0001, "step": 16760 }, { "epoch": 71.64, "learning_rate": 9.046728393689654e-05, "loss": 0.0007, "step": 16764 }, { "epoch": 71.66, "learning_rate": 9.046043788469252e-05, "loss": 0.0005, "step": 16768 }, { "epoch": 71.68, "learning_rate": 9.045358963430824e-05, "loss": 0.0006, "step": 16772 }, { "epoch": 71.69, "learning_rate": 9.044673918611573e-05, "loss": 0.0002, "step": 16776 }, { "epoch": 71.71, "learning_rate": 9.043988654048719e-05, "loss": 0.0004, "step": 16780 }, { "epoch": 71.73, "learning_rate": 9.04330316977949e-05, "loss": 0.0006, "step": 16784 }, { "epoch": 71.74, "learning_rate": 9.042617465841127e-05, "loss": 0.0009, "step": 16788 }, { "epoch": 71.76, "learning_rate": 9.041931542270888e-05, "loss": 0.0001, "step": 16792 }, { "epoch": 71.78, "learning_rate": 9.041245399106036e-05, "loss": 0.0003, "step": 16796 }, { "epoch": 71.79, "learning_rate": 9.040559036383848e-05, "loss": 0.0001, "step": 16800 }, { "epoch": 71.81, "learning_rate": 9.039872454141613e-05, "loss": 0.0001, "step": 16804 }, { "epoch": 71.83, "learning_rate": 9.039185652416635e-05, "loss": 0.0002, "step": 16808 }, { "epoch": 71.85, "learning_rate": 9.038498631246227e-05, "loss": 0.0003, "step": 16812 }, { "epoch": 71.86, "learning_rate": 9.037811390667711e-05, "loss": 0.0002, "step": 16816 }, { "epoch": 71.88, "learning_rate": 9.037123930718426e-05, "loss": 0.0001, "step": 16820 }, { "epoch": 71.9, "learning_rate": 9.036436251435723e-05, "loss": 0.0001, "step": 16824 }, { "epoch": 71.91, "learning_rate": 9.03574835285696e-05, "loss": 0.0002, "step": 16828 }, { "epoch": 71.93, "learning_rate": 9.035060235019513e-05, "loss": 0.0001, "step": 16832 }, { "epoch": 71.95, "learning_rate": 9.034371897960763e-05, "loss": 0.0011, "step": 16836 }, { "epoch": 71.97, "learning_rate": 9.033683341718112e-05, "loss": 0.0001, "step": 16840 }, { "epoch": 71.98, "learning_rate": 9.032994566328963e-05, "loss": 0.0005, "step": 16844 }, { "epoch": 72.0, "learning_rate": 9.032305571830739e-05, "loss": 0.0001, "step": 16848 }, { "epoch": 72.02, "learning_rate": 9.031616358260873e-05, "loss": 0.0006, "step": 16852 }, { "epoch": 72.03, "learning_rate": 9.03092692565681e-05, "loss": 0.0006, "step": 16856 }, { "epoch": 72.05, "learning_rate": 9.030237274056003e-05, "loss": 0.0001, "step": 16860 }, { "epoch": 72.07, "learning_rate": 9.029547403495925e-05, "loss": 0.0001, "step": 16864 }, { "epoch": 72.09, "learning_rate": 9.028857314014052e-05, "loss": 0.0001, "step": 16868 }, { "epoch": 72.1, "learning_rate": 9.028167005647878e-05, "loss": 0.0001, "step": 16872 }, { "epoch": 72.12, "learning_rate": 9.027476478434906e-05, "loss": 0.001, "step": 16876 }, { "epoch": 72.14, "learning_rate": 9.026785732412652e-05, "loss": 0.0002, "step": 16880 }, { "epoch": 72.15, "learning_rate": 9.026094767618645e-05, "loss": 0.0001, "step": 16884 }, { "epoch": 72.17, "learning_rate": 9.025403584090421e-05, "loss": 0.0007, "step": 16888 }, { "epoch": 72.19, "learning_rate": 9.024712181865536e-05, "loss": 0.0001, "step": 16892 }, { "epoch": 72.21, "learning_rate": 9.02402056098155e-05, "loss": 0.0001, "step": 16896 }, { "epoch": 72.22, "learning_rate": 9.023328721476037e-05, "loss": 0.0006, "step": 16900 }, { "epoch": 72.24, "learning_rate": 9.022636663386587e-05, "loss": 0.004, "step": 16904 }, { "epoch": 72.26, "learning_rate": 9.0219443867508e-05, "loss": 0.0001, "step": 16908 }, { "epoch": 72.27, "learning_rate": 9.021251891606284e-05, "loss": 0.0001, "step": 16912 }, { "epoch": 72.29, "learning_rate": 9.020559177990662e-05, "loss": 0.0001, "step": 16916 }, { "epoch": 72.31, "learning_rate": 9.019866245941569e-05, "loss": 0.0001, "step": 16920 }, { "epoch": 72.32, "learning_rate": 9.01917309549665e-05, "loss": 0.0001, "step": 16924 }, { "epoch": 72.34, "learning_rate": 9.018479726693565e-05, "loss": 0.0001, "step": 16928 }, { "epoch": 72.36, "learning_rate": 9.017786139569985e-05, "loss": 0.0001, "step": 16932 }, { "epoch": 72.38, "learning_rate": 9.017092334163591e-05, "loss": 0.0001, "step": 16936 }, { "epoch": 72.39, "learning_rate": 9.016398310512075e-05, "loss": 0.0001, "step": 16940 }, { "epoch": 72.41, "learning_rate": 9.015704068653144e-05, "loss": 0.0002, "step": 16944 }, { "epoch": 72.43, "learning_rate": 9.015009608624516e-05, "loss": 0.0001, "step": 16948 }, { "epoch": 72.44, "learning_rate": 9.01431493046392e-05, "loss": 0.0001, "step": 16952 }, { "epoch": 72.46, "learning_rate": 9.0136200342091e-05, "loss": 0.0006, "step": 16956 }, { "epoch": 72.48, "learning_rate": 9.012924919897805e-05, "loss": 0.0003, "step": 16960 }, { "epoch": 72.5, "learning_rate": 9.0122295875678e-05, "loss": 0.0002, "step": 16964 }, { "epoch": 72.51, "learning_rate": 9.011534037256866e-05, "loss": 0.0002, "step": 16968 }, { "epoch": 72.53, "learning_rate": 9.010838269002787e-05, "loss": 0.0004, "step": 16972 }, { "epoch": 72.55, "learning_rate": 9.010142282843365e-05, "loss": 0.0009, "step": 16976 }, { "epoch": 72.56, "learning_rate": 9.009446078816414e-05, "loss": 0.0002, "step": 16980 }, { "epoch": 72.58, "learning_rate": 9.008749656959758e-05, "loss": 0.0004, "step": 16984 }, { "epoch": 72.6, "learning_rate": 9.00805301731123e-05, "loss": 0.0, "step": 16988 }, { "epoch": 72.62, "learning_rate": 9.00735615990868e-05, "loss": 0.0005, "step": 16992 }, { "epoch": 72.63, "learning_rate": 9.006659084789967e-05, "loss": 0.0002, "step": 16996 }, { "epoch": 72.65, "learning_rate": 9.005961791992965e-05, "loss": 0.0018, "step": 17000 }, { "epoch": 72.65, "eval_exact_match": 0.5197505197505198, "eval_loss": 0.90228271484375, "eval_runtime": 140.6077, "eval_samples_per_second": 6.842, "step": 17000 }, { "epoch": 72.67, "learning_rate": 9.005264281555554e-05, "loss": 0.0001, "step": 17004 }, { "epoch": 72.68, "learning_rate": 9.004566553515629e-05, "loss": 0.0001, "step": 17008 }, { "epoch": 72.7, "learning_rate": 9.0038686079111e-05, "loss": 0.0002, "step": 17012 }, { "epoch": 72.72, "learning_rate": 9.003170444779882e-05, "loss": 0.0006, "step": 17016 }, { "epoch": 72.74, "learning_rate": 9.00247206415991e-05, "loss": 0.0011, "step": 17020 }, { "epoch": 72.75, "learning_rate": 9.001773466089123e-05, "loss": 0.0013, "step": 17024 }, { "epoch": 72.77, "learning_rate": 9.001074650605477e-05, "loss": 0.0001, "step": 17028 }, { "epoch": 72.79, "learning_rate": 9.000375617746937e-05, "loss": 0.0012, "step": 17032 }, { "epoch": 72.8, "learning_rate": 8.999676367551479e-05, "loss": 0.0004, "step": 17036 }, { "epoch": 72.82, "learning_rate": 8.998976900057097e-05, "loss": 0.0001, "step": 17040 }, { "epoch": 72.84, "learning_rate": 8.99827721530179e-05, "loss": 0.0001, "step": 17044 }, { "epoch": 72.85, "learning_rate": 8.997577313323571e-05, "loss": 0.0001, "step": 17048 }, { "epoch": 72.87, "learning_rate": 8.996877194160466e-05, "loss": 0.0002, "step": 17052 }, { "epoch": 72.89, "learning_rate": 8.996176857850511e-05, "loss": 0.0001, "step": 17056 }, { "epoch": 72.91, "learning_rate": 8.995476304431756e-05, "loss": 0.0002, "step": 17060 }, { "epoch": 72.92, "learning_rate": 8.99477553394226e-05, "loss": 0.0008, "step": 17064 }, { "epoch": 72.94, "learning_rate": 8.994074546420096e-05, "loss": 0.0001, "step": 17068 }, { "epoch": 72.96, "learning_rate": 8.993373341903348e-05, "loss": 0.0, "step": 17072 }, { "epoch": 72.97, "learning_rate": 8.992671920430111e-05, "loss": 0.0011, "step": 17076 }, { "epoch": 72.99, "learning_rate": 8.991970282038493e-05, "loss": 0.0002, "step": 17080 }, { "epoch": 73.01, "learning_rate": 8.991268426766616e-05, "loss": 0.0002, "step": 17084 }, { "epoch": 73.03, "learning_rate": 8.990566354652606e-05, "loss": 0.0002, "step": 17088 }, { "epoch": 73.04, "learning_rate": 8.98986406573461e-05, "loss": 0.0001, "step": 17092 }, { "epoch": 73.06, "learning_rate": 8.989161560050782e-05, "loss": 0.0015, "step": 17096 }, { "epoch": 73.08, "learning_rate": 8.988458837639289e-05, "loss": 0.0009, "step": 17100 }, { "epoch": 73.09, "learning_rate": 8.987755898538307e-05, "loss": 0.0001, "step": 17104 }, { "epoch": 73.11, "learning_rate": 8.987052742786028e-05, "loss": 0.0001, "step": 17108 }, { "epoch": 73.13, "learning_rate": 8.986349370420652e-05, "loss": 0.0029, "step": 17112 }, { "epoch": 73.15, "learning_rate": 8.985645781480396e-05, "loss": 0.0005, "step": 17116 }, { "epoch": 73.16, "learning_rate": 8.984941976003481e-05, "loss": 0.0, "step": 17120 }, { "epoch": 73.18, "learning_rate": 8.984237954028148e-05, "loss": 0.0001, "step": 17124 }, { "epoch": 73.2, "learning_rate": 8.983533715592645e-05, "loss": 0.0002, "step": 17128 }, { "epoch": 73.21, "learning_rate": 8.982829260735231e-05, "loss": 0.0008, "step": 17132 }, { "epoch": 73.23, "learning_rate": 8.982124589494178e-05, "loss": 0.0001, "step": 17136 }, { "epoch": 73.25, "learning_rate": 8.981419701907773e-05, "loss": 0.0015, "step": 17140 }, { "epoch": 73.26, "learning_rate": 8.980714598014311e-05, "loss": 0.0002, "step": 17144 }, { "epoch": 73.28, "learning_rate": 8.980009277852099e-05, "loss": 0.0001, "step": 17148 }, { "epoch": 73.3, "learning_rate": 8.979303741459457e-05, "loss": 0.0001, "step": 17152 }, { "epoch": 73.32, "learning_rate": 8.978597988874715e-05, "loss": 0.0001, "step": 17156 }, { "epoch": 73.33, "learning_rate": 8.977892020136216e-05, "loss": 0.0, "step": 17160 }, { "epoch": 73.35, "learning_rate": 8.977185835282316e-05, "loss": 0.0001, "step": 17164 }, { "epoch": 73.37, "learning_rate": 8.976479434351382e-05, "loss": 0.0003, "step": 17168 }, { "epoch": 73.38, "learning_rate": 8.975772817381789e-05, "loss": 0.0001, "step": 17172 }, { "epoch": 73.4, "learning_rate": 8.97506598441193e-05, "loss": 0.0002, "step": 17176 }, { "epoch": 73.42, "learning_rate": 8.974358935480205e-05, "loss": 0.0002, "step": 17180 }, { "epoch": 73.44, "learning_rate": 8.973651670625028e-05, "loss": 0.0001, "step": 17184 }, { "epoch": 73.45, "learning_rate": 8.972944189884824e-05, "loss": 0.0001, "step": 17188 }, { "epoch": 73.47, "learning_rate": 8.97223649329803e-05, "loss": 0.0001, "step": 17192 }, { "epoch": 73.49, "learning_rate": 8.971528580903093e-05, "loss": 0.0001, "step": 17196 }, { "epoch": 73.5, "learning_rate": 8.970820452738475e-05, "loss": 0.0003, "step": 17200 }, { "epoch": 73.52, "learning_rate": 8.970112108842649e-05, "loss": 0.0001, "step": 17204 }, { "epoch": 73.54, "learning_rate": 8.969403549254097e-05, "loss": 0.0001, "step": 17208 }, { "epoch": 73.56, "learning_rate": 8.968694774011312e-05, "loss": 0.0001, "step": 17212 }, { "epoch": 73.57, "learning_rate": 8.967985783152805e-05, "loss": 0.0, "step": 17216 }, { "epoch": 73.59, "learning_rate": 8.967276576717094e-05, "loss": 0.0001, "step": 17220 }, { "epoch": 73.61, "learning_rate": 8.966567154742709e-05, "loss": 0.0001, "step": 17224 }, { "epoch": 73.62, "learning_rate": 8.965857517268193e-05, "loss": 0.0, "step": 17228 }, { "epoch": 73.64, "learning_rate": 8.965147664332099e-05, "loss": 0.0005, "step": 17232 }, { "epoch": 73.66, "learning_rate": 8.964437595972993e-05, "loss": 0.0001, "step": 17236 }, { "epoch": 73.68, "learning_rate": 8.963727312229452e-05, "loss": 0.0005, "step": 17240 }, { "epoch": 73.69, "learning_rate": 8.963016813140066e-05, "loss": 0.0002, "step": 17244 }, { "epoch": 73.71, "learning_rate": 8.962306098743435e-05, "loss": 0.0003, "step": 17248 }, { "epoch": 73.73, "learning_rate": 8.961595169078172e-05, "loss": 0.0001, "step": 17252 }, { "epoch": 73.74, "learning_rate": 8.960884024182902e-05, "loss": 0.0001, "step": 17256 }, { "epoch": 73.76, "learning_rate": 8.96017266409626e-05, "loss": 0.0003, "step": 17260 }, { "epoch": 73.78, "learning_rate": 8.959461088856893e-05, "loss": 0.0, "step": 17264 }, { "epoch": 73.79, "learning_rate": 8.95874929850346e-05, "loss": 0.0, "step": 17268 }, { "epoch": 73.81, "learning_rate": 8.958037293074634e-05, "loss": 0.0004, "step": 17272 }, { "epoch": 73.83, "learning_rate": 8.957325072609095e-05, "loss": 0.0001, "step": 17276 }, { "epoch": 73.85, "learning_rate": 8.95661263714554e-05, "loss": 0.0001, "step": 17280 }, { "epoch": 73.86, "learning_rate": 8.955899986722673e-05, "loss": 0.001, "step": 17284 }, { "epoch": 73.88, "learning_rate": 8.955187121379213e-05, "loss": 0.0003, "step": 17288 }, { "epoch": 73.9, "learning_rate": 8.954474041153889e-05, "loss": 0.0001, "step": 17292 }, { "epoch": 73.91, "learning_rate": 8.95376074608544e-05, "loss": 0.0007, "step": 17296 }, { "epoch": 73.93, "learning_rate": 8.953047236212621e-05, "loss": 0.0001, "step": 17300 }, { "epoch": 73.95, "learning_rate": 8.952333511574196e-05, "loss": 0.0001, "step": 17304 }, { "epoch": 73.97, "learning_rate": 8.951619572208942e-05, "loss": 0.0001, "step": 17308 }, { "epoch": 73.98, "learning_rate": 8.950905418155644e-05, "loss": 0.0024, "step": 17312 }, { "epoch": 74.0, "learning_rate": 8.950191049453104e-05, "loss": 0.0003, "step": 17316 }, { "epoch": 74.02, "learning_rate": 8.949476466140132e-05, "loss": 0.0022, "step": 17320 }, { "epoch": 74.03, "learning_rate": 8.94876166825555e-05, "loss": 0.0001, "step": 17324 }, { "epoch": 74.05, "learning_rate": 8.948046655838193e-05, "loss": 0.0001, "step": 17328 }, { "epoch": 74.07, "learning_rate": 8.947331428926906e-05, "loss": 0.0008, "step": 17332 }, { "epoch": 74.09, "learning_rate": 8.94661598756055e-05, "loss": 0.0004, "step": 17336 }, { "epoch": 74.1, "learning_rate": 8.94590033177799e-05, "loss": 0.0002, "step": 17340 }, { "epoch": 74.12, "learning_rate": 8.945184461618111e-05, "loss": 0.0008, "step": 17344 }, { "epoch": 74.14, "learning_rate": 8.944468377119801e-05, "loss": 0.0002, "step": 17348 }, { "epoch": 74.15, "learning_rate": 8.943752078321968e-05, "loss": 0.0005, "step": 17352 }, { "epoch": 74.17, "learning_rate": 8.943035565263526e-05, "loss": 0.0011, "step": 17356 }, { "epoch": 74.19, "learning_rate": 8.942318837983403e-05, "loss": 0.0002, "step": 17360 }, { "epoch": 74.21, "learning_rate": 8.94160189652054e-05, "loss": 0.0031, "step": 17364 }, { "epoch": 74.22, "learning_rate": 8.940884740913884e-05, "loss": 0.0001, "step": 17368 }, { "epoch": 74.24, "learning_rate": 8.940167371202401e-05, "loss": 0.0001, "step": 17372 }, { "epoch": 74.26, "learning_rate": 8.939449787425062e-05, "loss": 0.0028, "step": 17376 }, { "epoch": 74.27, "learning_rate": 8.938731989620857e-05, "loss": 0.0, "step": 17380 }, { "epoch": 74.29, "learning_rate": 8.938013977828778e-05, "loss": 0.0, "step": 17384 }, { "epoch": 74.31, "learning_rate": 8.937295752087839e-05, "loss": 0.0001, "step": 17388 }, { "epoch": 74.32, "learning_rate": 8.936577312437056e-05, "loss": 0.0016, "step": 17392 }, { "epoch": 74.34, "learning_rate": 8.935858658915466e-05, "loss": 0.0001, "step": 17396 }, { "epoch": 74.36, "learning_rate": 8.935139791562109e-05, "loss": 0.0, "step": 17400 }, { "epoch": 74.38, "learning_rate": 8.93442071041604e-05, "loss": 0.0002, "step": 17404 }, { "epoch": 74.39, "learning_rate": 8.933701415516329e-05, "loss": 0.0018, "step": 17408 }, { "epoch": 74.41, "learning_rate": 8.932981906902053e-05, "loss": 0.0, "step": 17412 }, { "epoch": 74.43, "learning_rate": 8.932262184612304e-05, "loss": 0.0001, "step": 17416 }, { "epoch": 74.44, "learning_rate": 8.93154224868618e-05, "loss": 0.0001, "step": 17420 }, { "epoch": 74.46, "learning_rate": 8.930822099162798e-05, "loss": 0.0002, "step": 17424 }, { "epoch": 74.48, "learning_rate": 8.930101736081284e-05, "loss": 0.0001, "step": 17428 }, { "epoch": 74.5, "learning_rate": 8.929381159480772e-05, "loss": 0.0001, "step": 17432 }, { "epoch": 74.51, "learning_rate": 8.92866036940041e-05, "loss": 0.0001, "step": 17436 }, { "epoch": 74.53, "learning_rate": 8.927939365879359e-05, "loss": 0.0003, "step": 17440 }, { "epoch": 74.55, "learning_rate": 8.92721814895679e-05, "loss": 0.0001, "step": 17444 }, { "epoch": 74.56, "learning_rate": 8.926496718671889e-05, "loss": 0.0005, "step": 17448 }, { "epoch": 74.58, "learning_rate": 8.925775075063847e-05, "loss": 0.0005, "step": 17452 }, { "epoch": 74.6, "learning_rate": 8.925053218171871e-05, "loss": 0.0001, "step": 17456 }, { "epoch": 74.62, "learning_rate": 8.924331148035179e-05, "loss": 0.0001, "step": 17460 }, { "epoch": 74.63, "learning_rate": 8.923608864693003e-05, "loss": 0.0001, "step": 17464 }, { "epoch": 74.65, "learning_rate": 8.92288636818458e-05, "loss": 0.0001, "step": 17468 }, { "epoch": 74.67, "learning_rate": 8.922163658549164e-05, "loss": 0.0001, "step": 17472 }, { "epoch": 74.68, "learning_rate": 8.92144073582602e-05, "loss": 0.0001, "step": 17476 }, { "epoch": 74.7, "learning_rate": 8.920717600054425e-05, "loss": 0.0005, "step": 17480 }, { "epoch": 74.72, "learning_rate": 8.919994251273664e-05, "loss": 0.0001, "step": 17484 }, { "epoch": 74.74, "learning_rate": 8.919270689523036e-05, "loss": 0.0001, "step": 17488 }, { "epoch": 74.75, "learning_rate": 8.918546914841853e-05, "loss": 0.0001, "step": 17492 }, { "epoch": 74.77, "learning_rate": 8.917822927269434e-05, "loss": 0.0009, "step": 17496 }, { "epoch": 74.79, "learning_rate": 8.917098726845117e-05, "loss": 0.0, "step": 17500 }, { "epoch": 74.8, "learning_rate": 8.916374313608244e-05, "loss": 0.0009, "step": 17504 }, { "epoch": 74.82, "learning_rate": 8.915649687598174e-05, "loss": 0.0001, "step": 17508 }, { "epoch": 74.84, "learning_rate": 8.914924848854271e-05, "loss": 0.0001, "step": 17512 }, { "epoch": 74.85, "learning_rate": 8.914199797415921e-05, "loss": 0.0004, "step": 17516 }, { "epoch": 74.87, "learning_rate": 8.91347453332251e-05, "loss": 0.0001, "step": 17520 }, { "epoch": 74.89, "learning_rate": 8.912749056613444e-05, "loss": 0.0023, "step": 17524 }, { "epoch": 74.91, "learning_rate": 8.912023367328137e-05, "loss": 0.0002, "step": 17528 }, { "epoch": 74.92, "learning_rate": 8.911297465506015e-05, "loss": 0.0001, "step": 17532 }, { "epoch": 74.94, "learning_rate": 8.910571351186515e-05, "loss": 0.0003, "step": 17536 }, { "epoch": 74.96, "learning_rate": 8.909845024409086e-05, "loss": 0.0011, "step": 17540 }, { "epoch": 74.97, "learning_rate": 8.90911848521319e-05, "loss": 0.0001, "step": 17544 }, { "epoch": 74.99, "learning_rate": 8.908391733638299e-05, "loss": 0.0001, "step": 17548 }, { "epoch": 75.01, "learning_rate": 8.907664769723895e-05, "loss": 0.0001, "step": 17552 }, { "epoch": 75.03, "learning_rate": 8.906937593509476e-05, "loss": 0.0001, "step": 17556 }, { "epoch": 75.04, "learning_rate": 8.906210205034546e-05, "loss": 0.0003, "step": 17560 }, { "epoch": 75.06, "learning_rate": 8.905482604338626e-05, "loss": 0.0012, "step": 17564 }, { "epoch": 75.08, "learning_rate": 8.904754791461245e-05, "loss": 0.0001, "step": 17568 }, { "epoch": 75.09, "learning_rate": 8.904026766441944e-05, "loss": 0.0002, "step": 17572 }, { "epoch": 75.11, "learning_rate": 8.903298529320275e-05, "loss": 0.0004, "step": 17576 }, { "epoch": 75.13, "learning_rate": 8.902570080135805e-05, "loss": 0.0003, "step": 17580 }, { "epoch": 75.15, "learning_rate": 8.901841418928108e-05, "loss": 0.0, "step": 17584 }, { "epoch": 75.16, "learning_rate": 8.901112545736771e-05, "loss": 0.0002, "step": 17588 }, { "epoch": 75.18, "learning_rate": 8.900383460601395e-05, "loss": 0.0002, "step": 17592 }, { "epoch": 75.2, "learning_rate": 8.899654163561592e-05, "loss": 0.0013, "step": 17596 }, { "epoch": 75.21, "learning_rate": 8.898924654656979e-05, "loss": 0.0003, "step": 17600 }, { "epoch": 75.23, "learning_rate": 8.898194933927194e-05, "loss": 0.0001, "step": 17604 }, { "epoch": 75.25, "learning_rate": 8.89746500141188e-05, "loss": 0.0002, "step": 17608 }, { "epoch": 75.26, "learning_rate": 8.896734857150695e-05, "loss": 0.0004, "step": 17612 }, { "epoch": 75.28, "learning_rate": 8.896004501183305e-05, "loss": 0.0003, "step": 17616 }, { "epoch": 75.3, "learning_rate": 8.895273933549391e-05, "loss": 0.0006, "step": 17620 }, { "epoch": 75.32, "learning_rate": 8.894543154288644e-05, "loss": 0.0005, "step": 17624 }, { "epoch": 75.33, "learning_rate": 8.893812163440767e-05, "loss": 0.001, "step": 17628 }, { "epoch": 75.35, "learning_rate": 8.893080961045472e-05, "loss": 0.0002, "step": 17632 }, { "epoch": 75.37, "learning_rate": 8.892349547142488e-05, "loss": 0.0001, "step": 17636 }, { "epoch": 75.38, "learning_rate": 8.891617921771548e-05, "loss": 0.0001, "step": 17640 }, { "epoch": 75.4, "learning_rate": 8.890886084972406e-05, "loss": 0.001, "step": 17644 }, { "epoch": 75.42, "learning_rate": 8.890154036784817e-05, "loss": 0.0003, "step": 17648 }, { "epoch": 75.44, "learning_rate": 8.889421777248556e-05, "loss": 0.0002, "step": 17652 }, { "epoch": 75.45, "learning_rate": 8.888689306403402e-05, "loss": 0.001, "step": 17656 }, { "epoch": 75.47, "learning_rate": 8.887956624289154e-05, "loss": 0.0001, "step": 17660 }, { "epoch": 75.49, "learning_rate": 8.887223730945616e-05, "loss": 0.0001, "step": 17664 }, { "epoch": 75.5, "learning_rate": 8.886490626412604e-05, "loss": 0.0004, "step": 17668 }, { "epoch": 75.52, "learning_rate": 8.885757310729948e-05, "loss": 0.0002, "step": 17672 }, { "epoch": 75.54, "learning_rate": 8.885023783937491e-05, "loss": 0.0007, "step": 17676 }, { "epoch": 75.56, "learning_rate": 8.88429004607508e-05, "loss": 0.0001, "step": 17680 }, { "epoch": 75.57, "learning_rate": 8.883556097182582e-05, "loss": 0.0003, "step": 17684 }, { "epoch": 75.59, "learning_rate": 8.882821937299873e-05, "loss": 0.0004, "step": 17688 }, { "epoch": 75.61, "learning_rate": 8.882087566466833e-05, "loss": 0.0008, "step": 17692 }, { "epoch": 75.62, "learning_rate": 8.881352984723365e-05, "loss": 0.0001, "step": 17696 }, { "epoch": 75.64, "learning_rate": 8.880618192109379e-05, "loss": 0.0001, "step": 17700 }, { "epoch": 75.66, "learning_rate": 8.879883188664793e-05, "loss": 0.0003, "step": 17704 }, { "epoch": 75.68, "learning_rate": 8.879147974429538e-05, "loss": 0.0001, "step": 17708 }, { "epoch": 75.69, "learning_rate": 8.87841254944356e-05, "loss": 0.0013, "step": 17712 }, { "epoch": 75.71, "learning_rate": 8.877676913746813e-05, "loss": 0.0001, "step": 17716 }, { "epoch": 75.73, "learning_rate": 8.876941067379264e-05, "loss": 0.0001, "step": 17720 }, { "epoch": 75.74, "learning_rate": 8.876205010380891e-05, "loss": 0.0002, "step": 17724 }, { "epoch": 75.76, "learning_rate": 8.875468742791682e-05, "loss": 0.0002, "step": 17728 }, { "epoch": 75.78, "learning_rate": 8.874732264651639e-05, "loss": 0.0001, "step": 17732 }, { "epoch": 75.79, "learning_rate": 8.873995576000774e-05, "loss": 0.0004, "step": 17736 }, { "epoch": 75.81, "learning_rate": 8.87325867687911e-05, "loss": 0.0001, "step": 17740 }, { "epoch": 75.83, "learning_rate": 8.872521567326683e-05, "loss": 0.0001, "step": 17744 }, { "epoch": 75.85, "learning_rate": 8.871784247383539e-05, "loss": 0.0002, "step": 17748 }, { "epoch": 75.86, "learning_rate": 8.871046717089735e-05, "loss": 0.0001, "step": 17752 }, { "epoch": 75.88, "learning_rate": 8.870308976485344e-05, "loss": 0.0, "step": 17756 }, { "epoch": 75.9, "learning_rate": 8.869571025610442e-05, "loss": 0.0002, "step": 17760 }, { "epoch": 75.91, "learning_rate": 8.868832864505125e-05, "loss": 0.0, "step": 17764 }, { "epoch": 75.93, "learning_rate": 8.868094493209493e-05, "loss": 0.0001, "step": 17768 }, { "epoch": 75.95, "learning_rate": 8.867355911763667e-05, "loss": 0.0005, "step": 17772 }, { "epoch": 75.97, "learning_rate": 8.866617120207767e-05, "loss": 0.0002, "step": 17776 }, { "epoch": 75.98, "learning_rate": 8.865878118581932e-05, "loss": 0.0005, "step": 17780 }, { "epoch": 76.0, "learning_rate": 8.865138906926316e-05, "loss": 0.0015, "step": 17784 }, { "epoch": 76.02, "learning_rate": 8.864399485281074e-05, "loss": 0.0001, "step": 17788 }, { "epoch": 76.03, "learning_rate": 8.863659853686384e-05, "loss": 0.0001, "step": 17792 }, { "epoch": 76.05, "learning_rate": 8.862920012182423e-05, "loss": 0.0004, "step": 17796 }, { "epoch": 76.07, "learning_rate": 8.862179960809391e-05, "loss": 0.0004, "step": 17800 }, { "epoch": 76.09, "learning_rate": 8.861439699607492e-05, "loss": 0.0001, "step": 17804 }, { "epoch": 76.1, "learning_rate": 8.860699228616945e-05, "loss": 0.0001, "step": 17808 }, { "epoch": 76.12, "learning_rate": 8.859958547877978e-05, "loss": 0.0006, "step": 17812 }, { "epoch": 76.14, "learning_rate": 8.859217657430831e-05, "loss": 0.0012, "step": 17816 }, { "epoch": 76.15, "learning_rate": 8.858476557315758e-05, "loss": 0.0001, "step": 17820 }, { "epoch": 76.17, "learning_rate": 8.857735247573022e-05, "loss": 0.0005, "step": 17824 }, { "epoch": 76.19, "learning_rate": 8.856993728242896e-05, "loss": 0.0001, "step": 17828 }, { "epoch": 76.21, "learning_rate": 8.856251999365666e-05, "loss": 0.0002, "step": 17832 }, { "epoch": 76.22, "learning_rate": 8.855510060981633e-05, "loss": 0.0001, "step": 17836 }, { "epoch": 76.24, "learning_rate": 8.8547679131311e-05, "loss": 0.0004, "step": 17840 }, { "epoch": 76.26, "learning_rate": 8.854025555854395e-05, "loss": 0.0001, "step": 17844 }, { "epoch": 76.27, "learning_rate": 8.853282989191842e-05, "loss": 0.0001, "step": 17848 }, { "epoch": 76.29, "learning_rate": 8.852540213183789e-05, "loss": 0.0001, "step": 17852 }, { "epoch": 76.31, "learning_rate": 8.851797227870589e-05, "loss": 0.001, "step": 17856 }, { "epoch": 76.32, "learning_rate": 8.851054033292604e-05, "loss": 0.0002, "step": 17860 }, { "epoch": 76.34, "learning_rate": 8.850310629490218e-05, "loss": 0.0001, "step": 17864 }, { "epoch": 76.36, "learning_rate": 8.849567016503814e-05, "loss": 0.0005, "step": 17868 }, { "epoch": 76.38, "learning_rate": 8.848823194373796e-05, "loss": 0.0002, "step": 17872 }, { "epoch": 76.39, "learning_rate": 8.848079163140573e-05, "loss": 0.0002, "step": 17876 }, { "epoch": 76.41, "learning_rate": 8.847334922844566e-05, "loss": 0.0001, "step": 17880 }, { "epoch": 76.43, "learning_rate": 8.846590473526209e-05, "loss": 0.0004, "step": 17884 }, { "epoch": 76.44, "learning_rate": 8.845845815225951e-05, "loss": 0.0012, "step": 17888 }, { "epoch": 76.46, "learning_rate": 8.845100947984246e-05, "loss": 0.0001, "step": 17892 }, { "epoch": 76.48, "learning_rate": 8.844355871841563e-05, "loss": 0.0002, "step": 17896 }, { "epoch": 76.5, "learning_rate": 8.84361058683838e-05, "loss": 0.0005, "step": 17900 }, { "epoch": 76.51, "learning_rate": 8.842865093015189e-05, "loss": 0.0001, "step": 17904 }, { "epoch": 76.53, "learning_rate": 8.84211939041249e-05, "loss": 0.0005, "step": 17908 }, { "epoch": 76.55, "learning_rate": 8.8413734790708e-05, "loss": 0.0001, "step": 17912 }, { "epoch": 76.56, "learning_rate": 8.840627359030642e-05, "loss": 0.0012, "step": 17916 }, { "epoch": 76.58, "learning_rate": 8.83988103033255e-05, "loss": 0.0001, "step": 17920 }, { "epoch": 76.6, "learning_rate": 8.839134493017074e-05, "loss": 0.0003, "step": 17924 }, { "epoch": 76.62, "learning_rate": 8.83838774712477e-05, "loss": 0.0002, "step": 17928 }, { "epoch": 76.63, "learning_rate": 8.837640792696212e-05, "loss": 0.0002, "step": 17932 }, { "epoch": 76.65, "learning_rate": 8.836893629771977e-05, "loss": 0.0006, "step": 17936 }, { "epoch": 76.67, "learning_rate": 8.836146258392661e-05, "loss": 0.0004, "step": 17940 }, { "epoch": 76.68, "learning_rate": 8.835398678598867e-05, "loss": 0.0008, "step": 17944 }, { "epoch": 76.7, "learning_rate": 8.83465089043121e-05, "loss": 0.0001, "step": 17948 }, { "epoch": 76.72, "learning_rate": 8.833902893930317e-05, "loss": 0.0006, "step": 17952 }, { "epoch": 76.74, "learning_rate": 8.833154689136826e-05, "loss": 0.0016, "step": 17956 }, { "epoch": 76.75, "learning_rate": 8.832406276091386e-05, "loss": 0.0003, "step": 17960 }, { "epoch": 76.77, "learning_rate": 8.831657654834658e-05, "loss": 0.0006, "step": 17964 }, { "epoch": 76.79, "learning_rate": 8.830908825407314e-05, "loss": 0.0005, "step": 17968 }, { "epoch": 76.8, "learning_rate": 8.830159787850036e-05, "loss": 0.0008, "step": 17972 }, { "epoch": 76.82, "learning_rate": 8.829410542203522e-05, "loss": 0.0011, "step": 17976 }, { "epoch": 76.84, "learning_rate": 8.828661088508473e-05, "loss": 0.0003, "step": 17980 }, { "epoch": 76.85, "learning_rate": 8.82791142680561e-05, "loss": 0.0014, "step": 17984 }, { "epoch": 76.87, "learning_rate": 8.827161557135659e-05, "loss": 0.0001, "step": 17988 }, { "epoch": 76.89, "learning_rate": 8.826411479539363e-05, "loss": 0.0001, "step": 17992 }, { "epoch": 76.91, "learning_rate": 8.825661194057469e-05, "loss": 0.001, "step": 17996 }, { "epoch": 76.92, "learning_rate": 8.824910700730742e-05, "loss": 0.0001, "step": 18000 }, { "epoch": 76.92, "eval_exact_match": 0.5135135135135135, "eval_loss": 0.9052860736846924, "eval_runtime": 171.5376, "eval_samples_per_second": 5.608, "step": 18000 }, { "epoch": 76.94, "learning_rate": 8.824159999599955e-05, "loss": 0.0002, "step": 18004 }, { "epoch": 76.96, "learning_rate": 8.823409090705892e-05, "loss": 0.0014, "step": 18008 }, { "epoch": 76.97, "learning_rate": 8.822657974089352e-05, "loss": 0.0001, "step": 18012 }, { "epoch": 76.99, "learning_rate": 8.82190664979114e-05, "loss": 0.0014, "step": 18016 }, { "epoch": 77.01, "learning_rate": 8.821155117852074e-05, "loss": 0.0003, "step": 18020 }, { "epoch": 77.03, "learning_rate": 8.820403378312987e-05, "loss": 0.0002, "step": 18024 }, { "epoch": 77.04, "learning_rate": 8.819651431214717e-05, "loss": 0.0, "step": 18028 }, { "epoch": 77.06, "learning_rate": 8.81889927659812e-05, "loss": 0.0001, "step": 18032 }, { "epoch": 77.08, "learning_rate": 8.818146914504058e-05, "loss": 0.0001, "step": 18036 }, { "epoch": 77.09, "learning_rate": 8.817394344973406e-05, "loss": 0.0001, "step": 18040 }, { "epoch": 77.11, "learning_rate": 8.816641568047052e-05, "loss": 0.0004, "step": 18044 }, { "epoch": 77.13, "learning_rate": 8.815888583765893e-05, "loss": 0.0001, "step": 18048 }, { "epoch": 77.15, "learning_rate": 8.815135392170836e-05, "loss": 0.0001, "step": 18052 }, { "epoch": 77.16, "learning_rate": 8.814381993302805e-05, "loss": 0.0001, "step": 18056 }, { "epoch": 77.18, "learning_rate": 8.813628387202728e-05, "loss": 0.0002, "step": 18060 }, { "epoch": 77.2, "learning_rate": 8.81287457391155e-05, "loss": 0.0003, "step": 18064 }, { "epoch": 77.21, "learning_rate": 8.812120553470223e-05, "loss": 0.0001, "step": 18068 }, { "epoch": 77.23, "learning_rate": 8.811366325919715e-05, "loss": 0.0011, "step": 18072 }, { "epoch": 77.25, "learning_rate": 8.810611891301e-05, "loss": 0.0001, "step": 18076 }, { "epoch": 77.26, "learning_rate": 8.809857249655066e-05, "loss": 0.0003, "step": 18080 }, { "epoch": 77.28, "learning_rate": 8.809102401022916e-05, "loss": 0.0001, "step": 18084 }, { "epoch": 77.3, "learning_rate": 8.808347345445555e-05, "loss": 0.0001, "step": 18088 }, { "epoch": 77.32, "learning_rate": 8.807592082964006e-05, "loss": 0.0001, "step": 18092 }, { "epoch": 77.33, "learning_rate": 8.806836613619303e-05, "loss": 0.0001, "step": 18096 }, { "epoch": 77.35, "learning_rate": 8.806080937452489e-05, "loss": 0.0, "step": 18100 }, { "epoch": 77.37, "learning_rate": 8.80532505450462e-05, "loss": 0.0005, "step": 18104 }, { "epoch": 77.38, "learning_rate": 8.804568964816762e-05, "loss": 0.0009, "step": 18108 }, { "epoch": 77.4, "learning_rate": 8.803812668429992e-05, "loss": 0.0001, "step": 18112 }, { "epoch": 77.42, "learning_rate": 8.8030561653854e-05, "loss": 0.0025, "step": 18116 }, { "epoch": 77.44, "learning_rate": 8.802299455724086e-05, "loss": 0.0012, "step": 18120 }, { "epoch": 77.45, "learning_rate": 8.80154253948716e-05, "loss": 0.0001, "step": 18124 }, { "epoch": 77.47, "learning_rate": 8.800785416715747e-05, "loss": 0.0003, "step": 18128 }, { "epoch": 77.49, "learning_rate": 8.80002808745098e-05, "loss": 0.0005, "step": 18132 }, { "epoch": 77.5, "learning_rate": 8.799270551734002e-05, "loss": 0.0017, "step": 18136 }, { "epoch": 77.52, "learning_rate": 8.798512809605973e-05, "loss": 0.0009, "step": 18140 }, { "epoch": 77.54, "learning_rate": 8.797754861108056e-05, "loss": 0.0006, "step": 18144 }, { "epoch": 77.56, "learning_rate": 8.796996706281433e-05, "loss": 0.0003, "step": 18148 }, { "epoch": 77.57, "learning_rate": 8.796238345167293e-05, "loss": 0.0001, "step": 18152 }, { "epoch": 77.59, "learning_rate": 8.795479777806838e-05, "loss": 0.0001, "step": 18156 }, { "epoch": 77.61, "learning_rate": 8.794721004241277e-05, "loss": 0.0001, "step": 18160 }, { "epoch": 77.62, "learning_rate": 8.79396202451184e-05, "loss": 0.0007, "step": 18164 }, { "epoch": 77.64, "learning_rate": 8.793202838659753e-05, "loss": 0.0001, "step": 18168 }, { "epoch": 77.66, "learning_rate": 8.792443446726268e-05, "loss": 0.0017, "step": 18172 }, { "epoch": 77.68, "learning_rate": 8.79168384875264e-05, "loss": 0.0001, "step": 18176 }, { "epoch": 77.69, "learning_rate": 8.790924044780139e-05, "loss": 0.0001, "step": 18180 }, { "epoch": 77.71, "learning_rate": 8.790164034850043e-05, "loss": 0.0002, "step": 18184 }, { "epoch": 77.73, "learning_rate": 8.789403819003642e-05, "loss": 0.0003, "step": 18188 }, { "epoch": 77.74, "learning_rate": 8.78864339728224e-05, "loss": 0.0001, "step": 18192 }, { "epoch": 77.76, "learning_rate": 8.78788276972715e-05, "loss": 0.0002, "step": 18196 }, { "epoch": 77.78, "learning_rate": 8.787121936379692e-05, "loss": 0.0001, "step": 18200 }, { "epoch": 77.79, "learning_rate": 8.786360897281206e-05, "loss": 0.0013, "step": 18204 }, { "epoch": 77.81, "learning_rate": 8.785599652473037e-05, "loss": 0.0012, "step": 18208 }, { "epoch": 77.83, "learning_rate": 8.784838201996544e-05, "loss": 0.0003, "step": 18212 }, { "epoch": 77.85, "learning_rate": 8.784076545893094e-05, "loss": 0.0001, "step": 18216 }, { "epoch": 77.86, "learning_rate": 8.783314684204067e-05, "loss": 0.0001, "step": 18220 }, { "epoch": 77.88, "learning_rate": 8.782552616970856e-05, "loss": 0.0007, "step": 18224 }, { "epoch": 77.9, "learning_rate": 8.781790344234863e-05, "loss": 0.0005, "step": 18228 }, { "epoch": 77.91, "learning_rate": 8.781027866037501e-05, "loss": 0.0012, "step": 18232 }, { "epoch": 77.93, "learning_rate": 8.780265182420195e-05, "loss": 0.0, "step": 18236 }, { "epoch": 77.95, "learning_rate": 8.779502293424381e-05, "loss": 0.0002, "step": 18240 }, { "epoch": 77.97, "learning_rate": 8.778739199091506e-05, "loss": 0.0003, "step": 18244 }, { "epoch": 77.98, "learning_rate": 8.777975899463029e-05, "loss": 0.0002, "step": 18248 }, { "epoch": 78.0, "learning_rate": 8.777212394580418e-05, "loss": 0.0012, "step": 18252 }, { "epoch": 78.02, "learning_rate": 8.776448684485155e-05, "loss": 0.0001, "step": 18256 }, { "epoch": 78.03, "learning_rate": 8.775684769218731e-05, "loss": 0.0, "step": 18260 }, { "epoch": 78.05, "learning_rate": 8.774920648822646e-05, "loss": 0.0002, "step": 18264 }, { "epoch": 78.07, "learning_rate": 8.77415632333842e-05, "loss": 0.0002, "step": 18268 }, { "epoch": 78.09, "learning_rate": 8.773391792807575e-05, "loss": 0.0001, "step": 18272 }, { "epoch": 78.1, "learning_rate": 8.772627057271646e-05, "loss": 0.0002, "step": 18276 }, { "epoch": 78.12, "learning_rate": 8.771862116772182e-05, "loss": 0.0001, "step": 18280 }, { "epoch": 78.14, "learning_rate": 8.771096971350741e-05, "loss": 0.0, "step": 18284 }, { "epoch": 78.15, "learning_rate": 8.770331621048893e-05, "loss": 0.0001, "step": 18288 }, { "epoch": 78.17, "learning_rate": 8.769566065908219e-05, "loss": 0.0001, "step": 18292 }, { "epoch": 78.19, "learning_rate": 8.76880030597031e-05, "loss": 0.0001, "step": 18296 }, { "epoch": 78.21, "learning_rate": 8.768034341276772e-05, "loss": 0.0009, "step": 18300 }, { "epoch": 78.22, "learning_rate": 8.767268171869214e-05, "loss": 0.0002, "step": 18304 }, { "epoch": 78.24, "learning_rate": 8.766501797789266e-05, "loss": 0.0001, "step": 18308 }, { "epoch": 78.26, "learning_rate": 8.765735219078561e-05, "loss": 0.0, "step": 18312 }, { "epoch": 78.27, "learning_rate": 8.764968435778751e-05, "loss": 0.0001, "step": 18316 }, { "epoch": 78.29, "learning_rate": 8.76420144793149e-05, "loss": 0.0002, "step": 18320 }, { "epoch": 78.31, "learning_rate": 8.763434255578449e-05, "loss": 0.0004, "step": 18324 }, { "epoch": 78.32, "learning_rate": 8.76266685876131e-05, "loss": 0.0006, "step": 18328 }, { "epoch": 78.34, "learning_rate": 8.761899257521766e-05, "loss": 0.0001, "step": 18332 }, { "epoch": 78.36, "learning_rate": 8.761131451901517e-05, "loss": 0.0001, "step": 18336 }, { "epoch": 78.38, "learning_rate": 8.760363441942279e-05, "loss": 0.0, "step": 18340 }, { "epoch": 78.39, "learning_rate": 8.759595227685778e-05, "loss": 0.0001, "step": 18344 }, { "epoch": 78.41, "learning_rate": 8.75882680917375e-05, "loss": 0.0001, "step": 18348 }, { "epoch": 78.43, "learning_rate": 8.758058186447942e-05, "loss": 0.0014, "step": 18352 }, { "epoch": 78.44, "learning_rate": 8.757289359550111e-05, "loss": 0.0011, "step": 18356 }, { "epoch": 78.46, "learning_rate": 8.75652032852203e-05, "loss": 0.0004, "step": 18360 }, { "epoch": 78.48, "learning_rate": 8.755751093405478e-05, "loss": 0.0001, "step": 18364 }, { "epoch": 78.5, "learning_rate": 8.754981654242246e-05, "loss": 0.0001, "step": 18368 }, { "epoch": 78.51, "learning_rate": 8.754212011074139e-05, "loss": 0.0, "step": 18372 }, { "epoch": 78.53, "learning_rate": 8.753442163942969e-05, "loss": 0.0017, "step": 18376 }, { "epoch": 78.55, "learning_rate": 8.752672112890563e-05, "loss": 0.0001, "step": 18380 }, { "epoch": 78.56, "learning_rate": 8.751901857958756e-05, "loss": 0.0002, "step": 18384 }, { "epoch": 78.58, "learning_rate": 8.751131399189396e-05, "loss": 0.0001, "step": 18388 }, { "epoch": 78.6, "learning_rate": 8.750360736624342e-05, "loss": 0.0008, "step": 18392 }, { "epoch": 78.62, "learning_rate": 8.749589870305462e-05, "loss": 0.0021, "step": 18396 }, { "epoch": 78.63, "learning_rate": 8.748818800274635e-05, "loss": 0.0002, "step": 18400 }, { "epoch": 78.65, "learning_rate": 8.748047526573755e-05, "loss": 0.0001, "step": 18404 }, { "epoch": 78.67, "learning_rate": 8.747276049244726e-05, "loss": 0.0001, "step": 18408 }, { "epoch": 78.68, "learning_rate": 8.746504368329457e-05, "loss": 0.0001, "step": 18412 }, { "epoch": 78.7, "learning_rate": 8.74573248386988e-05, "loss": 0.0001, "step": 18416 }, { "epoch": 78.72, "learning_rate": 8.744960395907922e-05, "loss": 0.0003, "step": 18420 }, { "epoch": 78.74, "learning_rate": 8.744188104485535e-05, "loss": 0.0001, "step": 18424 }, { "epoch": 78.75, "learning_rate": 8.743415609644678e-05, "loss": 0.0001, "step": 18428 }, { "epoch": 78.77, "learning_rate": 8.742642911427317e-05, "loss": 0.0001, "step": 18432 }, { "epoch": 78.79, "learning_rate": 8.741870009875434e-05, "loss": 0.0, "step": 18436 }, { "epoch": 78.8, "learning_rate": 8.741096905031017e-05, "loss": 0.0, "step": 18440 }, { "epoch": 78.82, "learning_rate": 8.740323596936073e-05, "loss": 0.0001, "step": 18444 }, { "epoch": 78.84, "learning_rate": 8.739550085632611e-05, "loss": 0.0002, "step": 18448 }, { "epoch": 78.85, "learning_rate": 8.738776371162657e-05, "loss": 0.0004, "step": 18452 }, { "epoch": 78.87, "learning_rate": 8.738002453568245e-05, "loss": 0.0001, "step": 18456 }, { "epoch": 78.89, "learning_rate": 8.737228332891423e-05, "loss": 0.0007, "step": 18460 }, { "epoch": 78.91, "learning_rate": 8.736454009174249e-05, "loss": 0.0018, "step": 18464 }, { "epoch": 78.92, "learning_rate": 8.735679482458787e-05, "loss": 0.0001, "step": 18468 }, { "epoch": 78.94, "learning_rate": 8.734904752787121e-05, "loss": 0.0001, "step": 18472 }, { "epoch": 78.96, "learning_rate": 8.734129820201339e-05, "loss": 0.0005, "step": 18476 }, { "epoch": 78.97, "learning_rate": 8.733354684743542e-05, "loss": 0.0001, "step": 18480 }, { "epoch": 78.99, "learning_rate": 8.732579346455844e-05, "loss": 0.0006, "step": 18484 }, { "epoch": 79.01, "learning_rate": 8.731803805380369e-05, "loss": 0.0001, "step": 18488 }, { "epoch": 79.03, "learning_rate": 8.731028061559249e-05, "loss": 0.0001, "step": 18492 }, { "epoch": 79.04, "learning_rate": 8.730252115034632e-05, "loss": 0.0001, "step": 18496 }, { "epoch": 79.06, "learning_rate": 8.729475965848673e-05, "loss": 0.0001, "step": 18500 }, { "epoch": 79.08, "learning_rate": 8.728699614043539e-05, "loss": 0.0001, "step": 18504 }, { "epoch": 79.09, "learning_rate": 8.72792305966141e-05, "loss": 0.0002, "step": 18508 }, { "epoch": 79.11, "learning_rate": 8.727146302744473e-05, "loss": 0.0001, "step": 18512 }, { "epoch": 79.13, "learning_rate": 8.726369343334932e-05, "loss": 0.0001, "step": 18516 }, { "epoch": 79.15, "learning_rate": 8.725592181474997e-05, "loss": 0.0002, "step": 18520 }, { "epoch": 79.16, "learning_rate": 8.72481481720689e-05, "loss": 0.0004, "step": 18524 }, { "epoch": 79.18, "learning_rate": 8.724037250572845e-05, "loss": 0.0001, "step": 18528 }, { "epoch": 79.2, "learning_rate": 8.723259481615107e-05, "loss": 0.0007, "step": 18532 }, { "epoch": 79.21, "learning_rate": 8.72248151037593e-05, "loss": 0.0008, "step": 18536 }, { "epoch": 79.23, "learning_rate": 8.721703336897582e-05, "loss": 0.0001, "step": 18540 }, { "epoch": 79.25, "learning_rate": 8.72092496122234e-05, "loss": 0.0, "step": 18544 }, { "epoch": 79.26, "learning_rate": 8.720146383392492e-05, "loss": 0.0001, "step": 18548 }, { "epoch": 79.28, "learning_rate": 8.719367603450338e-05, "loss": 0.0001, "step": 18552 }, { "epoch": 79.3, "learning_rate": 8.718588621438188e-05, "loss": 0.0001, "step": 18556 }, { "epoch": 79.32, "learning_rate": 8.717809437398366e-05, "loss": 0.0003, "step": 18560 }, { "epoch": 79.33, "learning_rate": 8.717030051373199e-05, "loss": 0.0001, "step": 18564 }, { "epoch": 79.35, "learning_rate": 8.716250463405034e-05, "loss": 0.0002, "step": 18568 }, { "epoch": 79.37, "learning_rate": 8.715470673536226e-05, "loss": 0.0002, "step": 18572 }, { "epoch": 79.38, "learning_rate": 8.714690681809138e-05, "loss": 0.0001, "step": 18576 }, { "epoch": 79.4, "learning_rate": 8.713910488266148e-05, "loss": 0.0, "step": 18580 }, { "epoch": 79.42, "learning_rate": 8.713130092949644e-05, "loss": 0.0001, "step": 18584 }, { "epoch": 79.44, "learning_rate": 8.712349495902021e-05, "loss": 0.0002, "step": 18588 }, { "epoch": 79.45, "learning_rate": 8.711568697165691e-05, "loss": 0.0001, "step": 18592 }, { "epoch": 79.47, "learning_rate": 8.710787696783072e-05, "loss": 0.0001, "step": 18596 }, { "epoch": 79.49, "learning_rate": 8.710006494796597e-05, "loss": 0.0006, "step": 18600 }, { "epoch": 79.5, "learning_rate": 8.709225091248708e-05, "loss": 0.0005, "step": 18604 }, { "epoch": 79.52, "learning_rate": 8.708443486181855e-05, "loss": 0.0004, "step": 18608 }, { "epoch": 79.54, "learning_rate": 8.707661679638505e-05, "loss": 0.0002, "step": 18612 }, { "epoch": 79.56, "learning_rate": 8.706879671661132e-05, "loss": 0.0001, "step": 18616 }, { "epoch": 79.57, "learning_rate": 8.706097462292223e-05, "loss": 0.0001, "step": 18620 }, { "epoch": 79.59, "learning_rate": 8.70531505157427e-05, "loss": 0.0001, "step": 18624 }, { "epoch": 79.61, "learning_rate": 8.704532439549787e-05, "loss": 0.0003, "step": 18628 }, { "epoch": 79.62, "learning_rate": 8.703749626261289e-05, "loss": 0.0002, "step": 18632 }, { "epoch": 79.64, "learning_rate": 8.702966611751306e-05, "loss": 0.0001, "step": 18636 }, { "epoch": 79.66, "learning_rate": 8.70218339606238e-05, "loss": 0.0004, "step": 18640 }, { "epoch": 79.68, "learning_rate": 8.70139997923706e-05, "loss": 0.0001, "step": 18644 }, { "epoch": 79.69, "learning_rate": 8.70061636131791e-05, "loss": 0.0006, "step": 18648 }, { "epoch": 79.71, "learning_rate": 8.699832542347504e-05, "loss": 0.0001, "step": 18652 }, { "epoch": 79.73, "learning_rate": 8.699048522368425e-05, "loss": 0.0007, "step": 18656 }, { "epoch": 79.74, "learning_rate": 8.698264301423267e-05, "loss": 0.0013, "step": 18660 }, { "epoch": 79.76, "learning_rate": 8.697479879554638e-05, "loss": 0.0, "step": 18664 }, { "epoch": 79.78, "learning_rate": 8.696695256805154e-05, "loss": 0.0002, "step": 18668 }, { "epoch": 79.79, "learning_rate": 8.695910433217443e-05, "loss": 0.0004, "step": 18672 }, { "epoch": 79.81, "learning_rate": 8.695125408834145e-05, "loss": 0.0, "step": 18676 }, { "epoch": 79.83, "learning_rate": 8.694340183697908e-05, "loss": 0.0001, "step": 18680 }, { "epoch": 79.85, "learning_rate": 8.693554757851392e-05, "loss": 0.0005, "step": 18684 }, { "epoch": 79.86, "learning_rate": 8.692769131337271e-05, "loss": 0.0001, "step": 18688 }, { "epoch": 79.88, "learning_rate": 8.691983304198225e-05, "loss": 0.0003, "step": 18692 }, { "epoch": 79.9, "learning_rate": 8.69119727647695e-05, "loss": 0.0001, "step": 18696 }, { "epoch": 79.91, "learning_rate": 8.690411048216147e-05, "loss": 0.0001, "step": 18700 }, { "epoch": 79.93, "learning_rate": 8.689624619458534e-05, "loss": 0.0003, "step": 18704 }, { "epoch": 79.95, "learning_rate": 8.688837990246834e-05, "loss": 0.0001, "step": 18708 }, { "epoch": 79.97, "learning_rate": 8.688051160623786e-05, "loss": 0.0003, "step": 18712 }, { "epoch": 79.98, "learning_rate": 8.68726413063214e-05, "loss": 0.0008, "step": 18716 }, { "epoch": 80.0, "learning_rate": 8.686476900314648e-05, "loss": 0.0009, "step": 18720 }, { "epoch": 80.02, "learning_rate": 8.685689469714086e-05, "loss": 0.0, "step": 18724 }, { "epoch": 80.03, "learning_rate": 8.684901838873232e-05, "loss": 0.0003, "step": 18728 }, { "epoch": 80.05, "learning_rate": 8.684114007834876e-05, "loss": 0.0001, "step": 18732 }, { "epoch": 80.07, "learning_rate": 8.683325976641823e-05, "loss": 0.0, "step": 18736 }, { "epoch": 80.09, "learning_rate": 8.682537745336882e-05, "loss": 0.0007, "step": 18740 }, { "epoch": 80.1, "learning_rate": 8.681749313962882e-05, "loss": 0.0, "step": 18744 }, { "epoch": 80.12, "learning_rate": 8.680960682562653e-05, "loss": 0.0001, "step": 18748 }, { "epoch": 80.14, "learning_rate": 8.680171851179044e-05, "loss": 0.0001, "step": 18752 }, { "epoch": 80.15, "learning_rate": 8.679382819854908e-05, "loss": 0.0001, "step": 18756 }, { "epoch": 80.17, "learning_rate": 8.678593588633118e-05, "loss": 0.0002, "step": 18760 }, { "epoch": 80.19, "learning_rate": 8.677804157556549e-05, "loss": 0.0003, "step": 18764 }, { "epoch": 80.21, "learning_rate": 8.677014526668087e-05, "loss": 0.0006, "step": 18768 }, { "epoch": 80.22, "learning_rate": 8.676224696010637e-05, "loss": 0.0015, "step": 18772 }, { "epoch": 80.24, "learning_rate": 8.675434665627107e-05, "loss": 0.0001, "step": 18776 }, { "epoch": 80.26, "learning_rate": 8.674644435560419e-05, "loss": 0.0001, "step": 18780 }, { "epoch": 80.27, "learning_rate": 8.673854005853508e-05, "loss": 0.0001, "step": 18784 }, { "epoch": 80.29, "learning_rate": 8.673063376549315e-05, "loss": 0.0, "step": 18788 }, { "epoch": 80.31, "learning_rate": 8.672272547690793e-05, "loss": 0.0001, "step": 18792 }, { "epoch": 80.32, "learning_rate": 8.67148151932091e-05, "loss": 0.0001, "step": 18796 }, { "epoch": 80.34, "learning_rate": 8.67069029148264e-05, "loss": 0.0, "step": 18800 }, { "epoch": 80.36, "learning_rate": 8.669898864218968e-05, "loss": 0.0, "step": 18804 }, { "epoch": 80.38, "learning_rate": 8.669107237572896e-05, "loss": 0.0, "step": 18808 }, { "epoch": 80.39, "learning_rate": 8.668315411587432e-05, "loss": 0.0001, "step": 18812 }, { "epoch": 80.41, "learning_rate": 8.66752338630559e-05, "loss": 0.0007, "step": 18816 }, { "epoch": 80.43, "learning_rate": 8.666731161770404e-05, "loss": 0.0001, "step": 18820 }, { "epoch": 80.44, "learning_rate": 8.665938738024915e-05, "loss": 0.0003, "step": 18824 }, { "epoch": 80.46, "learning_rate": 8.665146115112173e-05, "loss": 0.0, "step": 18828 }, { "epoch": 80.48, "learning_rate": 8.664353293075243e-05, "loss": 0.0001, "step": 18832 }, { "epoch": 80.5, "learning_rate": 8.663560271957196e-05, "loss": 0.0001, "step": 18836 }, { "epoch": 80.51, "learning_rate": 8.662767051801116e-05, "loss": 0.0001, "step": 18840 }, { "epoch": 80.53, "learning_rate": 8.6619736326501e-05, "loss": 0.0001, "step": 18844 }, { "epoch": 80.55, "learning_rate": 8.661180014547252e-05, "loss": 0.0001, "step": 18848 }, { "epoch": 80.56, "learning_rate": 8.66038619753569e-05, "loss": 0.0, "step": 18852 }, { "epoch": 80.58, "learning_rate": 8.65959218165854e-05, "loss": 0.0007, "step": 18856 }, { "epoch": 80.6, "learning_rate": 8.65879796695894e-05, "loss": 0.0002, "step": 18860 }, { "epoch": 80.62, "learning_rate": 8.658003553480042e-05, "loss": 0.0007, "step": 18864 }, { "epoch": 80.63, "learning_rate": 8.657208941265002e-05, "loss": 0.0002, "step": 18868 }, { "epoch": 80.65, "learning_rate": 8.656414130356994e-05, "loss": 0.0, "step": 18872 }, { "epoch": 80.67, "learning_rate": 8.655619120799195e-05, "loss": 0.0001, "step": 18876 }, { "epoch": 80.68, "learning_rate": 8.654823912634801e-05, "loss": 0.0001, "step": 18880 }, { "epoch": 80.7, "learning_rate": 8.654028505907012e-05, "loss": 0.0002, "step": 18884 }, { "epoch": 80.72, "learning_rate": 8.653232900659045e-05, "loss": 0.0001, "step": 18888 }, { "epoch": 80.74, "learning_rate": 8.652437096934124e-05, "loss": 0.0002, "step": 18892 }, { "epoch": 80.75, "learning_rate": 8.651641094775483e-05, "loss": 0.0, "step": 18896 }, { "epoch": 80.77, "learning_rate": 8.650844894226366e-05, "loss": 0.0003, "step": 18900 }, { "epoch": 80.79, "learning_rate": 8.650048495330034e-05, "loss": 0.0002, "step": 18904 }, { "epoch": 80.8, "learning_rate": 8.649251898129754e-05, "loss": 0.0001, "step": 18908 }, { "epoch": 80.82, "learning_rate": 8.648455102668802e-05, "loss": 0.0001, "step": 18912 }, { "epoch": 80.84, "learning_rate": 8.647658108990469e-05, "loss": 0.0001, "step": 18916 }, { "epoch": 80.85, "learning_rate": 8.646860917138055e-05, "loss": 0.0021, "step": 18920 }, { "epoch": 80.87, "learning_rate": 8.646063527154869e-05, "loss": 0.0003, "step": 18924 }, { "epoch": 80.89, "learning_rate": 8.645265939084235e-05, "loss": 0.0, "step": 18928 }, { "epoch": 80.91, "learning_rate": 8.644468152969482e-05, "loss": 0.0003, "step": 18932 }, { "epoch": 80.92, "learning_rate": 8.643670168853957e-05, "loss": 0.0002, "step": 18936 }, { "epoch": 80.94, "learning_rate": 8.642871986781012e-05, "loss": 0.0001, "step": 18940 }, { "epoch": 80.96, "learning_rate": 8.642073606794011e-05, "loss": 0.0001, "step": 18944 }, { "epoch": 80.97, "learning_rate": 8.64127502893633e-05, "loss": 0.0001, "step": 18948 }, { "epoch": 80.99, "learning_rate": 8.640476253251354e-05, "loss": 0.0009, "step": 18952 }, { "epoch": 81.01, "learning_rate": 8.639677279782481e-05, "loss": 0.0001, "step": 18956 }, { "epoch": 81.03, "learning_rate": 8.638878108573117e-05, "loss": 0.0001, "step": 18960 }, { "epoch": 81.04, "learning_rate": 8.638078739666683e-05, "loss": 0.0, "step": 18964 }, { "epoch": 81.06, "learning_rate": 8.637279173106605e-05, "loss": 0.0001, "step": 18968 }, { "epoch": 81.08, "learning_rate": 8.636479408936324e-05, "loss": 0.0002, "step": 18972 }, { "epoch": 81.09, "learning_rate": 8.635679447199292e-05, "loss": 0.0001, "step": 18976 }, { "epoch": 81.11, "learning_rate": 8.634879287938969e-05, "loss": 0.0004, "step": 18980 }, { "epoch": 81.13, "learning_rate": 8.634078931198825e-05, "loss": 0.0001, "step": 18984 }, { "epoch": 81.15, "learning_rate": 8.633278377022345e-05, "loss": 0.0002, "step": 18988 }, { "epoch": 81.16, "learning_rate": 8.632477625453021e-05, "loss": 0.0002, "step": 18992 }, { "epoch": 81.18, "learning_rate": 8.631676676534359e-05, "loss": 0.0001, "step": 18996 }, { "epoch": 81.2, "learning_rate": 8.630875530309873e-05, "loss": 0.0001, "step": 19000 }, { "epoch": 81.2, "eval_exact_match": 0.5166320166320166, "eval_loss": 0.9038861393928528, "eval_runtime": 138.7634, "eval_samples_per_second": 6.933, "step": 19000 }, { "epoch": 81.21, "learning_rate": 8.630074186823088e-05, "loss": 0.0009, "step": 19004 }, { "epoch": 81.23, "learning_rate": 8.629272646117542e-05, "loss": 0.0001, "step": 19008 }, { "epoch": 81.25, "learning_rate": 8.628470908236779e-05, "loss": 0.0015, "step": 19012 }, { "epoch": 81.26, "learning_rate": 8.627668973224358e-05, "loss": 0.0001, "step": 19016 }, { "epoch": 81.28, "learning_rate": 8.626866841123849e-05, "loss": 0.0, "step": 19020 }, { "epoch": 81.3, "learning_rate": 8.626064511978831e-05, "loss": 0.0025, "step": 19024 }, { "epoch": 81.32, "learning_rate": 8.62526198583289e-05, "loss": 0.0014, "step": 19028 }, { "epoch": 81.33, "learning_rate": 8.624459262729631e-05, "loss": 0.0003, "step": 19032 }, { "epoch": 81.35, "learning_rate": 8.623656342712664e-05, "loss": 0.0005, "step": 19036 }, { "epoch": 81.37, "learning_rate": 8.622853225825611e-05, "loss": 0.0001, "step": 19040 }, { "epoch": 81.38, "learning_rate": 8.622049912112103e-05, "loss": 0.0004, "step": 19044 }, { "epoch": 81.4, "learning_rate": 8.621246401615786e-05, "loss": 0.0003, "step": 19048 }, { "epoch": 81.42, "learning_rate": 8.620442694380311e-05, "loss": 0.0, "step": 19052 }, { "epoch": 81.44, "learning_rate": 8.619638790449345e-05, "loss": 0.0001, "step": 19056 }, { "epoch": 81.45, "learning_rate": 8.618834689866562e-05, "loss": 0.0001, "step": 19060 }, { "epoch": 81.47, "learning_rate": 8.618030392675649e-05, "loss": 0.0002, "step": 19064 }, { "epoch": 81.49, "learning_rate": 8.617225898920302e-05, "loss": 0.0003, "step": 19068 }, { "epoch": 81.5, "learning_rate": 8.61642120864423e-05, "loss": 0.0001, "step": 19072 }, { "epoch": 81.52, "learning_rate": 8.615616321891151e-05, "loss": 0.0001, "step": 19076 }, { "epoch": 81.54, "learning_rate": 8.614811238704791e-05, "loss": 0.0005, "step": 19080 }, { "epoch": 81.56, "learning_rate": 8.614005959128892e-05, "loss": 0.0006, "step": 19084 }, { "epoch": 81.57, "learning_rate": 8.613200483207205e-05, "loss": 0.0001, "step": 19088 }, { "epoch": 81.59, "learning_rate": 8.612394810983487e-05, "loss": 0.0001, "step": 19092 }, { "epoch": 81.61, "learning_rate": 8.611588942501512e-05, "loss": 0.0006, "step": 19096 }, { "epoch": 81.62, "learning_rate": 8.610782877805063e-05, "loss": 0.0001, "step": 19100 }, { "epoch": 81.64, "learning_rate": 8.609976616937931e-05, "loss": 0.0001, "step": 19104 }, { "epoch": 81.66, "learning_rate": 8.60917015994392e-05, "loss": 0.0005, "step": 19108 }, { "epoch": 81.68, "learning_rate": 8.608363506866844e-05, "loss": 0.0002, "step": 19112 }, { "epoch": 81.69, "learning_rate": 8.607556657750528e-05, "loss": 0.0002, "step": 19116 }, { "epoch": 81.71, "learning_rate": 8.606749612638809e-05, "loss": 0.0001, "step": 19120 }, { "epoch": 81.73, "learning_rate": 8.605942371575531e-05, "loss": 0.0001, "step": 19124 }, { "epoch": 81.74, "learning_rate": 8.605134934604548e-05, "loss": 0.0002, "step": 19128 }, { "epoch": 81.76, "learning_rate": 8.604327301769735e-05, "loss": 0.0003, "step": 19132 }, { "epoch": 81.78, "learning_rate": 8.603519473114962e-05, "loss": 0.0001, "step": 19136 }, { "epoch": 81.79, "learning_rate": 8.602711448684123e-05, "loss": 0.0002, "step": 19140 }, { "epoch": 81.81, "learning_rate": 8.601903228521116e-05, "loss": 0.0002, "step": 19144 }, { "epoch": 81.83, "learning_rate": 8.601094812669849e-05, "loss": 0.0006, "step": 19148 }, { "epoch": 81.85, "learning_rate": 8.600286201174243e-05, "loss": 0.0001, "step": 19152 }, { "epoch": 81.86, "learning_rate": 8.599477394078233e-05, "loss": 0.0001, "step": 19156 }, { "epoch": 81.88, "learning_rate": 8.598668391425754e-05, "loss": 0.0002, "step": 19160 }, { "epoch": 81.9, "learning_rate": 8.597859193260765e-05, "loss": 0.0004, "step": 19164 }, { "epoch": 81.91, "learning_rate": 8.597049799627225e-05, "loss": 0.0001, "step": 19168 }, { "epoch": 81.93, "learning_rate": 8.596240210569108e-05, "loss": 0.0003, "step": 19172 }, { "epoch": 81.95, "learning_rate": 8.5954304261304e-05, "loss": 0.0001, "step": 19176 }, { "epoch": 81.97, "learning_rate": 8.594620446355096e-05, "loss": 0.0001, "step": 19180 }, { "epoch": 81.98, "learning_rate": 8.5938102712872e-05, "loss": 0.0001, "step": 19184 }, { "epoch": 82.0, "learning_rate": 8.592999900970726e-05, "loss": 0.0004, "step": 19188 }, { "epoch": 82.02, "learning_rate": 8.592189335449706e-05, "loss": 0.0001, "step": 19192 }, { "epoch": 82.03, "learning_rate": 8.591378574768174e-05, "loss": 0.0, "step": 19196 }, { "epoch": 82.05, "learning_rate": 8.590567618970178e-05, "loss": 0.0001, "step": 19200 }, { "epoch": 82.07, "learning_rate": 8.589756468099778e-05, "loss": 0.0, "step": 19204 }, { "epoch": 82.09, "learning_rate": 8.588945122201042e-05, "loss": 0.0004, "step": 19208 }, { "epoch": 82.1, "learning_rate": 8.58813358131805e-05, "loss": 0.0001, "step": 19212 }, { "epoch": 82.12, "learning_rate": 8.587321845494891e-05, "loss": 0.0002, "step": 19216 }, { "epoch": 82.14, "learning_rate": 8.586509914775667e-05, "loss": 0.0, "step": 19220 }, { "epoch": 82.15, "learning_rate": 8.585697789204493e-05, "loss": 0.0009, "step": 19224 }, { "epoch": 82.17, "learning_rate": 8.584885468825483e-05, "loss": 0.0002, "step": 19228 }, { "epoch": 82.19, "learning_rate": 8.584072953682776e-05, "loss": 0.0015, "step": 19232 }, { "epoch": 82.21, "learning_rate": 8.583260243820514e-05, "loss": 0.0001, "step": 19236 }, { "epoch": 82.22, "learning_rate": 8.582447339282851e-05, "loss": 0.0001, "step": 19240 }, { "epoch": 82.24, "learning_rate": 8.58163424011395e-05, "loss": 0.0001, "step": 19244 }, { "epoch": 82.26, "learning_rate": 8.580820946357985e-05, "loss": 0.0002, "step": 19248 }, { "epoch": 82.27, "learning_rate": 8.580007458059147e-05, "loss": 0.0001, "step": 19252 }, { "epoch": 82.29, "learning_rate": 8.579193775261627e-05, "loss": 0.0006, "step": 19256 }, { "epoch": 82.31, "learning_rate": 8.578379898009632e-05, "loss": 0.0, "step": 19260 }, { "epoch": 82.32, "learning_rate": 8.57756582634738e-05, "loss": 0.0005, "step": 19264 }, { "epoch": 82.34, "learning_rate": 8.576751560319101e-05, "loss": 0.0, "step": 19268 }, { "epoch": 82.36, "learning_rate": 8.57593709996903e-05, "loss": 0.0014, "step": 19272 }, { "epoch": 82.38, "learning_rate": 8.575122445341417e-05, "loss": 0.0001, "step": 19276 }, { "epoch": 82.39, "learning_rate": 8.574307596480522e-05, "loss": 0.0006, "step": 19280 }, { "epoch": 82.41, "learning_rate": 8.573492553430615e-05, "loss": 0.0006, "step": 19284 }, { "epoch": 82.43, "learning_rate": 8.572677316235978e-05, "loss": 0.0, "step": 19288 }, { "epoch": 82.44, "learning_rate": 8.5718618849409e-05, "loss": 0.0003, "step": 19292 }, { "epoch": 82.46, "learning_rate": 8.571046259589681e-05, "loss": 0.0001, "step": 19296 }, { "epoch": 82.48, "learning_rate": 8.570230440226639e-05, "loss": 0.0001, "step": 19300 }, { "epoch": 82.5, "learning_rate": 8.56941442689609e-05, "loss": 0.0003, "step": 19304 }, { "epoch": 82.51, "learning_rate": 8.568598219642373e-05, "loss": 0.0, "step": 19308 }, { "epoch": 82.53, "learning_rate": 8.567781818509829e-05, "loss": 0.0001, "step": 19312 }, { "epoch": 82.55, "learning_rate": 8.566965223542811e-05, "loss": 0.0002, "step": 19316 }, { "epoch": 82.56, "learning_rate": 8.566148434785686e-05, "loss": 0.0001, "step": 19320 }, { "epoch": 82.58, "learning_rate": 8.56533145228283e-05, "loss": 0.0, "step": 19324 }, { "epoch": 82.6, "learning_rate": 8.564514276078626e-05, "loss": 0.0008, "step": 19328 }, { "epoch": 82.62, "learning_rate": 8.563696906217475e-05, "loss": 0.0001, "step": 19332 }, { "epoch": 82.63, "learning_rate": 8.562879342743779e-05, "loss": 0.0001, "step": 19336 }, { "epoch": 82.65, "learning_rate": 8.562061585701961e-05, "loss": 0.0001, "step": 19340 }, { "epoch": 82.67, "learning_rate": 8.561243635136443e-05, "loss": 0.0001, "step": 19344 }, { "epoch": 82.68, "learning_rate": 8.560425491091668e-05, "loss": 0.0005, "step": 19348 }, { "epoch": 82.7, "learning_rate": 8.559607153612084e-05, "loss": 0.0001, "step": 19352 }, { "epoch": 82.72, "learning_rate": 8.558788622742149e-05, "loss": 0.0001, "step": 19356 }, { "epoch": 82.74, "learning_rate": 8.557969898526335e-05, "loss": 0.0, "step": 19360 }, { "epoch": 82.75, "learning_rate": 8.557150981009121e-05, "loss": 0.0006, "step": 19364 }, { "epoch": 82.77, "learning_rate": 8.556331870234999e-05, "loss": 0.0001, "step": 19368 }, { "epoch": 82.79, "learning_rate": 8.555512566248471e-05, "loss": 0.0003, "step": 19372 }, { "epoch": 82.8, "learning_rate": 8.554693069094048e-05, "loss": 0.0002, "step": 19376 }, { "epoch": 82.82, "learning_rate": 8.553873378816252e-05, "loss": 0.0001, "step": 19380 }, { "epoch": 82.84, "learning_rate": 8.553053495459618e-05, "loss": 0.0001, "step": 19384 }, { "epoch": 82.85, "learning_rate": 8.55223341906869e-05, "loss": 0.0001, "step": 19388 }, { "epoch": 82.87, "learning_rate": 8.551413149688019e-05, "loss": 0.0007, "step": 19392 }, { "epoch": 82.89, "learning_rate": 8.55059268736217e-05, "loss": 0.0001, "step": 19396 }, { "epoch": 82.91, "learning_rate": 8.54977203213572e-05, "loss": 0.0001, "step": 19400 }, { "epoch": 82.92, "learning_rate": 8.548951184053257e-05, "loss": 0.0001, "step": 19404 }, { "epoch": 82.94, "learning_rate": 8.548130143159369e-05, "loss": 0.0004, "step": 19408 }, { "epoch": 82.96, "learning_rate": 8.54730890949867e-05, "loss": 0.0002, "step": 19412 }, { "epoch": 82.97, "learning_rate": 8.546487483115773e-05, "loss": 0.0, "step": 19416 }, { "epoch": 82.99, "learning_rate": 8.545665864055308e-05, "loss": 0.0001, "step": 19420 }, { "epoch": 83.01, "learning_rate": 8.544844052361909e-05, "loss": 0.0001, "step": 19424 }, { "epoch": 83.03, "learning_rate": 8.544022048080227e-05, "loss": 0.003, "step": 19428 }, { "epoch": 83.04, "learning_rate": 8.54319985125492e-05, "loss": 0.0001, "step": 19432 }, { "epoch": 83.06, "learning_rate": 8.542377461930659e-05, "loss": 0.0001, "step": 19436 }, { "epoch": 83.08, "learning_rate": 8.54155488015212e-05, "loss": 0.0002, "step": 19440 }, { "epoch": 83.09, "learning_rate": 8.540732105963998e-05, "loss": 0.0001, "step": 19444 }, { "epoch": 83.11, "learning_rate": 8.539909139410989e-05, "loss": 0.0002, "step": 19448 }, { "epoch": 83.13, "learning_rate": 8.539085980537807e-05, "loss": 0.0001, "step": 19452 }, { "epoch": 83.15, "learning_rate": 8.538262629389171e-05, "loss": 0.0001, "step": 19456 }, { "epoch": 83.16, "learning_rate": 8.537439086009817e-05, "loss": 0.0, "step": 19460 }, { "epoch": 83.18, "learning_rate": 8.536615350444483e-05, "loss": 0.0001, "step": 19464 }, { "epoch": 83.2, "learning_rate": 8.535791422737924e-05, "loss": 0.0005, "step": 19468 }, { "epoch": 83.21, "learning_rate": 8.534967302934905e-05, "loss": 0.0001, "step": 19472 }, { "epoch": 83.23, "learning_rate": 8.534142991080194e-05, "loss": 0.0001, "step": 19476 }, { "epoch": 83.25, "learning_rate": 8.53331848721858e-05, "loss": 0.0016, "step": 19480 }, { "epoch": 83.26, "learning_rate": 8.532493791394857e-05, "loss": 0.0009, "step": 19484 }, { "epoch": 83.28, "learning_rate": 8.531668903653831e-05, "loss": 0.0, "step": 19488 }, { "epoch": 83.3, "learning_rate": 8.530843824040314e-05, "loss": 0.0005, "step": 19492 }, { "epoch": 83.32, "learning_rate": 8.530018552599134e-05, "loss": 0.0, "step": 19496 }, { "epoch": 83.33, "learning_rate": 8.529193089375126e-05, "loss": 0.0001, "step": 19500 }, { "epoch": 83.35, "learning_rate": 8.52836743441314e-05, "loss": 0.0005, "step": 19504 }, { "epoch": 83.37, "learning_rate": 8.52754158775803e-05, "loss": 0.0, "step": 19508 }, { "epoch": 83.38, "learning_rate": 8.526715549454664e-05, "loss": 0.0004, "step": 19512 }, { "epoch": 83.4, "learning_rate": 8.52588931954792e-05, "loss": 0.0, "step": 19516 }, { "epoch": 83.42, "learning_rate": 8.525062898082685e-05, "loss": 0.0001, "step": 19520 }, { "epoch": 83.44, "learning_rate": 8.524236285103861e-05, "loss": 0.0001, "step": 19524 }, { "epoch": 83.45, "learning_rate": 8.523409480656356e-05, "loss": 0.0001, "step": 19528 }, { "epoch": 83.47, "learning_rate": 8.522582484785088e-05, "loss": 0.0011, "step": 19532 }, { "epoch": 83.49, "learning_rate": 8.52175529753499e-05, "loss": 0.0001, "step": 19536 }, { "epoch": 83.5, "learning_rate": 8.520927918950999e-05, "loss": 0.0, "step": 19540 }, { "epoch": 83.52, "learning_rate": 8.520100349078069e-05, "loss": 0.0016, "step": 19544 }, { "epoch": 83.54, "learning_rate": 8.519272587961155e-05, "loss": 0.0006, "step": 19548 }, { "epoch": 83.56, "learning_rate": 8.518444635645237e-05, "loss": 0.0019, "step": 19552 }, { "epoch": 83.57, "learning_rate": 8.517616492175292e-05, "loss": 0.0001, "step": 19556 }, { "epoch": 83.59, "learning_rate": 8.516788157596312e-05, "loss": 0.0001, "step": 19560 }, { "epoch": 83.61, "learning_rate": 8.515959631953301e-05, "loss": 0.0002, "step": 19564 }, { "epoch": 83.62, "learning_rate": 8.515130915291271e-05, "loss": 0.0008, "step": 19568 }, { "epoch": 83.64, "learning_rate": 8.514302007655248e-05, "loss": 0.0001, "step": 19572 }, { "epoch": 83.66, "learning_rate": 8.513472909090263e-05, "loss": 0.0, "step": 19576 }, { "epoch": 83.68, "learning_rate": 8.512643619641362e-05, "loss": 0.0, "step": 19580 }, { "epoch": 83.69, "learning_rate": 8.511814139353599e-05, "loss": 0.0001, "step": 19584 }, { "epoch": 83.71, "learning_rate": 8.510984468272039e-05, "loss": 0.0014, "step": 19588 }, { "epoch": 83.73, "learning_rate": 8.510154606441756e-05, "loss": 0.0014, "step": 19592 }, { "epoch": 83.74, "learning_rate": 8.509324553907837e-05, "loss": 0.0001, "step": 19596 }, { "epoch": 83.76, "learning_rate": 8.508494310715379e-05, "loss": 0.0001, "step": 19600 }, { "epoch": 83.78, "learning_rate": 8.507663876909487e-05, "loss": 0.0001, "step": 19604 }, { "epoch": 83.79, "learning_rate": 8.506833252535277e-05, "loss": 0.0, "step": 19608 }, { "epoch": 83.81, "learning_rate": 8.506002437637879e-05, "loss": 0.0001, "step": 19612 }, { "epoch": 83.83, "learning_rate": 8.505171432262427e-05, "loss": 0.0, "step": 19616 }, { "epoch": 83.85, "learning_rate": 8.504340236454071e-05, "loss": 0.0013, "step": 19620 }, { "epoch": 83.86, "learning_rate": 8.503508850257968e-05, "loss": 0.0002, "step": 19624 }, { "epoch": 83.88, "learning_rate": 8.502677273719287e-05, "loss": 0.0, "step": 19628 }, { "epoch": 83.9, "learning_rate": 8.501845506883208e-05, "loss": 0.0002, "step": 19632 }, { "epoch": 83.91, "learning_rate": 8.501013549794917e-05, "loss": 0.0012, "step": 19636 }, { "epoch": 83.93, "learning_rate": 8.500181402499617e-05, "loss": 0.0005, "step": 19640 }, { "epoch": 83.95, "learning_rate": 8.499349065042516e-05, "loss": 0.001, "step": 19644 }, { "epoch": 83.97, "learning_rate": 8.498516537468834e-05, "loss": 0.0001, "step": 19648 }, { "epoch": 83.98, "learning_rate": 8.497683819823801e-05, "loss": 0.0009, "step": 19652 }, { "epoch": 84.0, "learning_rate": 8.496850912152661e-05, "loss": 0.0001, "step": 19656 }, { "epoch": 84.02, "learning_rate": 8.496017814500661e-05, "loss": 0.0001, "step": 19660 }, { "epoch": 84.03, "learning_rate": 8.495184526913066e-05, "loss": 0.0002, "step": 19664 }, { "epoch": 84.05, "learning_rate": 8.494351049435145e-05, "loss": 0.0001, "step": 19668 }, { "epoch": 84.07, "learning_rate": 8.493517382112182e-05, "loss": 0.0001, "step": 19672 }, { "epoch": 84.09, "learning_rate": 8.492683524989467e-05, "loss": 0.0001, "step": 19676 }, { "epoch": 84.1, "learning_rate": 8.491849478112307e-05, "loss": 0.0, "step": 19680 }, { "epoch": 84.12, "learning_rate": 8.491015241526011e-05, "loss": 0.0, "step": 19684 }, { "epoch": 84.14, "learning_rate": 8.490180815275906e-05, "loss": 0.0002, "step": 19688 }, { "epoch": 84.15, "learning_rate": 8.489346199407321e-05, "loss": 0.0017, "step": 19692 }, { "epoch": 84.17, "learning_rate": 8.488511393965601e-05, "loss": 0.0002, "step": 19696 }, { "epoch": 84.19, "learning_rate": 8.487676398996105e-05, "loss": 0.0004, "step": 19700 }, { "epoch": 84.21, "learning_rate": 8.486841214544194e-05, "loss": 0.0002, "step": 19704 }, { "epoch": 84.22, "learning_rate": 8.48600584065524e-05, "loss": 0.0001, "step": 19708 }, { "epoch": 84.24, "learning_rate": 8.485170277374635e-05, "loss": 0.0, "step": 19712 }, { "epoch": 84.26, "learning_rate": 8.484334524747767e-05, "loss": 0.0001, "step": 19716 }, { "epoch": 84.27, "learning_rate": 8.483498582820048e-05, "loss": 0.0, "step": 19720 }, { "epoch": 84.29, "learning_rate": 8.482662451636891e-05, "loss": 0.0001, "step": 19724 }, { "epoch": 84.31, "learning_rate": 8.481826131243722e-05, "loss": 0.0, "step": 19728 }, { "epoch": 84.32, "learning_rate": 8.480989621685979e-05, "loss": 0.0002, "step": 19732 }, { "epoch": 84.34, "learning_rate": 8.480152923009107e-05, "loss": 0.0001, "step": 19736 }, { "epoch": 84.36, "learning_rate": 8.479316035258565e-05, "loss": 0.0016, "step": 19740 }, { "epoch": 84.38, "learning_rate": 8.478478958479819e-05, "loss": 0.0001, "step": 19744 }, { "epoch": 84.39, "learning_rate": 8.477641692718348e-05, "loss": 0.0, "step": 19748 }, { "epoch": 84.41, "learning_rate": 8.476804238019638e-05, "loss": 0.0004, "step": 19752 }, { "epoch": 84.43, "learning_rate": 8.475966594429188e-05, "loss": 0.0001, "step": 19756 }, { "epoch": 84.44, "learning_rate": 8.475128761992506e-05, "loss": 0.0001, "step": 19760 }, { "epoch": 84.46, "learning_rate": 8.474290740755113e-05, "loss": 0.0001, "step": 19764 }, { "epoch": 84.48, "learning_rate": 8.473452530762535e-05, "loss": 0.0001, "step": 19768 }, { "epoch": 84.5, "learning_rate": 8.472614132060314e-05, "loss": 0.0016, "step": 19772 }, { "epoch": 84.51, "learning_rate": 8.471775544693998e-05, "loss": 0.0002, "step": 19776 }, { "epoch": 84.53, "learning_rate": 8.470936768709146e-05, "loss": 0.0, "step": 19780 }, { "epoch": 84.55, "learning_rate": 8.47009780415133e-05, "loss": 0.0004, "step": 19784 }, { "epoch": 84.56, "learning_rate": 8.469258651066128e-05, "loss": 0.0001, "step": 19788 }, { "epoch": 84.58, "learning_rate": 8.468419309499131e-05, "loss": 0.0002, "step": 19792 }, { "epoch": 84.6, "learning_rate": 8.467579779495939e-05, "loss": 0.0015, "step": 19796 }, { "epoch": 84.62, "learning_rate": 8.466740061102166e-05, "loss": 0.0, "step": 19800 }, { "epoch": 84.63, "learning_rate": 8.465900154363431e-05, "loss": 0.0003, "step": 19804 }, { "epoch": 84.65, "learning_rate": 8.465060059325365e-05, "loss": 0.0002, "step": 19808 }, { "epoch": 84.67, "learning_rate": 8.464219776033611e-05, "loss": 0.0001, "step": 19812 }, { "epoch": 84.68, "learning_rate": 8.463379304533818e-05, "loss": 0.0001, "step": 19816 }, { "epoch": 84.7, "learning_rate": 8.462538644871653e-05, "loss": 0.0008, "step": 19820 }, { "epoch": 84.72, "learning_rate": 8.461697797092784e-05, "loss": 0.0007, "step": 19824 }, { "epoch": 84.74, "learning_rate": 8.460856761242894e-05, "loss": 0.0, "step": 19828 }, { "epoch": 84.75, "learning_rate": 8.46001553736768e-05, "loss": 0.0, "step": 19832 }, { "epoch": 84.77, "learning_rate": 8.459174125512838e-05, "loss": 0.0003, "step": 19836 }, { "epoch": 84.79, "learning_rate": 8.458332525724086e-05, "loss": 0.001, "step": 19840 }, { "epoch": 84.8, "learning_rate": 8.457490738047147e-05, "loss": 0.0003, "step": 19844 }, { "epoch": 84.82, "learning_rate": 8.456648762527755e-05, "loss": 0.0003, "step": 19848 }, { "epoch": 84.84, "learning_rate": 8.455806599211651e-05, "loss": 0.0001, "step": 19852 }, { "epoch": 84.85, "learning_rate": 8.45496424814459e-05, "loss": 0.0001, "step": 19856 }, { "epoch": 84.87, "learning_rate": 8.454121709372339e-05, "loss": 0.0001, "step": 19860 }, { "epoch": 84.89, "learning_rate": 8.453278982940667e-05, "loss": 0.0013, "step": 19864 }, { "epoch": 84.91, "learning_rate": 8.452436068895365e-05, "loss": 0.0004, "step": 19868 }, { "epoch": 84.92, "learning_rate": 8.451592967282222e-05, "loss": 0.0017, "step": 19872 }, { "epoch": 84.94, "learning_rate": 8.450749678147048e-05, "loss": 0.0001, "step": 19876 }, { "epoch": 84.96, "learning_rate": 8.449906201535653e-05, "loss": 0.0004, "step": 19880 }, { "epoch": 84.97, "learning_rate": 8.449062537493868e-05, "loss": 0.0001, "step": 19884 }, { "epoch": 84.99, "learning_rate": 8.448218686067524e-05, "loss": 0.0003, "step": 19888 }, { "epoch": 85.01, "learning_rate": 8.447374647302469e-05, "loss": 0.0001, "step": 19892 }, { "epoch": 85.03, "learning_rate": 8.446530421244557e-05, "loss": 0.0001, "step": 19896 }, { "epoch": 85.04, "learning_rate": 8.445686007939657e-05, "loss": 0.0002, "step": 19900 }, { "epoch": 85.06, "learning_rate": 8.444841407433644e-05, "loss": 0.0008, "step": 19904 }, { "epoch": 85.08, "learning_rate": 8.443996619772401e-05, "loss": 0.0006, "step": 19908 }, { "epoch": 85.09, "learning_rate": 8.44315164500183e-05, "loss": 0.0001, "step": 19912 }, { "epoch": 85.11, "learning_rate": 8.442306483167833e-05, "loss": 0.0001, "step": 19916 }, { "epoch": 85.13, "learning_rate": 8.44146113431633e-05, "loss": 0.0001, "step": 19920 }, { "epoch": 85.15, "learning_rate": 8.44061559849325e-05, "loss": 0.0002, "step": 19924 }, { "epoch": 85.16, "learning_rate": 8.439769875744524e-05, "loss": 0.0003, "step": 19928 }, { "epoch": 85.18, "learning_rate": 8.438923966116104e-05, "loss": 0.0004, "step": 19932 }, { "epoch": 85.2, "learning_rate": 8.438077869653946e-05, "loss": 0.0001, "step": 19936 }, { "epoch": 85.21, "learning_rate": 8.437231586404019e-05, "loss": 0.0008, "step": 19940 }, { "epoch": 85.23, "learning_rate": 8.4363851164123e-05, "loss": 0.0004, "step": 19944 }, { "epoch": 85.25, "learning_rate": 8.435538459724775e-05, "loss": 0.0002, "step": 19948 }, { "epoch": 85.26, "learning_rate": 8.434691616387446e-05, "loss": 0.0022, "step": 19952 }, { "epoch": 85.28, "learning_rate": 8.433844586446318e-05, "loss": 0.0001, "step": 19956 }, { "epoch": 85.3, "learning_rate": 8.43299736994741e-05, "loss": 0.0002, "step": 19960 }, { "epoch": 85.32, "learning_rate": 8.432149966936754e-05, "loss": 0.0001, "step": 19964 }, { "epoch": 85.33, "learning_rate": 8.431302377460383e-05, "loss": 0.0001, "step": 19968 }, { "epoch": 85.35, "learning_rate": 8.43045460156435e-05, "loss": 0.0001, "step": 19972 }, { "epoch": 85.37, "learning_rate": 8.429606639294711e-05, "loss": 0.0, "step": 19976 }, { "epoch": 85.38, "learning_rate": 8.428758490697538e-05, "loss": 0.0007, "step": 19980 }, { "epoch": 85.4, "learning_rate": 8.427910155818909e-05, "loss": 0.0015, "step": 19984 }, { "epoch": 85.42, "learning_rate": 8.427061634704911e-05, "loss": 0.0001, "step": 19988 }, { "epoch": 85.44, "learning_rate": 8.426212927401649e-05, "loss": 0.0, "step": 19992 }, { "epoch": 85.45, "learning_rate": 8.425364033955225e-05, "loss": 0.0001, "step": 19996 }, { "epoch": 85.47, "learning_rate": 8.424514954411767e-05, "loss": 0.0024, "step": 20000 }, { "epoch": 85.47, "eval_exact_match": 0.5031185031185031, "eval_loss": 0.9105172753334045, "eval_runtime": 140.8685, "eval_samples_per_second": 6.829, "step": 20000 }, { "epoch": 85.49, "learning_rate": 8.423665688817397e-05, "loss": 0.0006, "step": 20004 }, { "epoch": 85.5, "learning_rate": 8.422816237218259e-05, "loss": 0.0013, "step": 20008 }, { "epoch": 85.52, "learning_rate": 8.421966599660502e-05, "loss": 0.0004, "step": 20012 }, { "epoch": 85.54, "learning_rate": 8.421116776190288e-05, "loss": 0.0003, "step": 20016 }, { "epoch": 85.56, "learning_rate": 8.420266766853784e-05, "loss": 0.0001, "step": 20020 }, { "epoch": 85.57, "learning_rate": 8.419416571697171e-05, "loss": 0.0001, "step": 20024 }, { "epoch": 85.59, "learning_rate": 8.418566190766641e-05, "loss": 0.0, "step": 20028 }, { "epoch": 85.61, "learning_rate": 8.417715624108393e-05, "loss": 0.0001, "step": 20032 }, { "epoch": 85.62, "learning_rate": 8.416864871768639e-05, "loss": 0.0001, "step": 20036 }, { "epoch": 85.64, "learning_rate": 8.416013933793599e-05, "loss": 0.0003, "step": 20040 }, { "epoch": 85.66, "learning_rate": 8.415162810229502e-05, "loss": 0.0001, "step": 20044 }, { "epoch": 85.68, "learning_rate": 8.414311501122591e-05, "loss": 0.0005, "step": 20048 }, { "epoch": 85.69, "learning_rate": 8.413460006519116e-05, "loss": 0.0002, "step": 20052 }, { "epoch": 85.71, "learning_rate": 8.412608326465337e-05, "loss": 0.0001, "step": 20056 }, { "epoch": 85.73, "learning_rate": 8.411756461007527e-05, "loss": 0.0001, "step": 20060 }, { "epoch": 85.74, "learning_rate": 8.410904410191967e-05, "loss": 0.0004, "step": 20064 }, { "epoch": 85.76, "learning_rate": 8.410052174064946e-05, "loss": 0.0011, "step": 20068 }, { "epoch": 85.78, "learning_rate": 8.409199752672767e-05, "loss": 0.0002, "step": 20072 }, { "epoch": 85.79, "learning_rate": 8.408347146061741e-05, "loss": 0.0, "step": 20076 }, { "epoch": 85.81, "learning_rate": 8.40749435427819e-05, "loss": 0.0001, "step": 20080 }, { "epoch": 85.83, "learning_rate": 8.406641377368446e-05, "loss": 0.0, "step": 20084 }, { "epoch": 85.85, "learning_rate": 8.405788215378847e-05, "loss": 0.0007, "step": 20088 }, { "epoch": 85.86, "learning_rate": 8.404934868355747e-05, "loss": 0.0002, "step": 20092 }, { "epoch": 85.88, "learning_rate": 8.404081336345507e-05, "loss": 0.0003, "step": 20096 }, { "epoch": 85.9, "learning_rate": 8.4032276193945e-05, "loss": 0.0005, "step": 20100 }, { "epoch": 85.91, "learning_rate": 8.402373717549105e-05, "loss": 0.0008, "step": 20104 }, { "epoch": 85.93, "learning_rate": 8.401519630855718e-05, "loss": 0.0008, "step": 20108 }, { "epoch": 85.95, "learning_rate": 8.400665359360737e-05, "loss": 0.0006, "step": 20112 }, { "epoch": 85.97, "learning_rate": 8.399810903110575e-05, "loss": 0.0001, "step": 20116 }, { "epoch": 85.98, "learning_rate": 8.398956262151654e-05, "loss": 0.0001, "step": 20120 }, { "epoch": 86.0, "learning_rate": 8.398101436530405e-05, "loss": 0.0001, "step": 20124 }, { "epoch": 86.02, "learning_rate": 8.397246426293273e-05, "loss": 0.0001, "step": 20128 }, { "epoch": 86.03, "learning_rate": 8.396391231486707e-05, "loss": 0.0001, "step": 20132 }, { "epoch": 86.05, "learning_rate": 8.395535852157168e-05, "loss": 0.0001, "step": 20136 }, { "epoch": 86.07, "learning_rate": 8.394680288351132e-05, "loss": 0.0, "step": 20140 }, { "epoch": 86.09, "learning_rate": 8.393824540115076e-05, "loss": 0.0002, "step": 20144 }, { "epoch": 86.1, "learning_rate": 8.392968607495497e-05, "loss": 0.0002, "step": 20148 }, { "epoch": 86.12, "learning_rate": 8.392112490538894e-05, "loss": 0.0001, "step": 20152 }, { "epoch": 86.14, "learning_rate": 8.391256189291779e-05, "loss": 0.0008, "step": 20156 }, { "epoch": 86.15, "learning_rate": 8.390399703800678e-05, "loss": 0.0002, "step": 20160 }, { "epoch": 86.17, "learning_rate": 8.389543034112117e-05, "loss": 0.0001, "step": 20164 }, { "epoch": 86.19, "learning_rate": 8.388686180272643e-05, "loss": 0.0003, "step": 20168 }, { "epoch": 86.21, "learning_rate": 8.387829142328807e-05, "loss": 0.0001, "step": 20172 }, { "epoch": 86.22, "learning_rate": 8.386971920327169e-05, "loss": 0.0002, "step": 20176 }, { "epoch": 86.24, "learning_rate": 8.386114514314303e-05, "loss": 0.0001, "step": 20180 }, { "epoch": 86.26, "learning_rate": 8.385256924336792e-05, "loss": 0.0002, "step": 20184 }, { "epoch": 86.27, "learning_rate": 8.384399150441225e-05, "loss": 0.0003, "step": 20188 }, { "epoch": 86.29, "learning_rate": 8.383541192674207e-05, "loss": 0.0001, "step": 20192 }, { "epoch": 86.31, "learning_rate": 8.382683051082349e-05, "loss": 0.0001, "step": 20196 }, { "epoch": 86.32, "learning_rate": 8.381824725712272e-05, "loss": 0.0, "step": 20200 }, { "epoch": 86.34, "learning_rate": 8.380966216610612e-05, "loss": 0.0003, "step": 20204 }, { "epoch": 86.36, "learning_rate": 8.380107523824008e-05, "loss": 0.0001, "step": 20208 }, { "epoch": 86.38, "learning_rate": 8.379248647399111e-05, "loss": 0.0, "step": 20212 }, { "epoch": 86.39, "learning_rate": 8.378389587382584e-05, "loss": 0.0, "step": 20216 }, { "epoch": 86.41, "learning_rate": 8.3775303438211e-05, "loss": 0.0006, "step": 20220 }, { "epoch": 86.43, "learning_rate": 8.37667091676134e-05, "loss": 0.0001, "step": 20224 }, { "epoch": 86.44, "learning_rate": 8.375811306249996e-05, "loss": 0.0001, "step": 20228 }, { "epoch": 86.46, "learning_rate": 8.37495151233377e-05, "loss": 0.0004, "step": 20232 }, { "epoch": 86.48, "learning_rate": 8.374091535059374e-05, "loss": 0.0001, "step": 20236 }, { "epoch": 86.5, "learning_rate": 8.373231374473531e-05, "loss": 0.0001, "step": 20240 }, { "epoch": 86.51, "learning_rate": 8.37237103062297e-05, "loss": 0.0006, "step": 20244 }, { "epoch": 86.53, "learning_rate": 8.371510503554436e-05, "loss": 0.0001, "step": 20248 }, { "epoch": 86.55, "learning_rate": 8.370649793314678e-05, "loss": 0.0001, "step": 20252 }, { "epoch": 86.56, "learning_rate": 8.369788899950457e-05, "loss": 0.0019, "step": 20256 }, { "epoch": 86.58, "learning_rate": 8.368927823508548e-05, "loss": 0.0001, "step": 20260 }, { "epoch": 86.6, "learning_rate": 8.36806656403573e-05, "loss": 0.0001, "step": 20264 }, { "epoch": 86.62, "learning_rate": 8.367205121578797e-05, "loss": 0.0, "step": 20268 }, { "epoch": 86.63, "learning_rate": 8.366343496184546e-05, "loss": 0.0001, "step": 20272 }, { "epoch": 86.65, "learning_rate": 8.365481687899793e-05, "loss": 0.0016, "step": 20276 }, { "epoch": 86.67, "learning_rate": 8.364619696771355e-05, "loss": 0.0, "step": 20280 }, { "epoch": 86.68, "learning_rate": 8.363757522846066e-05, "loss": 0.0002, "step": 20284 }, { "epoch": 86.7, "learning_rate": 8.362895166170768e-05, "loss": 0.0003, "step": 20288 }, { "epoch": 86.72, "learning_rate": 8.362032626792308e-05, "loss": 0.0005, "step": 20292 }, { "epoch": 86.74, "learning_rate": 8.361169904757553e-05, "loss": 0.0002, "step": 20296 }, { "epoch": 86.75, "learning_rate": 8.360307000113369e-05, "loss": 0.0002, "step": 20300 }, { "epoch": 86.77, "learning_rate": 8.359443912906639e-05, "loss": 0.0001, "step": 20304 }, { "epoch": 86.79, "learning_rate": 8.358580643184253e-05, "loss": 0.0005, "step": 20308 }, { "epoch": 86.8, "learning_rate": 8.357717190993113e-05, "loss": 0.0003, "step": 20312 }, { "epoch": 86.82, "learning_rate": 8.356853556380127e-05, "loss": 0.0, "step": 20316 }, { "epoch": 86.84, "learning_rate": 8.355989739392217e-05, "loss": 0.0012, "step": 20320 }, { "epoch": 86.85, "learning_rate": 8.355125740076315e-05, "loss": 0.0008, "step": 20324 }, { "epoch": 86.87, "learning_rate": 8.35426155847936e-05, "loss": 0.0008, "step": 20328 }, { "epoch": 86.89, "learning_rate": 8.3533971946483e-05, "loss": 0.0018, "step": 20332 }, { "epoch": 86.91, "learning_rate": 8.352532648630098e-05, "loss": 0.0009, "step": 20336 }, { "epoch": 86.92, "learning_rate": 8.351667920471723e-05, "loss": 0.0002, "step": 20340 }, { "epoch": 86.94, "learning_rate": 8.350803010220156e-05, "loss": 0.0003, "step": 20344 }, { "epoch": 86.96, "learning_rate": 8.349937917922386e-05, "loss": 0.0001, "step": 20348 }, { "epoch": 86.97, "learning_rate": 8.349072643625412e-05, "loss": 0.0001, "step": 20352 }, { "epoch": 86.99, "learning_rate": 8.348207187376245e-05, "loss": 0.0001, "step": 20356 }, { "epoch": 87.01, "learning_rate": 8.347341549221903e-05, "loss": 0.0001, "step": 20360 }, { "epoch": 87.03, "learning_rate": 8.346475729209416e-05, "loss": 0.0001, "step": 20364 }, { "epoch": 87.04, "learning_rate": 8.345609727385825e-05, "loss": 0.0002, "step": 20368 }, { "epoch": 87.06, "learning_rate": 8.344743543798176e-05, "loss": 0.0001, "step": 20372 }, { "epoch": 87.08, "learning_rate": 8.343877178493529e-05, "loss": 0.0001, "step": 20376 }, { "epoch": 87.09, "learning_rate": 8.343010631518955e-05, "loss": 0.0003, "step": 20380 }, { "epoch": 87.11, "learning_rate": 8.342143902921531e-05, "loss": 0.0, "step": 20384 }, { "epoch": 87.13, "learning_rate": 8.341276992748344e-05, "loss": 0.0001, "step": 20388 }, { "epoch": 87.15, "learning_rate": 8.340409901046496e-05, "loss": 0.0001, "step": 20392 }, { "epoch": 87.16, "learning_rate": 8.339542627863093e-05, "loss": 0.0009, "step": 20396 }, { "epoch": 87.18, "learning_rate": 8.338675173245254e-05, "loss": 0.0001, "step": 20400 }, { "epoch": 87.2, "learning_rate": 8.337807537240106e-05, "loss": 0.0002, "step": 20404 }, { "epoch": 87.21, "learning_rate": 8.336939719894788e-05, "loss": 0.0002, "step": 20408 }, { "epoch": 87.23, "learning_rate": 8.336071721256447e-05, "loss": 0.0003, "step": 20412 }, { "epoch": 87.25, "learning_rate": 8.335203541372242e-05, "loss": 0.0, "step": 20416 }, { "epoch": 87.26, "learning_rate": 8.33433518028934e-05, "loss": 0.0005, "step": 20420 }, { "epoch": 87.28, "learning_rate": 8.333466638054916e-05, "loss": 0.0003, "step": 20424 }, { "epoch": 87.3, "learning_rate": 8.33259791471616e-05, "loss": 0.0003, "step": 20428 }, { "epoch": 87.32, "learning_rate": 8.331729010320267e-05, "loss": 0.0004, "step": 20432 }, { "epoch": 87.33, "learning_rate": 8.330859924914444e-05, "loss": 0.0005, "step": 20436 }, { "epoch": 87.35, "learning_rate": 8.329990658545912e-05, "loss": 0.0011, "step": 20440 }, { "epoch": 87.37, "learning_rate": 8.329121211261892e-05, "loss": 0.0005, "step": 20444 }, { "epoch": 87.38, "learning_rate": 8.328251583109621e-05, "loss": 0.0001, "step": 20448 }, { "epoch": 87.4, "learning_rate": 8.327381774136347e-05, "loss": 0.0, "step": 20452 }, { "epoch": 87.42, "learning_rate": 8.326511784389326e-05, "loss": 0.0001, "step": 20456 }, { "epoch": 87.44, "learning_rate": 8.325641613915822e-05, "loss": 0.0002, "step": 20460 }, { "epoch": 87.45, "learning_rate": 8.324771262763114e-05, "loss": 0.0003, "step": 20464 }, { "epoch": 87.47, "learning_rate": 8.323900730978482e-05, "loss": 0.0016, "step": 20468 }, { "epoch": 87.49, "learning_rate": 8.323030018609225e-05, "loss": 0.0001, "step": 20472 }, { "epoch": 87.5, "learning_rate": 8.322159125702649e-05, "loss": 0.0, "step": 20476 }, { "epoch": 87.52, "learning_rate": 8.321288052306066e-05, "loss": 0.0002, "step": 20480 }, { "epoch": 87.54, "learning_rate": 8.320416798466803e-05, "loss": 0.0, "step": 20484 }, { "epoch": 87.56, "learning_rate": 8.319545364232193e-05, "loss": 0.0001, "step": 20488 }, { "epoch": 87.57, "learning_rate": 8.31867374964958e-05, "loss": 0.0001, "step": 20492 }, { "epoch": 87.59, "learning_rate": 8.317801954766318e-05, "loss": 0.0007, "step": 20496 }, { "epoch": 87.61, "learning_rate": 8.316929979629773e-05, "loss": 0.0001, "step": 20500 }, { "epoch": 87.62, "learning_rate": 8.316057824287315e-05, "loss": 0.0004, "step": 20504 }, { "epoch": 87.64, "learning_rate": 8.315185488786332e-05, "loss": 0.0, "step": 20508 }, { "epoch": 87.66, "learning_rate": 8.314312973174214e-05, "loss": 0.0013, "step": 20512 }, { "epoch": 87.68, "learning_rate": 8.313440277498366e-05, "loss": 0.0004, "step": 20516 }, { "epoch": 87.69, "learning_rate": 8.312567401806197e-05, "loss": 0.0005, "step": 20520 }, { "epoch": 87.71, "learning_rate": 8.311694346145133e-05, "loss": 0.0, "step": 20524 }, { "epoch": 87.73, "learning_rate": 8.310821110562608e-05, "loss": 0.0004, "step": 20528 }, { "epoch": 87.74, "learning_rate": 8.30994769510606e-05, "loss": 0.0, "step": 20532 }, { "epoch": 87.76, "learning_rate": 8.309074099822942e-05, "loss": 0.0002, "step": 20536 }, { "epoch": 87.78, "learning_rate": 8.308200324760717e-05, "loss": 0.0003, "step": 20540 }, { "epoch": 87.79, "learning_rate": 8.307326369966854e-05, "loss": 0.0001, "step": 20544 }, { "epoch": 87.81, "learning_rate": 8.306452235488839e-05, "loss": 0.0004, "step": 20548 }, { "epoch": 87.83, "learning_rate": 8.305577921374158e-05, "loss": 0.0001, "step": 20552 }, { "epoch": 87.85, "learning_rate": 8.304703427670313e-05, "loss": 0.0001, "step": 20556 }, { "epoch": 87.86, "learning_rate": 8.303828754424816e-05, "loss": 0.0001, "step": 20560 }, { "epoch": 87.88, "learning_rate": 8.302953901685187e-05, "loss": 0.0, "step": 20564 }, { "epoch": 87.9, "learning_rate": 8.302078869498955e-05, "loss": 0.0015, "step": 20568 }, { "epoch": 87.91, "learning_rate": 8.301203657913658e-05, "loss": 0.0001, "step": 20572 }, { "epoch": 87.93, "learning_rate": 8.300328266976848e-05, "loss": 0.0, "step": 20576 }, { "epoch": 87.95, "learning_rate": 8.299452696736086e-05, "loss": 0.0002, "step": 20580 }, { "epoch": 87.97, "learning_rate": 8.298576947238938e-05, "loss": 0.0001, "step": 20584 }, { "epoch": 87.98, "learning_rate": 8.297701018532982e-05, "loss": 0.0008, "step": 20588 }, { "epoch": 88.0, "learning_rate": 8.29682491066581e-05, "loss": 0.0002, "step": 20592 }, { "epoch": 88.02, "learning_rate": 8.295948623685014e-05, "loss": 0.0004, "step": 20596 }, { "epoch": 88.03, "learning_rate": 8.29507215763821e-05, "loss": 0.0005, "step": 20600 }, { "epoch": 88.05, "learning_rate": 8.294195512573011e-05, "loss": 0.0001, "step": 20604 }, { "epoch": 88.07, "learning_rate": 8.293318688537043e-05, "loss": 0.0001, "step": 20608 }, { "epoch": 88.09, "learning_rate": 8.292441685577946e-05, "loss": 0.0, "step": 20612 }, { "epoch": 88.1, "learning_rate": 8.291564503743365e-05, "loss": 0.0001, "step": 20616 }, { "epoch": 88.12, "learning_rate": 8.29068714308096e-05, "loss": 0.0001, "step": 20620 }, { "epoch": 88.14, "learning_rate": 8.289809603638391e-05, "loss": 0.0, "step": 20624 }, { "epoch": 88.15, "learning_rate": 8.28893188546334e-05, "loss": 0.0001, "step": 20628 }, { "epoch": 88.17, "learning_rate": 8.28805398860349e-05, "loss": 0.0, "step": 20632 }, { "epoch": 88.19, "learning_rate": 8.287175913106535e-05, "loss": 0.0, "step": 20636 }, { "epoch": 88.21, "learning_rate": 8.286297659020183e-05, "loss": 0.0001, "step": 20640 }, { "epoch": 88.22, "learning_rate": 8.285419226392148e-05, "loss": 0.0, "step": 20644 }, { "epoch": 88.24, "learning_rate": 8.284540615270152e-05, "loss": 0.0001, "step": 20648 }, { "epoch": 88.26, "learning_rate": 8.283661825701933e-05, "loss": 0.0001, "step": 20652 }, { "epoch": 88.27, "learning_rate": 8.28278285773523e-05, "loss": 0.0001, "step": 20656 }, { "epoch": 88.29, "learning_rate": 8.281903711417802e-05, "loss": 0.0001, "step": 20660 }, { "epoch": 88.31, "learning_rate": 8.28102438679741e-05, "loss": 0.0003, "step": 20664 }, { "epoch": 88.32, "learning_rate": 8.280144883921827e-05, "loss": 0.0001, "step": 20668 }, { "epoch": 88.34, "learning_rate": 8.279265202838833e-05, "loss": 0.0001, "step": 20672 }, { "epoch": 88.36, "learning_rate": 8.278385343596225e-05, "loss": 0.0001, "step": 20676 }, { "epoch": 88.38, "learning_rate": 8.277505306241799e-05, "loss": 0.0002, "step": 20680 }, { "epoch": 88.39, "learning_rate": 8.276625090823374e-05, "loss": 0.0, "step": 20684 }, { "epoch": 88.41, "learning_rate": 8.275744697388766e-05, "loss": 0.0, "step": 20688 }, { "epoch": 88.43, "learning_rate": 8.274864125985808e-05, "loss": 0.0, "step": 20692 }, { "epoch": 88.44, "learning_rate": 8.273983376662339e-05, "loss": 0.0005, "step": 20696 }, { "epoch": 88.46, "learning_rate": 8.273102449466212e-05, "loss": 0.0, "step": 20700 }, { "epoch": 88.48, "learning_rate": 8.272221344445285e-05, "loss": 0.0, "step": 20704 }, { "epoch": 88.5, "learning_rate": 8.27134006164743e-05, "loss": 0.0, "step": 20708 }, { "epoch": 88.51, "learning_rate": 8.270458601120523e-05, "loss": 0.0001, "step": 20712 }, { "epoch": 88.53, "learning_rate": 8.269576962912456e-05, "loss": 0.0, "step": 20716 }, { "epoch": 88.55, "learning_rate": 8.268695147071123e-05, "loss": 0.0001, "step": 20720 }, { "epoch": 88.56, "learning_rate": 8.267813153644439e-05, "loss": 0.0003, "step": 20724 }, { "epoch": 88.58, "learning_rate": 8.266930982680317e-05, "loss": 0.0017, "step": 20728 }, { "epoch": 88.6, "learning_rate": 8.266048634226686e-05, "loss": 0.0, "step": 20732 }, { "epoch": 88.62, "learning_rate": 8.265166108331483e-05, "loss": 0.0001, "step": 20736 }, { "epoch": 88.63, "learning_rate": 8.264283405042657e-05, "loss": 0.0, "step": 20740 }, { "epoch": 88.65, "learning_rate": 8.26340052440816e-05, "loss": 0.0, "step": 20744 }, { "epoch": 88.67, "learning_rate": 8.262517466475963e-05, "loss": 0.0, "step": 20748 }, { "epoch": 88.68, "learning_rate": 8.261634231294037e-05, "loss": 0.0, "step": 20752 }, { "epoch": 88.7, "learning_rate": 8.260750818910372e-05, "loss": 0.0, "step": 20756 }, { "epoch": 88.72, "learning_rate": 8.259867229372961e-05, "loss": 0.0001, "step": 20760 }, { "epoch": 88.74, "learning_rate": 8.258983462729808e-05, "loss": 0.0003, "step": 20764 }, { "epoch": 88.75, "learning_rate": 8.258099519028928e-05, "loss": 0.0, "step": 20768 }, { "epoch": 88.77, "learning_rate": 8.257215398318344e-05, "loss": 0.0002, "step": 20772 }, { "epoch": 88.79, "learning_rate": 8.25633110064609e-05, "loss": 0.0, "step": 20776 }, { "epoch": 88.8, "learning_rate": 8.25544662606021e-05, "loss": 0.0001, "step": 20780 }, { "epoch": 88.82, "learning_rate": 8.254561974608757e-05, "loss": 0.0002, "step": 20784 }, { "epoch": 88.84, "learning_rate": 8.253677146339791e-05, "loss": 0.0001, "step": 20788 }, { "epoch": 88.85, "learning_rate": 8.252792141301386e-05, "loss": 0.0012, "step": 20792 }, { "epoch": 88.87, "learning_rate": 8.251906959541625e-05, "loss": 0.0, "step": 20796 }, { "epoch": 88.89, "learning_rate": 8.251021601108593e-05, "loss": 0.0, "step": 20800 }, { "epoch": 88.91, "learning_rate": 8.250136066050397e-05, "loss": 0.0, "step": 20804 }, { "epoch": 88.92, "learning_rate": 8.249250354415145e-05, "loss": 0.001, "step": 20808 }, { "epoch": 88.94, "learning_rate": 8.248364466250959e-05, "loss": 0.0001, "step": 20812 }, { "epoch": 88.96, "learning_rate": 8.247478401605963e-05, "loss": 0.0001, "step": 20816 }, { "epoch": 88.97, "learning_rate": 8.246592160528302e-05, "loss": 0.0001, "step": 20820 }, { "epoch": 88.99, "learning_rate": 8.245705743066123e-05, "loss": 0.0002, "step": 20824 }, { "epoch": 89.01, "learning_rate": 8.244819149267584e-05, "loss": 0.0013, "step": 20828 }, { "epoch": 89.03, "learning_rate": 8.24393237918085e-05, "loss": 0.0003, "step": 20832 }, { "epoch": 89.04, "learning_rate": 8.243045432854102e-05, "loss": 0.0001, "step": 20836 }, { "epoch": 89.06, "learning_rate": 8.242158310335528e-05, "loss": 0.0, "step": 20840 }, { "epoch": 89.08, "learning_rate": 8.241271011673322e-05, "loss": 0.0001, "step": 20844 }, { "epoch": 89.09, "learning_rate": 8.24038353691569e-05, "loss": 0.0001, "step": 20848 }, { "epoch": 89.11, "learning_rate": 8.239495886110848e-05, "loss": 0.0002, "step": 20852 }, { "epoch": 89.13, "learning_rate": 8.238608059307023e-05, "loss": 0.0001, "step": 20856 }, { "epoch": 89.15, "learning_rate": 8.237720056552449e-05, "loss": 0.0001, "step": 20860 }, { "epoch": 89.16, "learning_rate": 8.23683187789537e-05, "loss": 0.0, "step": 20864 }, { "epoch": 89.18, "learning_rate": 8.23594352338404e-05, "loss": 0.0, "step": 20868 }, { "epoch": 89.2, "learning_rate": 8.235054993066722e-05, "loss": 0.0, "step": 20872 }, { "epoch": 89.21, "learning_rate": 8.23416628699169e-05, "loss": 0.0, "step": 20876 }, { "epoch": 89.23, "learning_rate": 8.233277405207227e-05, "loss": 0.0001, "step": 20880 }, { "epoch": 89.25, "learning_rate": 8.232388347761624e-05, "loss": 0.0021, "step": 20884 }, { "epoch": 89.26, "learning_rate": 8.231499114703184e-05, "loss": 0.0004, "step": 20888 }, { "epoch": 89.28, "learning_rate": 8.230609706080217e-05, "loss": 0.0004, "step": 20892 }, { "epoch": 89.3, "learning_rate": 8.229720121941043e-05, "loss": 0.0001, "step": 20896 }, { "epoch": 89.32, "learning_rate": 8.228830362333995e-05, "loss": 0.0, "step": 20900 }, { "epoch": 89.33, "learning_rate": 8.227940427307413e-05, "loss": 0.0001, "step": 20904 }, { "epoch": 89.35, "learning_rate": 8.227050316909642e-05, "loss": 0.0001, "step": 20908 }, { "epoch": 89.37, "learning_rate": 8.226160031189045e-05, "loss": 0.0, "step": 20912 }, { "epoch": 89.38, "learning_rate": 8.225269570193989e-05, "loss": 0.0001, "step": 20916 }, { "epoch": 89.4, "learning_rate": 8.224378933972851e-05, "loss": 0.0, "step": 20920 }, { "epoch": 89.42, "learning_rate": 8.223488122574022e-05, "loss": 0.0, "step": 20924 }, { "epoch": 89.44, "learning_rate": 8.222597136045895e-05, "loss": 0.0, "step": 20928 }, { "epoch": 89.45, "learning_rate": 8.221705974436881e-05, "loss": 0.0001, "step": 20932 }, { "epoch": 89.47, "learning_rate": 8.22081463779539e-05, "loss": 0.0, "step": 20936 }, { "epoch": 89.49, "learning_rate": 8.219923126169853e-05, "loss": 0.0002, "step": 20940 }, { "epoch": 89.5, "learning_rate": 8.219031439608702e-05, "loss": 0.0003, "step": 20944 }, { "epoch": 89.52, "learning_rate": 8.218139578160382e-05, "loss": 0.0009, "step": 20948 }, { "epoch": 89.54, "learning_rate": 8.217247541873347e-05, "loss": 0.0001, "step": 20952 }, { "epoch": 89.56, "learning_rate": 8.216355330796061e-05, "loss": 0.0006, "step": 20956 }, { "epoch": 89.57, "learning_rate": 8.215462944976999e-05, "loss": 0.0, "step": 20960 }, { "epoch": 89.59, "learning_rate": 8.21457038446464e-05, "loss": 0.0002, "step": 20964 }, { "epoch": 89.61, "learning_rate": 8.213677649307478e-05, "loss": 0.0002, "step": 20968 }, { "epoch": 89.62, "learning_rate": 8.212784739554015e-05, "loss": 0.0, "step": 20972 }, { "epoch": 89.64, "learning_rate": 8.21189165525276e-05, "loss": 0.0006, "step": 20976 }, { "epoch": 89.66, "learning_rate": 8.210998396452235e-05, "loss": 0.0, "step": 20980 }, { "epoch": 89.68, "learning_rate": 8.21010496320097e-05, "loss": 0.0, "step": 20984 }, { "epoch": 89.69, "learning_rate": 8.209211355547504e-05, "loss": 0.0, "step": 20988 }, { "epoch": 89.71, "learning_rate": 8.208317573540386e-05, "loss": 0.0, "step": 20992 }, { "epoch": 89.73, "learning_rate": 8.207423617228174e-05, "loss": 0.0, "step": 20996 }, { "epoch": 89.74, "learning_rate": 8.206529486659435e-05, "loss": 0.0001, "step": 21000 }, { "epoch": 89.74, "eval_exact_match": 0.5083160083160083, "eval_loss": 0.9255384206771851, "eval_runtime": 135.5229, "eval_samples_per_second": 7.098, "step": 21000 }, { "epoch": 89.76, "learning_rate": 8.20563518188275e-05, "loss": 0.0, "step": 21004 }, { "epoch": 89.78, "learning_rate": 8.204740702946702e-05, "loss": 0.0001, "step": 21008 }, { "epoch": 89.79, "learning_rate": 8.203846049899891e-05, "loss": 0.0005, "step": 21012 }, { "epoch": 89.81, "learning_rate": 8.202951222790916e-05, "loss": 0.0001, "step": 21016 }, { "epoch": 89.83, "learning_rate": 8.2020562216684e-05, "loss": 0.0, "step": 21020 }, { "epoch": 89.85, "learning_rate": 8.201161046580963e-05, "loss": 0.0, "step": 21024 }, { "epoch": 89.86, "learning_rate": 8.200265697577241e-05, "loss": 0.0004, "step": 21028 }, { "epoch": 89.88, "learning_rate": 8.199370174705876e-05, "loss": 0.001, "step": 21032 }, { "epoch": 89.9, "learning_rate": 8.198474478015521e-05, "loss": 0.0001, "step": 21036 }, { "epoch": 89.91, "learning_rate": 8.197578607554842e-05, "loss": 0.0002, "step": 21040 }, { "epoch": 89.93, "learning_rate": 8.196682563372505e-05, "loss": 0.0002, "step": 21044 }, { "epoch": 89.95, "learning_rate": 8.195786345517196e-05, "loss": 0.0011, "step": 21048 }, { "epoch": 89.97, "learning_rate": 8.194889954037603e-05, "loss": 0.0001, "step": 21052 }, { "epoch": 89.98, "learning_rate": 8.193993388982428e-05, "loss": 0.0001, "step": 21056 }, { "epoch": 90.0, "learning_rate": 8.19309665040038e-05, "loss": 0.0, "step": 21060 }, { "epoch": 90.02, "learning_rate": 8.192199738340177e-05, "loss": 0.0001, "step": 21064 }, { "epoch": 90.03, "learning_rate": 8.19130265285055e-05, "loss": 0.0001, "step": 21068 }, { "epoch": 90.05, "learning_rate": 8.190405393980234e-05, "loss": 0.0001, "step": 21072 }, { "epoch": 90.07, "learning_rate": 8.189507961777976e-05, "loss": 0.0011, "step": 21076 }, { "epoch": 90.09, "learning_rate": 8.188610356292536e-05, "loss": 0.0, "step": 21080 }, { "epoch": 90.1, "learning_rate": 8.187712577572679e-05, "loss": 0.0001, "step": 21084 }, { "epoch": 90.12, "learning_rate": 8.186814625667178e-05, "loss": 0.0001, "step": 21088 }, { "epoch": 90.14, "learning_rate": 8.185916500624821e-05, "loss": 0.0001, "step": 21092 }, { "epoch": 90.15, "learning_rate": 8.185018202494401e-05, "loss": 0.0005, "step": 21096 }, { "epoch": 90.17, "learning_rate": 8.184119731324722e-05, "loss": 0.0, "step": 21100 }, { "epoch": 90.19, "learning_rate": 8.183221087164596e-05, "loss": 0.0001, "step": 21104 }, { "epoch": 90.21, "learning_rate": 8.182322270062848e-05, "loss": 0.0003, "step": 21108 }, { "epoch": 90.22, "learning_rate": 8.181423280068308e-05, "loss": 0.0, "step": 21112 }, { "epoch": 90.24, "learning_rate": 8.180524117229818e-05, "loss": 0.0, "step": 21116 }, { "epoch": 90.26, "learning_rate": 8.179624781596226e-05, "loss": 0.0001, "step": 21120 }, { "epoch": 90.27, "learning_rate": 8.1787252732164e-05, "loss": 0.0002, "step": 21124 }, { "epoch": 90.29, "learning_rate": 8.1778255921392e-05, "loss": 0.0001, "step": 21128 }, { "epoch": 90.31, "learning_rate": 8.17692573841351e-05, "loss": 0.0, "step": 21132 }, { "epoch": 90.32, "learning_rate": 8.176025712088218e-05, "loss": 0.0001, "step": 21136 }, { "epoch": 90.34, "learning_rate": 8.175125513212221e-05, "loss": 0.0001, "step": 21140 }, { "epoch": 90.36, "learning_rate": 8.174225141834426e-05, "loss": 0.0002, "step": 21144 }, { "epoch": 90.38, "learning_rate": 8.17332459800375e-05, "loss": 0.0001, "step": 21148 }, { "epoch": 90.39, "learning_rate": 8.172423881769117e-05, "loss": 0.0006, "step": 21152 }, { "epoch": 90.41, "learning_rate": 8.171522993179463e-05, "loss": 0.0001, "step": 21156 }, { "epoch": 90.43, "learning_rate": 8.170621932283735e-05, "loss": 0.0003, "step": 21160 }, { "epoch": 90.44, "learning_rate": 8.169720699130883e-05, "loss": 0.0, "step": 21164 }, { "epoch": 90.46, "learning_rate": 8.168819293769874e-05, "loss": 0.0001, "step": 21168 }, { "epoch": 90.48, "learning_rate": 8.167917716249677e-05, "loss": 0.0001, "step": 21172 }, { "epoch": 90.5, "learning_rate": 8.167015966619274e-05, "loss": 0.0, "step": 21176 }, { "epoch": 90.51, "learning_rate": 8.16611404492766e-05, "loss": 0.0, "step": 21180 }, { "epoch": 90.53, "learning_rate": 8.165211951223832e-05, "loss": 0.0001, "step": 21184 }, { "epoch": 90.55, "learning_rate": 8.164309685556802e-05, "loss": 0.0, "step": 21188 }, { "epoch": 90.56, "learning_rate": 8.163407247975588e-05, "loss": 0.0001, "step": 21192 }, { "epoch": 90.58, "learning_rate": 8.162504638529218e-05, "loss": 0.0002, "step": 21196 }, { "epoch": 90.6, "learning_rate": 8.161601857266732e-05, "loss": 0.0, "step": 21200 }, { "epoch": 90.62, "learning_rate": 8.160698904237176e-05, "loss": 0.0003, "step": 21204 }, { "epoch": 90.63, "learning_rate": 8.15979577948961e-05, "loss": 0.0001, "step": 21208 }, { "epoch": 90.65, "learning_rate": 8.158892483073094e-05, "loss": 0.0001, "step": 21212 }, { "epoch": 90.67, "learning_rate": 8.157989015036707e-05, "loss": 0.0002, "step": 21216 }, { "epoch": 90.68, "learning_rate": 8.157085375429533e-05, "loss": 0.0, "step": 21220 }, { "epoch": 90.7, "learning_rate": 8.156181564300667e-05, "loss": 0.0002, "step": 21224 }, { "epoch": 90.72, "learning_rate": 8.155277581699212e-05, "loss": 0.0001, "step": 21228 }, { "epoch": 90.74, "learning_rate": 8.154373427674278e-05, "loss": 0.0, "step": 21232 }, { "epoch": 90.75, "learning_rate": 8.153469102274988e-05, "loss": 0.0002, "step": 21236 }, { "epoch": 90.77, "learning_rate": 8.152564605550476e-05, "loss": 0.0, "step": 21240 }, { "epoch": 90.79, "learning_rate": 8.15165993754988e-05, "loss": 0.0003, "step": 21244 }, { "epoch": 90.8, "learning_rate": 8.150755098322351e-05, "loss": 0.0002, "step": 21248 }, { "epoch": 90.82, "learning_rate": 8.149850087917045e-05, "loss": 0.0002, "step": 21252 }, { "epoch": 90.84, "learning_rate": 8.148944906383136e-05, "loss": 0.0002, "step": 21256 }, { "epoch": 90.85, "learning_rate": 8.148039553769796e-05, "loss": 0.0001, "step": 21260 }, { "epoch": 90.87, "learning_rate": 8.147134030126217e-05, "loss": 0.0001, "step": 21264 }, { "epoch": 90.89, "learning_rate": 8.146228335501591e-05, "loss": 0.0001, "step": 21268 }, { "epoch": 90.91, "learning_rate": 8.145322469945126e-05, "loss": 0.0001, "step": 21272 }, { "epoch": 90.92, "learning_rate": 8.144416433506036e-05, "loss": 0.0002, "step": 21276 }, { "epoch": 90.94, "learning_rate": 8.143510226233546e-05, "loss": 0.0001, "step": 21280 }, { "epoch": 90.96, "learning_rate": 8.14260384817689e-05, "loss": 0.0002, "step": 21284 }, { "epoch": 90.97, "learning_rate": 8.141697299385308e-05, "loss": 0.0001, "step": 21288 }, { "epoch": 90.99, "learning_rate": 8.140790579908056e-05, "loss": 0.0, "step": 21292 }, { "epoch": 91.01, "learning_rate": 8.13988368979439e-05, "loss": 0.0001, "step": 21296 }, { "epoch": 91.03, "learning_rate": 8.138976629093586e-05, "loss": 0.0, "step": 21300 }, { "epoch": 91.04, "learning_rate": 8.13806939785492e-05, "loss": 0.0021, "step": 21304 }, { "epoch": 91.06, "learning_rate": 8.137161996127682e-05, "loss": 0.0011, "step": 21308 }, { "epoch": 91.08, "learning_rate": 8.136254423961172e-05, "loss": 0.0, "step": 21312 }, { "epoch": 91.09, "learning_rate": 8.135346681404696e-05, "loss": 0.0, "step": 21316 }, { "epoch": 91.11, "learning_rate": 8.134438768507572e-05, "loss": 0.0, "step": 21320 }, { "epoch": 91.13, "learning_rate": 8.133530685319125e-05, "loss": 0.0003, "step": 21324 }, { "epoch": 91.15, "learning_rate": 8.13262243188869e-05, "loss": 0.0001, "step": 21328 }, { "epoch": 91.16, "learning_rate": 8.131714008265614e-05, "loss": 0.0001, "step": 21332 }, { "epoch": 91.18, "learning_rate": 8.130805414499247e-05, "loss": 0.0, "step": 21336 }, { "epoch": 91.2, "learning_rate": 8.129896650638956e-05, "loss": 0.0001, "step": 21340 }, { "epoch": 91.21, "learning_rate": 8.128987716734111e-05, "loss": 0.0, "step": 21344 }, { "epoch": 91.23, "learning_rate": 8.128078612834095e-05, "loss": 0.0001, "step": 21348 }, { "epoch": 91.25, "learning_rate": 8.127169338988299e-05, "loss": 0.0, "step": 21352 }, { "epoch": 91.26, "learning_rate": 8.12625989524612e-05, "loss": 0.0, "step": 21356 }, { "epoch": 91.28, "learning_rate": 8.125350281656969e-05, "loss": 0.0001, "step": 21360 }, { "epoch": 91.3, "learning_rate": 8.124440498270267e-05, "loss": 0.0, "step": 21364 }, { "epoch": 91.32, "learning_rate": 8.123530545135439e-05, "loss": 0.0001, "step": 21368 }, { "epoch": 91.33, "learning_rate": 8.122620422301923e-05, "loss": 0.0003, "step": 21372 }, { "epoch": 91.35, "learning_rate": 8.121710129819163e-05, "loss": 0.0001, "step": 21376 }, { "epoch": 91.37, "learning_rate": 8.120799667736618e-05, "loss": 0.0006, "step": 21380 }, { "epoch": 91.38, "learning_rate": 8.11988903610375e-05, "loss": 0.0001, "step": 21384 }, { "epoch": 91.4, "learning_rate": 8.118978234970035e-05, "loss": 0.0, "step": 21388 }, { "epoch": 91.42, "learning_rate": 8.118067264384955e-05, "loss": 0.0003, "step": 21392 }, { "epoch": 91.44, "learning_rate": 8.117156124397999e-05, "loss": 0.0001, "step": 21396 }, { "epoch": 91.45, "learning_rate": 8.116244815058673e-05, "loss": 0.0, "step": 21400 }, { "epoch": 91.47, "learning_rate": 8.115333336416486e-05, "loss": 0.0001, "step": 21404 }, { "epoch": 91.49, "learning_rate": 8.114421688520957e-05, "loss": 0.0001, "step": 21408 }, { "epoch": 91.5, "learning_rate": 8.113509871421618e-05, "loss": 0.0001, "step": 21412 }, { "epoch": 91.52, "learning_rate": 8.112597885168003e-05, "loss": 0.0, "step": 21416 }, { "epoch": 91.54, "learning_rate": 8.111685729809662e-05, "loss": 0.0002, "step": 21420 }, { "epoch": 91.56, "learning_rate": 8.110773405396153e-05, "loss": 0.0001, "step": 21424 }, { "epoch": 91.57, "learning_rate": 8.109860911977039e-05, "loss": 0.0002, "step": 21428 }, { "epoch": 91.59, "learning_rate": 8.108948249601894e-05, "loss": 0.0002, "step": 21432 }, { "epoch": 91.61, "learning_rate": 8.108035418320306e-05, "loss": 0.0001, "step": 21436 }, { "epoch": 91.62, "learning_rate": 8.107122418181865e-05, "loss": 0.0028, "step": 21440 }, { "epoch": 91.64, "learning_rate": 8.106209249236177e-05, "loss": 0.0, "step": 21444 }, { "epoch": 91.66, "learning_rate": 8.105295911532848e-05, "loss": 0.0004, "step": 21448 }, { "epoch": 91.68, "learning_rate": 8.104382405121506e-05, "loss": 0.0002, "step": 21452 }, { "epoch": 91.69, "learning_rate": 8.103468730051774e-05, "loss": 0.0, "step": 21456 }, { "epoch": 91.71, "learning_rate": 8.102554886373296e-05, "loss": 0.0, "step": 21460 }, { "epoch": 91.73, "learning_rate": 8.10164087413572e-05, "loss": 0.0, "step": 21464 }, { "epoch": 91.74, "learning_rate": 8.100726693388703e-05, "loss": 0.0001, "step": 21468 }, { "epoch": 91.76, "learning_rate": 8.099812344181909e-05, "loss": 0.0001, "step": 21472 }, { "epoch": 91.78, "learning_rate": 8.098897826565016e-05, "loss": 0.0001, "step": 21476 }, { "epoch": 91.79, "learning_rate": 8.09798314058771e-05, "loss": 0.0001, "step": 21480 }, { "epoch": 91.81, "learning_rate": 8.097068286299683e-05, "loss": 0.0021, "step": 21484 }, { "epoch": 91.83, "learning_rate": 8.096153263750639e-05, "loss": 0.0003, "step": 21488 }, { "epoch": 91.85, "learning_rate": 8.095238072990289e-05, "loss": 0.0001, "step": 21492 }, { "epoch": 91.86, "learning_rate": 8.094322714068358e-05, "loss": 0.0, "step": 21496 }, { "epoch": 91.88, "learning_rate": 8.093407187034576e-05, "loss": 0.0018, "step": 21500 }, { "epoch": 91.9, "learning_rate": 8.092491491938678e-05, "loss": 0.0007, "step": 21504 }, { "epoch": 91.91, "learning_rate": 8.091575628830419e-05, "loss": 0.0001, "step": 21508 }, { "epoch": 91.93, "learning_rate": 8.090659597759554e-05, "loss": 0.0001, "step": 21512 }, { "epoch": 91.95, "learning_rate": 8.089743398775849e-05, "loss": 0.0, "step": 21516 }, { "epoch": 91.97, "learning_rate": 8.088827031929081e-05, "loss": 0.0, "step": 21520 }, { "epoch": 91.98, "learning_rate": 8.08791049726904e-05, "loss": 0.0001, "step": 21524 }, { "epoch": 92.0, "learning_rate": 8.086993794845514e-05, "loss": 0.0001, "step": 21528 }, { "epoch": 92.02, "learning_rate": 8.08607692470831e-05, "loss": 0.0001, "step": 21532 }, { "epoch": 92.03, "learning_rate": 8.085159886907239e-05, "loss": 0.0, "step": 21536 }, { "epoch": 92.05, "learning_rate": 8.084242681492125e-05, "loss": 0.0001, "step": 21540 }, { "epoch": 92.07, "learning_rate": 8.083325308512799e-05, "loss": 0.0, "step": 21544 }, { "epoch": 92.09, "learning_rate": 8.082407768019099e-05, "loss": 0.0, "step": 21548 }, { "epoch": 92.1, "learning_rate": 8.081490060060875e-05, "loss": 0.0005, "step": 21552 }, { "epoch": 92.12, "learning_rate": 8.080572184687987e-05, "loss": 0.0, "step": 21556 }, { "epoch": 92.14, "learning_rate": 8.0796541419503e-05, "loss": 0.0001, "step": 21560 }, { "epoch": 92.15, "learning_rate": 8.078735931897691e-05, "loss": 0.0004, "step": 21564 }, { "epoch": 92.17, "learning_rate": 8.077817554580045e-05, "loss": 0.0003, "step": 21568 }, { "epoch": 92.19, "learning_rate": 8.076899010047259e-05, "loss": 0.0, "step": 21572 }, { "epoch": 92.21, "learning_rate": 8.075980298349235e-05, "loss": 0.0002, "step": 21576 }, { "epoch": 92.22, "learning_rate": 8.075061419535885e-05, "loss": 0.0001, "step": 21580 }, { "epoch": 92.24, "learning_rate": 8.074142373657135e-05, "loss": 0.0, "step": 21584 }, { "epoch": 92.26, "learning_rate": 8.07322316076291e-05, "loss": 0.0002, "step": 21588 }, { "epoch": 92.27, "learning_rate": 8.072303780903153e-05, "loss": 0.0004, "step": 21592 }, { "epoch": 92.29, "learning_rate": 8.071384234127813e-05, "loss": 0.0, "step": 21596 }, { "epoch": 92.31, "learning_rate": 8.070464520486849e-05, "loss": 0.0018, "step": 21600 }, { "epoch": 92.32, "learning_rate": 8.069544640030227e-05, "loss": 0.0, "step": 21604 }, { "epoch": 92.34, "learning_rate": 8.068624592807924e-05, "loss": 0.0001, "step": 21608 }, { "epoch": 92.36, "learning_rate": 8.067704378869927e-05, "loss": 0.0, "step": 21612 }, { "epoch": 92.38, "learning_rate": 8.066783998266225e-05, "loss": 0.0, "step": 21616 }, { "epoch": 92.39, "learning_rate": 8.065863451046828e-05, "loss": 0.0003, "step": 21620 }, { "epoch": 92.41, "learning_rate": 8.064942737261745e-05, "loss": 0.0004, "step": 21624 }, { "epoch": 92.43, "learning_rate": 8.064021856960998e-05, "loss": 0.0001, "step": 21628 }, { "epoch": 92.44, "learning_rate": 8.063100810194616e-05, "loss": 0.0, "step": 21632 }, { "epoch": 92.46, "learning_rate": 8.062179597012641e-05, "loss": 0.0001, "step": 21636 }, { "epoch": 92.48, "learning_rate": 8.061258217465121e-05, "loss": 0.0003, "step": 21640 }, { "epoch": 92.5, "learning_rate": 8.060336671602116e-05, "loss": 0.0001, "step": 21644 }, { "epoch": 92.51, "learning_rate": 8.059414959473689e-05, "loss": 0.0001, "step": 21648 }, { "epoch": 92.53, "learning_rate": 8.058493081129917e-05, "loss": 0.0012, "step": 21652 }, { "epoch": 92.55, "learning_rate": 8.057571036620886e-05, "loss": 0.0, "step": 21656 }, { "epoch": 92.56, "learning_rate": 8.056648825996688e-05, "loss": 0.0001, "step": 21660 }, { "epoch": 92.58, "learning_rate": 8.05572644930743e-05, "loss": 0.0, "step": 21664 }, { "epoch": 92.6, "learning_rate": 8.054803906603219e-05, "loss": 0.0, "step": 21668 }, { "epoch": 92.62, "learning_rate": 8.053881197934178e-05, "loss": 0.0, "step": 21672 }, { "epoch": 92.63, "learning_rate": 8.052958323350437e-05, "loss": 0.0, "step": 21676 }, { "epoch": 92.65, "learning_rate": 8.052035282902135e-05, "loss": 0.0001, "step": 21680 }, { "epoch": 92.67, "learning_rate": 8.05111207663942e-05, "loss": 0.0, "step": 21684 }, { "epoch": 92.68, "learning_rate": 8.050188704612448e-05, "loss": 0.0002, "step": 21688 }, { "epoch": 92.7, "learning_rate": 8.049265166871387e-05, "loss": 0.0, "step": 21692 }, { "epoch": 92.72, "learning_rate": 8.04834146346641e-05, "loss": 0.0001, "step": 21696 }, { "epoch": 92.74, "learning_rate": 8.047417594447702e-05, "loss": 0.0001, "step": 21700 }, { "epoch": 92.75, "learning_rate": 8.046493559865456e-05, "loss": 0.0, "step": 21704 }, { "epoch": 92.77, "learning_rate": 8.045569359769874e-05, "loss": 0.0001, "step": 21708 }, { "epoch": 92.79, "learning_rate": 8.044644994211165e-05, "loss": 0.0006, "step": 21712 }, { "epoch": 92.8, "learning_rate": 8.043720463239553e-05, "loss": 0.0001, "step": 21716 }, { "epoch": 92.82, "learning_rate": 8.042795766905264e-05, "loss": 0.0004, "step": 21720 }, { "epoch": 92.84, "learning_rate": 8.041870905258538e-05, "loss": 0.0001, "step": 21724 }, { "epoch": 92.85, "learning_rate": 8.040945878349617e-05, "loss": 0.0001, "step": 21728 }, { "epoch": 92.87, "learning_rate": 8.040020686228764e-05, "loss": 0.0001, "step": 21732 }, { "epoch": 92.89, "learning_rate": 8.039095328946239e-05, "loss": 0.0018, "step": 21736 }, { "epoch": 92.91, "learning_rate": 8.038169806552318e-05, "loss": 0.0, "step": 21740 }, { "epoch": 92.92, "learning_rate": 8.037244119097283e-05, "loss": 0.0, "step": 21744 }, { "epoch": 92.94, "learning_rate": 8.036318266631425e-05, "loss": 0.0, "step": 21748 }, { "epoch": 92.96, "learning_rate": 8.035392249205046e-05, "loss": 0.0004, "step": 21752 }, { "epoch": 92.97, "learning_rate": 8.034466066868454e-05, "loss": 0.0026, "step": 21756 }, { "epoch": 92.99, "learning_rate": 8.033539719671972e-05, "loss": 0.0001, "step": 21760 }, { "epoch": 93.01, "learning_rate": 8.032613207665922e-05, "loss": 0.0001, "step": 21764 }, { "epoch": 93.03, "learning_rate": 8.031686530900645e-05, "loss": 0.0001, "step": 21768 }, { "epoch": 93.04, "learning_rate": 8.030759689426484e-05, "loss": 0.0, "step": 21772 }, { "epoch": 93.06, "learning_rate": 8.029832683293795e-05, "loss": 0.0001, "step": 21776 }, { "epoch": 93.08, "learning_rate": 8.028905512552939e-05, "loss": 0.0001, "step": 21780 }, { "epoch": 93.09, "learning_rate": 8.027978177254292e-05, "loss": 0.0001, "step": 21784 }, { "epoch": 93.11, "learning_rate": 8.027050677448231e-05, "loss": 0.0, "step": 21788 }, { "epoch": 93.13, "learning_rate": 8.026123013185152e-05, "loss": 0.0009, "step": 21792 }, { "epoch": 93.15, "learning_rate": 8.025195184515448e-05, "loss": 0.0008, "step": 21796 }, { "epoch": 93.16, "learning_rate": 8.02426719148953e-05, "loss": 0.0003, "step": 21800 }, { "epoch": 93.18, "learning_rate": 8.023339034157817e-05, "loss": 0.0, "step": 21804 }, { "epoch": 93.2, "learning_rate": 8.022410712570732e-05, "loss": 0.0016, "step": 21808 }, { "epoch": 93.21, "learning_rate": 8.021482226778712e-05, "loss": 0.0001, "step": 21812 }, { "epoch": 93.23, "learning_rate": 8.020553576832197e-05, "loss": 0.0001, "step": 21816 }, { "epoch": 93.25, "learning_rate": 8.019624762781643e-05, "loss": 0.0, "step": 21820 }, { "epoch": 93.26, "learning_rate": 8.018695784677513e-05, "loss": 0.002, "step": 21824 }, { "epoch": 93.28, "learning_rate": 8.017766642570276e-05, "loss": 0.0, "step": 21828 }, { "epoch": 93.3, "learning_rate": 8.01683733651041e-05, "loss": 0.0002, "step": 21832 }, { "epoch": 93.32, "learning_rate": 8.015907866548404e-05, "loss": 0.0001, "step": 21836 }, { "epoch": 93.33, "learning_rate": 8.014978232734757e-05, "loss": 0.0, "step": 21840 }, { "epoch": 93.35, "learning_rate": 8.014048435119973e-05, "loss": 0.0002, "step": 21844 }, { "epoch": 93.37, "learning_rate": 8.013118473754568e-05, "loss": 0.0, "step": 21848 }, { "epoch": 93.38, "learning_rate": 8.012188348689068e-05, "loss": 0.0, "step": 21852 }, { "epoch": 93.4, "learning_rate": 8.011258059974002e-05, "loss": 0.0003, "step": 21856 }, { "epoch": 93.42, "learning_rate": 8.010327607659914e-05, "loss": 0.0, "step": 21860 }, { "epoch": 93.44, "learning_rate": 8.009396991797355e-05, "loss": 0.0, "step": 21864 }, { "epoch": 93.45, "learning_rate": 8.008466212436884e-05, "loss": 0.0001, "step": 21868 }, { "epoch": 93.47, "learning_rate": 8.007535269629069e-05, "loss": 0.0002, "step": 21872 }, { "epoch": 93.49, "learning_rate": 8.00660416342449e-05, "loss": 0.0001, "step": 21876 }, { "epoch": 93.5, "learning_rate": 8.005672893873729e-05, "loss": 0.0, "step": 21880 }, { "epoch": 93.52, "learning_rate": 8.004741461027381e-05, "loss": 0.0001, "step": 21884 }, { "epoch": 93.54, "learning_rate": 8.003809864936055e-05, "loss": 0.0001, "step": 21888 }, { "epoch": 93.56, "learning_rate": 8.00287810565036e-05, "loss": 0.0, "step": 21892 }, { "epoch": 93.57, "learning_rate": 8.001946183220916e-05, "loss": 0.0, "step": 21896 }, { "epoch": 93.59, "learning_rate": 8.001014097698359e-05, "loss": 0.0003, "step": 21900 }, { "epoch": 93.61, "learning_rate": 8.000081849133324e-05, "loss": 0.0001, "step": 21904 }, { "epoch": 93.62, "learning_rate": 7.999149437576461e-05, "loss": 0.0, "step": 21908 }, { "epoch": 93.64, "learning_rate": 7.998216863078427e-05, "loss": 0.0, "step": 21912 }, { "epoch": 93.66, "learning_rate": 7.997284125689886e-05, "loss": 0.0, "step": 21916 }, { "epoch": 93.68, "learning_rate": 7.996351225461516e-05, "loss": 0.0001, "step": 21920 }, { "epoch": 93.69, "learning_rate": 7.995418162443999e-05, "loss": 0.0002, "step": 21924 }, { "epoch": 93.71, "learning_rate": 7.994484936688027e-05, "loss": 0.0004, "step": 21928 }, { "epoch": 93.73, "learning_rate": 7.993551548244301e-05, "loss": 0.0002, "step": 21932 }, { "epoch": 93.74, "learning_rate": 7.992617997163533e-05, "loss": 0.0, "step": 21936 }, { "epoch": 93.76, "learning_rate": 7.991684283496442e-05, "loss": 0.0001, "step": 21940 }, { "epoch": 93.78, "learning_rate": 7.990750407293754e-05, "loss": 0.0005, "step": 21944 }, { "epoch": 93.79, "learning_rate": 7.989816368606207e-05, "loss": 0.0001, "step": 21948 }, { "epoch": 93.81, "learning_rate": 7.988882167484546e-05, "loss": 0.0001, "step": 21952 }, { "epoch": 93.83, "learning_rate": 7.987947803979525e-05, "loss": 0.0001, "step": 21956 }, { "epoch": 93.85, "learning_rate": 7.987013278141909e-05, "loss": 0.0006, "step": 21960 }, { "epoch": 93.86, "learning_rate": 7.986078590022468e-05, "loss": 0.0, "step": 21964 }, { "epoch": 93.88, "learning_rate": 7.985143739671983e-05, "loss": 0.0, "step": 21968 }, { "epoch": 93.9, "learning_rate": 7.984208727141245e-05, "loss": 0.0, "step": 21972 }, { "epoch": 93.91, "learning_rate": 7.983273552481048e-05, "loss": 0.0, "step": 21976 }, { "epoch": 93.93, "learning_rate": 7.982338215742207e-05, "loss": 0.0001, "step": 21980 }, { "epoch": 93.95, "learning_rate": 7.981402716975532e-05, "loss": 0.0, "step": 21984 }, { "epoch": 93.97, "learning_rate": 7.980467056231848e-05, "loss": 0.0002, "step": 21988 }, { "epoch": 93.98, "learning_rate": 7.979531233561992e-05, "loss": 0.0002, "step": 21992 }, { "epoch": 94.0, "learning_rate": 7.978595249016803e-05, "loss": 0.0003, "step": 21996 }, { "epoch": 94.02, "learning_rate": 7.977659102647137e-05, "loss": 0.0001, "step": 22000 }, { "epoch": 94.02, "eval_exact_match": 0.5135135135135135, "eval_loss": 0.9283977746963501, "eval_runtime": 174.6852, "eval_samples_per_second": 5.507, "step": 22000 }, { "epoch": 94.03, "learning_rate": 7.976722794503848e-05, "loss": 0.0, "step": 22004 }, { "epoch": 94.05, "learning_rate": 7.975786324637808e-05, "loss": 0.0, "step": 22008 }, { "epoch": 94.07, "learning_rate": 7.974849693099895e-05, "loss": 0.0001, "step": 22012 }, { "epoch": 94.09, "learning_rate": 7.973912899940994e-05, "loss": 0.0008, "step": 22016 }, { "epoch": 94.1, "learning_rate": 7.972975945212e-05, "loss": 0.0001, "step": 22020 }, { "epoch": 94.12, "learning_rate": 7.97203882896382e-05, "loss": 0.0018, "step": 22024 }, { "epoch": 94.14, "learning_rate": 7.971101551247361e-05, "loss": 0.0001, "step": 22028 }, { "epoch": 94.15, "learning_rate": 7.970164112113551e-05, "loss": 0.0, "step": 22032 }, { "epoch": 94.17, "learning_rate": 7.969226511613314e-05, "loss": 0.0, "step": 22036 }, { "epoch": 94.19, "learning_rate": 7.968288749797596e-05, "loss": 0.0001, "step": 22040 }, { "epoch": 94.21, "learning_rate": 7.967350826717338e-05, "loss": 0.0004, "step": 22044 }, { "epoch": 94.22, "learning_rate": 7.966412742423501e-05, "loss": 0.0, "step": 22048 }, { "epoch": 94.24, "learning_rate": 7.965474496967047e-05, "loss": 0.0001, "step": 22052 }, { "epoch": 94.26, "learning_rate": 7.964536090398953e-05, "loss": 0.0001, "step": 22056 }, { "epoch": 94.27, "learning_rate": 7.963597522770201e-05, "loss": 0.0001, "step": 22060 }, { "epoch": 94.29, "learning_rate": 7.962658794131781e-05, "loss": 0.0001, "step": 22064 }, { "epoch": 94.31, "learning_rate": 7.961719904534694e-05, "loss": 0.0001, "step": 22068 }, { "epoch": 94.32, "learning_rate": 7.960780854029952e-05, "loss": 0.0001, "step": 22072 }, { "epoch": 94.34, "learning_rate": 7.959841642668569e-05, "loss": 0.0, "step": 22076 }, { "epoch": 94.36, "learning_rate": 7.958902270501571e-05, "loss": 0.0002, "step": 22080 }, { "epoch": 94.38, "learning_rate": 7.957962737579998e-05, "loss": 0.0, "step": 22084 }, { "epoch": 94.39, "learning_rate": 7.95702304395489e-05, "loss": 0.0001, "step": 22088 }, { "epoch": 94.41, "learning_rate": 7.956083189677302e-05, "loss": 0.0002, "step": 22092 }, { "epoch": 94.43, "learning_rate": 7.955143174798292e-05, "loss": 0.0, "step": 22096 }, { "epoch": 94.44, "learning_rate": 7.954202999368934e-05, "loss": 0.0006, "step": 22100 }, { "epoch": 94.46, "learning_rate": 7.953262663440306e-05, "loss": 0.0, "step": 22104 }, { "epoch": 94.48, "learning_rate": 7.952322167063492e-05, "loss": 0.0, "step": 22108 }, { "epoch": 94.5, "learning_rate": 7.951381510289596e-05, "loss": 0.0, "step": 22112 }, { "epoch": 94.51, "learning_rate": 7.950440693169714e-05, "loss": 0.0015, "step": 22116 }, { "epoch": 94.53, "learning_rate": 7.949499715754967e-05, "loss": 0.0001, "step": 22120 }, { "epoch": 94.55, "learning_rate": 7.948558578096474e-05, "loss": 0.0, "step": 22124 }, { "epoch": 94.56, "learning_rate": 7.947617280245366e-05, "loss": 0.0008, "step": 22128 }, { "epoch": 94.58, "learning_rate": 7.946675822252786e-05, "loss": 0.0003, "step": 22132 }, { "epoch": 94.6, "learning_rate": 7.945734204169879e-05, "loss": 0.0, "step": 22136 }, { "epoch": 94.62, "learning_rate": 7.944792426047802e-05, "loss": 0.0001, "step": 22140 }, { "epoch": 94.63, "learning_rate": 7.943850487937725e-05, "loss": 0.0, "step": 22144 }, { "epoch": 94.65, "learning_rate": 7.94290838989082e-05, "loss": 0.0, "step": 22148 }, { "epoch": 94.67, "learning_rate": 7.941966131958272e-05, "loss": 0.0012, "step": 22152 }, { "epoch": 94.68, "learning_rate": 7.941023714191269e-05, "loss": 0.0003, "step": 22156 }, { "epoch": 94.7, "learning_rate": 7.940081136641015e-05, "loss": 0.0, "step": 22160 }, { "epoch": 94.72, "learning_rate": 7.93913839935872e-05, "loss": 0.0002, "step": 22164 }, { "epoch": 94.74, "learning_rate": 7.9381955023956e-05, "loss": 0.0001, "step": 22168 }, { "epoch": 94.75, "learning_rate": 7.937252445802884e-05, "loss": 0.0007, "step": 22172 }, { "epoch": 94.77, "learning_rate": 7.936309229631804e-05, "loss": 0.0002, "step": 22176 }, { "epoch": 94.79, "learning_rate": 7.93536585393361e-05, "loss": 0.0006, "step": 22180 }, { "epoch": 94.8, "learning_rate": 7.934422318759547e-05, "loss": 0.0, "step": 22184 }, { "epoch": 94.82, "learning_rate": 7.933478624160884e-05, "loss": 0.0003, "step": 22188 }, { "epoch": 94.84, "learning_rate": 7.932534770188886e-05, "loss": 0.0001, "step": 22192 }, { "epoch": 94.85, "learning_rate": 7.931590756894833e-05, "loss": 0.0, "step": 22196 }, { "epoch": 94.87, "learning_rate": 7.930646584330012e-05, "loss": 0.0003, "step": 22200 }, { "epoch": 94.89, "learning_rate": 7.929702252545723e-05, "loss": 0.0013, "step": 22204 }, { "epoch": 94.91, "learning_rate": 7.928757761593265e-05, "loss": 0.0002, "step": 22208 }, { "epoch": 94.92, "learning_rate": 7.927813111523955e-05, "loss": 0.0008, "step": 22212 }, { "epoch": 94.94, "learning_rate": 7.926868302389114e-05, "loss": 0.0, "step": 22216 }, { "epoch": 94.96, "learning_rate": 7.925923334240072e-05, "loss": 0.0001, "step": 22220 }, { "epoch": 94.97, "learning_rate": 7.92497820712817e-05, "loss": 0.0, "step": 22224 }, { "epoch": 94.99, "learning_rate": 7.924032921104754e-05, "loss": 0.0002, "step": 22228 }, { "epoch": 95.01, "learning_rate": 7.923087476221182e-05, "loss": 0.0001, "step": 22232 }, { "epoch": 95.03, "learning_rate": 7.922141872528817e-05, "loss": 0.0, "step": 22236 }, { "epoch": 95.04, "learning_rate": 7.921196110079037e-05, "loss": 0.0001, "step": 22240 }, { "epoch": 95.06, "learning_rate": 7.92025018892322e-05, "loss": 0.0001, "step": 22244 }, { "epoch": 95.08, "learning_rate": 7.91930410911276e-05, "loss": 0.0, "step": 22248 }, { "epoch": 95.09, "learning_rate": 7.918357870699056e-05, "loss": 0.0028, "step": 22252 }, { "epoch": 95.11, "learning_rate": 7.917411473733514e-05, "loss": 0.0, "step": 22256 }, { "epoch": 95.13, "learning_rate": 7.916464918267554e-05, "loss": 0.0, "step": 22260 }, { "epoch": 95.15, "learning_rate": 7.915518204352602e-05, "loss": 0.0004, "step": 22264 }, { "epoch": 95.16, "learning_rate": 7.914571332040088e-05, "loss": 0.0006, "step": 22268 }, { "epoch": 95.18, "learning_rate": 7.913624301381459e-05, "loss": 0.0, "step": 22272 }, { "epoch": 95.2, "learning_rate": 7.912677112428164e-05, "loss": 0.0002, "step": 22276 }, { "epoch": 95.21, "learning_rate": 7.911729765231665e-05, "loss": 0.0, "step": 22280 }, { "epoch": 95.23, "learning_rate": 7.910782259843427e-05, "loss": 0.0016, "step": 22284 }, { "epoch": 95.25, "learning_rate": 7.909834596314932e-05, "loss": 0.0005, "step": 22288 }, { "epoch": 95.26, "learning_rate": 7.908886774697661e-05, "loss": 0.0003, "step": 22292 }, { "epoch": 95.28, "learning_rate": 7.90793879504311e-05, "loss": 0.0002, "step": 22296 }, { "epoch": 95.3, "learning_rate": 7.906990657402783e-05, "loss": 0.0, "step": 22300 }, { "epoch": 95.32, "learning_rate": 7.90604236182819e-05, "loss": 0.0001, "step": 22304 }, { "epoch": 95.33, "learning_rate": 7.905093908370852e-05, "loss": 0.0012, "step": 22308 }, { "epoch": 95.35, "learning_rate": 7.904145297082299e-05, "loss": 0.0, "step": 22312 }, { "epoch": 95.37, "learning_rate": 7.903196528014065e-05, "loss": 0.0, "step": 22316 }, { "epoch": 95.38, "learning_rate": 7.902247601217699e-05, "loss": 0.0, "step": 22320 }, { "epoch": 95.4, "learning_rate": 7.901298516744753e-05, "loss": 0.0001, "step": 22324 }, { "epoch": 95.42, "learning_rate": 7.900349274646791e-05, "loss": 0.0, "step": 22328 }, { "epoch": 95.44, "learning_rate": 7.899399874975383e-05, "loss": 0.0002, "step": 22332 }, { "epoch": 95.45, "learning_rate": 7.898450317782112e-05, "loss": 0.0, "step": 22336 }, { "epoch": 95.47, "learning_rate": 7.897500603118565e-05, "loss": 0.0, "step": 22340 }, { "epoch": 95.49, "learning_rate": 7.896550731036338e-05, "loss": 0.0, "step": 22344 }, { "epoch": 95.5, "learning_rate": 7.89560070158704e-05, "loss": 0.0, "step": 22348 }, { "epoch": 95.52, "learning_rate": 7.894650514822281e-05, "loss": 0.0, "step": 22352 }, { "epoch": 95.54, "learning_rate": 7.893700170793686e-05, "loss": 0.0, "step": 22356 }, { "epoch": 95.56, "learning_rate": 7.892749669552888e-05, "loss": 0.0, "step": 22360 }, { "epoch": 95.57, "learning_rate": 7.891799011151523e-05, "loss": 0.0011, "step": 22364 }, { "epoch": 95.59, "learning_rate": 7.890848195641243e-05, "loss": 0.0, "step": 22368 }, { "epoch": 95.61, "learning_rate": 7.889897223073703e-05, "loss": 0.0004, "step": 22372 }, { "epoch": 95.62, "learning_rate": 7.888946093500568e-05, "loss": 0.0002, "step": 22376 }, { "epoch": 95.64, "learning_rate": 7.887994806973516e-05, "loss": 0.0, "step": 22380 }, { "epoch": 95.66, "learning_rate": 7.887043363544225e-05, "loss": 0.0, "step": 22384 }, { "epoch": 95.68, "learning_rate": 7.886091763264388e-05, "loss": 0.0, "step": 22388 }, { "epoch": 95.69, "learning_rate": 7.885140006185705e-05, "loss": 0.0, "step": 22392 }, { "epoch": 95.71, "learning_rate": 7.884188092359881e-05, "loss": 0.0001, "step": 22396 }, { "epoch": 95.73, "learning_rate": 7.883236021838638e-05, "loss": 0.0, "step": 22400 }, { "epoch": 95.74, "learning_rate": 7.882283794673698e-05, "loss": 0.0004, "step": 22404 }, { "epoch": 95.76, "learning_rate": 7.881331410916795e-05, "loss": 0.0, "step": 22408 }, { "epoch": 95.78, "learning_rate": 7.880378870619672e-05, "loss": 0.0001, "step": 22412 }, { "epoch": 95.79, "learning_rate": 7.879426173834076e-05, "loss": 0.0006, "step": 22416 }, { "epoch": 95.81, "learning_rate": 7.878473320611771e-05, "loss": 0.0, "step": 22420 }, { "epoch": 95.83, "learning_rate": 7.877520311004523e-05, "loss": 0.0001, "step": 22424 }, { "epoch": 95.85, "learning_rate": 7.876567145064108e-05, "loss": 0.0005, "step": 22428 }, { "epoch": 95.86, "learning_rate": 7.875613822842311e-05, "loss": 0.0, "step": 22432 }, { "epoch": 95.88, "learning_rate": 7.874660344390923e-05, "loss": 0.0001, "step": 22436 }, { "epoch": 95.9, "learning_rate": 7.873706709761748e-05, "loss": 0.0, "step": 22440 }, { "epoch": 95.91, "learning_rate": 7.872752919006596e-05, "loss": 0.0001, "step": 22444 }, { "epoch": 95.93, "learning_rate": 7.871798972177287e-05, "loss": 0.0, "step": 22448 }, { "epoch": 95.95, "learning_rate": 7.870844869325644e-05, "loss": 0.0001, "step": 22452 }, { "epoch": 95.97, "learning_rate": 7.869890610503504e-05, "loss": 0.0, "step": 22456 }, { "epoch": 95.98, "learning_rate": 7.868936195762714e-05, "loss": 0.0007, "step": 22460 }, { "epoch": 96.0, "learning_rate": 7.867981625155124e-05, "loss": 0.0, "step": 22464 }, { "epoch": 96.02, "learning_rate": 7.867026898732595e-05, "loss": 0.001, "step": 22468 }, { "epoch": 96.03, "learning_rate": 7.866072016546997e-05, "loss": 0.0001, "step": 22472 }, { "epoch": 96.05, "learning_rate": 7.865116978650208e-05, "loss": 0.0, "step": 22476 }, { "epoch": 96.07, "learning_rate": 7.864161785094114e-05, "loss": 0.0001, "step": 22480 }, { "epoch": 96.09, "learning_rate": 7.86320643593061e-05, "loss": 0.0004, "step": 22484 }, { "epoch": 96.1, "learning_rate": 7.8622509312116e-05, "loss": 0.0006, "step": 22488 }, { "epoch": 96.12, "learning_rate": 7.861295270988994e-05, "loss": 0.0, "step": 22492 }, { "epoch": 96.14, "learning_rate": 7.860339455314713e-05, "loss": 0.0001, "step": 22496 }, { "epoch": 96.15, "learning_rate": 7.859383484240687e-05, "loss": 0.0007, "step": 22500 }, { "epoch": 96.17, "learning_rate": 7.858427357818851e-05, "loss": 0.0, "step": 22504 }, { "epoch": 96.19, "learning_rate": 7.857471076101153e-05, "loss": 0.0, "step": 22508 }, { "epoch": 96.21, "learning_rate": 7.856514639139546e-05, "loss": 0.0, "step": 22512 }, { "epoch": 96.22, "learning_rate": 7.855558046985986e-05, "loss": 0.0002, "step": 22516 }, { "epoch": 96.24, "learning_rate": 7.854601299692454e-05, "loss": 0.0008, "step": 22520 }, { "epoch": 96.26, "learning_rate": 7.853644397310926e-05, "loss": 0.0, "step": 22524 }, { "epoch": 96.27, "learning_rate": 7.852687339893386e-05, "loss": 0.0001, "step": 22528 }, { "epoch": 96.29, "learning_rate": 7.851730127491834e-05, "loss": 0.0, "step": 22532 }, { "epoch": 96.31, "learning_rate": 7.850772760158273e-05, "loss": 0.0, "step": 22536 }, { "epoch": 96.32, "learning_rate": 7.849815237944716e-05, "loss": 0.0001, "step": 22540 }, { "epoch": 96.34, "learning_rate": 7.848857560903183e-05, "loss": 0.0001, "step": 22544 }, { "epoch": 96.36, "learning_rate": 7.847899729085706e-05, "loss": 0.0, "step": 22548 }, { "epoch": 96.38, "learning_rate": 7.846941742544323e-05, "loss": 0.0002, "step": 22552 }, { "epoch": 96.39, "learning_rate": 7.84598360133108e-05, "loss": 0.0006, "step": 22556 }, { "epoch": 96.41, "learning_rate": 7.84502530549803e-05, "loss": 0.0006, "step": 22560 }, { "epoch": 96.43, "learning_rate": 7.844066855097241e-05, "loss": 0.0014, "step": 22564 }, { "epoch": 96.44, "learning_rate": 7.84310825018078e-05, "loss": 0.0001, "step": 22568 }, { "epoch": 96.46, "learning_rate": 7.84214949080073e-05, "loss": 0.0, "step": 22572 }, { "epoch": 96.48, "learning_rate": 7.841190577009179e-05, "loss": 0.0001, "step": 22576 }, { "epoch": 96.5, "learning_rate": 7.840231508858222e-05, "loss": 0.0001, "step": 22580 }, { "epoch": 96.51, "learning_rate": 7.839272286399967e-05, "loss": 0.0002, "step": 22584 }, { "epoch": 96.53, "learning_rate": 7.838312909686528e-05, "loss": 0.0, "step": 22588 }, { "epoch": 96.55, "learning_rate": 7.837353378770024e-05, "loss": 0.0001, "step": 22592 }, { "epoch": 96.56, "learning_rate": 7.836393693702588e-05, "loss": 0.0001, "step": 22596 }, { "epoch": 96.58, "learning_rate": 7.835433854536359e-05, "loss": 0.0001, "step": 22600 }, { "epoch": 96.6, "learning_rate": 7.834473861323483e-05, "loss": 0.0, "step": 22604 }, { "epoch": 96.62, "learning_rate": 7.833513714116117e-05, "loss": 0.0001, "step": 22608 }, { "epoch": 96.63, "learning_rate": 7.832553412966421e-05, "loss": 0.0005, "step": 22612 }, { "epoch": 96.65, "learning_rate": 7.831592957926572e-05, "loss": 0.0026, "step": 22616 }, { "epoch": 96.67, "learning_rate": 7.830632349048748e-05, "loss": 0.0, "step": 22620 }, { "epoch": 96.68, "learning_rate": 7.829671586385142e-05, "loss": 0.0, "step": 22624 }, { "epoch": 96.7, "learning_rate": 7.828710669987946e-05, "loss": 0.0002, "step": 22628 }, { "epoch": 96.72, "learning_rate": 7.827749599909368e-05, "loss": 0.0002, "step": 22632 }, { "epoch": 96.74, "learning_rate": 7.826788376201622e-05, "loss": 0.0007, "step": 22636 }, { "epoch": 96.75, "learning_rate": 7.825826998916932e-05, "loss": 0.0001, "step": 22640 }, { "epoch": 96.77, "learning_rate": 7.824865468107528e-05, "loss": 0.0002, "step": 22644 }, { "epoch": 96.79, "learning_rate": 7.823903783825646e-05, "loss": 0.0002, "step": 22648 }, { "epoch": 96.8, "learning_rate": 7.822941946123538e-05, "loss": 0.0001, "step": 22652 }, { "epoch": 96.82, "learning_rate": 7.821979955053458e-05, "loss": 0.0, "step": 22656 }, { "epoch": 96.84, "learning_rate": 7.821017810667669e-05, "loss": 0.0002, "step": 22660 }, { "epoch": 96.85, "learning_rate": 7.820055513018445e-05, "loss": 0.0, "step": 22664 }, { "epoch": 96.87, "learning_rate": 7.819093062158067e-05, "loss": 0.0, "step": 22668 }, { "epoch": 96.89, "learning_rate": 7.818130458138823e-05, "loss": 0.0001, "step": 22672 }, { "epoch": 96.91, "learning_rate": 7.81716770101301e-05, "loss": 0.0001, "step": 22676 }, { "epoch": 96.92, "learning_rate": 7.816204790832935e-05, "loss": 0.0001, "step": 22680 }, { "epoch": 96.94, "learning_rate": 7.81524172765091e-05, "loss": 0.0004, "step": 22684 }, { "epoch": 96.96, "learning_rate": 7.814278511519262e-05, "loss": 0.0001, "step": 22688 }, { "epoch": 96.97, "learning_rate": 7.813315142490318e-05, "loss": 0.0002, "step": 22692 }, { "epoch": 96.99, "learning_rate": 7.812351620616416e-05, "loss": 0.0, "step": 22696 }, { "epoch": 97.01, "learning_rate": 7.811387945949906e-05, "loss": 0.0002, "step": 22700 }, { "epoch": 97.03, "learning_rate": 7.810424118543143e-05, "loss": 0.0001, "step": 22704 }, { "epoch": 97.04, "learning_rate": 7.809460138448491e-05, "loss": 0.0, "step": 22708 }, { "epoch": 97.06, "learning_rate": 7.808496005718323e-05, "loss": 0.0, "step": 22712 }, { "epoch": 97.08, "learning_rate": 7.807531720405015e-05, "loss": 0.0002, "step": 22716 }, { "epoch": 97.09, "learning_rate": 7.806567282560959e-05, "loss": 0.0001, "step": 22720 }, { "epoch": 97.11, "learning_rate": 7.805602692238554e-05, "loss": 0.0001, "step": 22724 }, { "epoch": 97.13, "learning_rate": 7.804637949490203e-05, "loss": 0.0002, "step": 22728 }, { "epoch": 97.15, "learning_rate": 7.80367305436832e-05, "loss": 0.0001, "step": 22732 }, { "epoch": 97.16, "learning_rate": 7.802708006925326e-05, "loss": 0.0, "step": 22736 }, { "epoch": 97.18, "learning_rate": 7.801742807213652e-05, "loss": 0.0005, "step": 22740 }, { "epoch": 97.2, "learning_rate": 7.80077745528574e-05, "loss": 0.0, "step": 22744 }, { "epoch": 97.21, "learning_rate": 7.79981195119403e-05, "loss": 0.0, "step": 22748 }, { "epoch": 97.23, "learning_rate": 7.79884629499098e-05, "loss": 0.0002, "step": 22752 }, { "epoch": 97.25, "learning_rate": 7.797880486729055e-05, "loss": 0.0, "step": 22756 }, { "epoch": 97.26, "learning_rate": 7.796914526460725e-05, "loss": 0.0002, "step": 22760 }, { "epoch": 97.28, "learning_rate": 7.79594841423847e-05, "loss": 0.0, "step": 22764 }, { "epoch": 97.3, "learning_rate": 7.794982150114778e-05, "loss": 0.0006, "step": 22768 }, { "epoch": 97.32, "learning_rate": 7.794015734142144e-05, "loss": 0.0, "step": 22772 }, { "epoch": 97.33, "learning_rate": 7.793049166373075e-05, "loss": 0.0008, "step": 22776 }, { "epoch": 97.35, "learning_rate": 7.792082446860083e-05, "loss": 0.0005, "step": 22780 }, { "epoch": 97.37, "learning_rate": 7.791115575655687e-05, "loss": 0.0001, "step": 22784 }, { "epoch": 97.38, "learning_rate": 7.790148552812417e-05, "loss": 0.0, "step": 22788 }, { "epoch": 97.4, "learning_rate": 7.789181378382816e-05, "loss": 0.0001, "step": 22792 }, { "epoch": 97.42, "learning_rate": 7.78821405241942e-05, "loss": 0.0001, "step": 22796 }, { "epoch": 97.44, "learning_rate": 7.78724657497479e-05, "loss": 0.0007, "step": 22800 }, { "epoch": 97.45, "learning_rate": 7.786278946101487e-05, "loss": 0.0, "step": 22804 }, { "epoch": 97.47, "learning_rate": 7.785311165852078e-05, "loss": 0.0, "step": 22808 }, { "epoch": 97.49, "learning_rate": 7.784343234279147e-05, "loss": 0.0, "step": 22812 }, { "epoch": 97.5, "learning_rate": 7.783375151435277e-05, "loss": 0.0005, "step": 22816 }, { "epoch": 97.52, "learning_rate": 7.782406917373065e-05, "loss": 0.0, "step": 22820 }, { "epoch": 97.54, "learning_rate": 7.781438532145114e-05, "loss": 0.0001, "step": 22824 }, { "epoch": 97.56, "learning_rate": 7.780469995804034e-05, "loss": 0.0002, "step": 22828 }, { "epoch": 97.57, "learning_rate": 7.779501308402446e-05, "loss": 0.0015, "step": 22832 }, { "epoch": 97.59, "learning_rate": 7.778532469992977e-05, "loss": 0.0, "step": 22836 }, { "epoch": 97.61, "learning_rate": 7.777563480628265e-05, "loss": 0.0004, "step": 22840 }, { "epoch": 97.62, "learning_rate": 7.776594340360954e-05, "loss": 0.0001, "step": 22844 }, { "epoch": 97.64, "learning_rate": 7.775625049243695e-05, "loss": 0.0002, "step": 22848 }, { "epoch": 97.66, "learning_rate": 7.77465560732915e-05, "loss": 0.0, "step": 22852 }, { "epoch": 97.68, "learning_rate": 7.773686014669988e-05, "loss": 0.0, "step": 22856 }, { "epoch": 97.69, "learning_rate": 7.772716271318884e-05, "loss": 0.0, "step": 22860 }, { "epoch": 97.71, "learning_rate": 7.771746377328527e-05, "loss": 0.0001, "step": 22864 }, { "epoch": 97.73, "learning_rate": 7.770776332751606e-05, "loss": 0.0002, "step": 22868 }, { "epoch": 97.74, "learning_rate": 7.769806137640827e-05, "loss": 0.0001, "step": 22872 }, { "epoch": 97.76, "learning_rate": 7.768835792048896e-05, "loss": 0.0, "step": 22876 }, { "epoch": 97.78, "learning_rate": 7.767865296028535e-05, "loss": 0.0, "step": 22880 }, { "epoch": 97.79, "learning_rate": 7.766894649632468e-05, "loss": 0.0001, "step": 22884 }, { "epoch": 97.81, "learning_rate": 7.765923852913431e-05, "loss": 0.0005, "step": 22888 }, { "epoch": 97.83, "learning_rate": 7.764952905924162e-05, "loss": 0.0001, "step": 22892 }, { "epoch": 97.85, "learning_rate": 7.763981808717416e-05, "loss": 0.0001, "step": 22896 }, { "epoch": 97.86, "learning_rate": 7.763010561345952e-05, "loss": 0.0, "step": 22900 }, { "epoch": 97.88, "learning_rate": 7.762039163862533e-05, "loss": 0.0003, "step": 22904 }, { "epoch": 97.9, "learning_rate": 7.761067616319939e-05, "loss": 0.002, "step": 22908 }, { "epoch": 97.91, "learning_rate": 7.760095918770951e-05, "loss": 0.0007, "step": 22912 }, { "epoch": 97.93, "learning_rate": 7.75912407126836e-05, "loss": 0.0, "step": 22916 }, { "epoch": 97.95, "learning_rate": 7.758152073864967e-05, "loss": 0.0, "step": 22920 }, { "epoch": 97.97, "learning_rate": 7.757179926613579e-05, "loss": 0.0001, "step": 22924 }, { "epoch": 97.98, "learning_rate": 7.756207629567011e-05, "loss": 0.0001, "step": 22928 }, { "epoch": 98.0, "learning_rate": 7.75523518277809e-05, "loss": 0.0, "step": 22932 }, { "epoch": 98.02, "learning_rate": 7.754262586299645e-05, "loss": 0.0001, "step": 22936 }, { "epoch": 98.03, "learning_rate": 7.753289840184518e-05, "loss": 0.0, "step": 22940 }, { "epoch": 98.05, "learning_rate": 7.752316944485556e-05, "loss": 0.0002, "step": 22944 }, { "epoch": 98.07, "learning_rate": 7.751343899255618e-05, "loss": 0.0, "step": 22948 }, { "epoch": 98.09, "learning_rate": 7.750370704547567e-05, "loss": 0.0001, "step": 22952 }, { "epoch": 98.1, "learning_rate": 7.749397360414274e-05, "loss": 0.0001, "step": 22956 }, { "epoch": 98.12, "learning_rate": 7.748423866908625e-05, "loss": 0.0002, "step": 22960 }, { "epoch": 98.14, "learning_rate": 7.747450224083505e-05, "loss": 0.0001, "step": 22964 }, { "epoch": 98.15, "learning_rate": 7.746476431991811e-05, "loss": 0.0, "step": 22968 }, { "epoch": 98.17, "learning_rate": 7.745502490686452e-05, "loss": 0.0, "step": 22972 }, { "epoch": 98.19, "learning_rate": 7.744528400220336e-05, "loss": 0.0, "step": 22976 }, { "epoch": 98.21, "learning_rate": 7.74355416064639e-05, "loss": 0.0, "step": 22980 }, { "epoch": 98.22, "learning_rate": 7.74257977201754e-05, "loss": 0.0, "step": 22984 }, { "epoch": 98.24, "learning_rate": 7.741605234386724e-05, "loss": 0.0, "step": 22988 }, { "epoch": 98.26, "learning_rate": 7.740630547806889e-05, "loss": 0.0001, "step": 22992 }, { "epoch": 98.27, "learning_rate": 7.739655712330989e-05, "loss": 0.0001, "step": 22996 }, { "epoch": 98.29, "learning_rate": 7.738680728011983e-05, "loss": 0.0001, "step": 23000 }, { "epoch": 98.29, "eval_exact_match": 0.5093555093555093, "eval_loss": 0.9610804915428162, "eval_runtime": 137.9706, "eval_samples_per_second": 6.973, "step": 23000 }, { "epoch": 98.31, "learning_rate": 7.737705594902847e-05, "loss": 0.0007, "step": 23004 }, { "epoch": 98.32, "learning_rate": 7.736730313056552e-05, "loss": 0.0, "step": 23008 }, { "epoch": 98.34, "learning_rate": 7.73575488252609e-05, "loss": 0.0001, "step": 23012 }, { "epoch": 98.36, "learning_rate": 7.73477930336445e-05, "loss": 0.0001, "step": 23016 }, { "epoch": 98.38, "learning_rate": 7.73380357562464e-05, "loss": 0.0001, "step": 23020 }, { "epoch": 98.39, "learning_rate": 7.732827699359668e-05, "loss": 0.0, "step": 23024 }, { "epoch": 98.41, "learning_rate": 7.73185167462255e-05, "loss": 0.0, "step": 23028 }, { "epoch": 98.43, "learning_rate": 7.730875501466317e-05, "loss": 0.0001, "step": 23032 }, { "epoch": 98.44, "learning_rate": 7.729899179944e-05, "loss": 0.0, "step": 23036 }, { "epoch": 98.46, "learning_rate": 7.728922710108644e-05, "loss": 0.0001, "step": 23040 }, { "epoch": 98.48, "learning_rate": 7.727946092013298e-05, "loss": 0.0, "step": 23044 }, { "epoch": 98.5, "learning_rate": 7.726969325711023e-05, "loss": 0.0, "step": 23048 }, { "epoch": 98.51, "learning_rate": 7.725992411254885e-05, "loss": 0.0, "step": 23052 }, { "epoch": 98.53, "learning_rate": 7.725015348697956e-05, "loss": 0.0001, "step": 23056 }, { "epoch": 98.55, "learning_rate": 7.724038138093324e-05, "loss": 0.0001, "step": 23060 }, { "epoch": 98.56, "learning_rate": 7.723060779494075e-05, "loss": 0.0002, "step": 23064 }, { "epoch": 98.58, "learning_rate": 7.722083272953314e-05, "loss": 0.0, "step": 23068 }, { "epoch": 98.6, "learning_rate": 7.721105618524141e-05, "loss": 0.0, "step": 23072 }, { "epoch": 98.62, "learning_rate": 7.720127816259677e-05, "loss": 0.0002, "step": 23076 }, { "epoch": 98.63, "learning_rate": 7.719149866213041e-05, "loss": 0.0, "step": 23080 }, { "epoch": 98.65, "learning_rate": 7.718171768437367e-05, "loss": 0.0001, "step": 23084 }, { "epoch": 98.67, "learning_rate": 7.717193522985794e-05, "loss": 0.0003, "step": 23088 }, { "epoch": 98.68, "learning_rate": 7.716215129911467e-05, "loss": 0.0001, "step": 23092 }, { "epoch": 98.7, "learning_rate": 7.715236589267542e-05, "loss": 0.0001, "step": 23096 }, { "epoch": 98.72, "learning_rate": 7.714257901107185e-05, "loss": 0.0003, "step": 23100 }, { "epoch": 98.74, "learning_rate": 7.713279065483565e-05, "loss": 0.0001, "step": 23104 }, { "epoch": 98.75, "learning_rate": 7.712300082449862e-05, "loss": 0.0, "step": 23108 }, { "epoch": 98.77, "learning_rate": 7.71132095205926e-05, "loss": 0.0028, "step": 23112 }, { "epoch": 98.79, "learning_rate": 7.710341674364958e-05, "loss": 0.0004, "step": 23116 }, { "epoch": 98.8, "learning_rate": 7.70936224942016e-05, "loss": 0.0001, "step": 23120 }, { "epoch": 98.82, "learning_rate": 7.708382677278074e-05, "loss": 0.0003, "step": 23124 }, { "epoch": 98.84, "learning_rate": 7.707402957991923e-05, "loss": 0.0, "step": 23128 }, { "epoch": 98.85, "learning_rate": 7.70642309161493e-05, "loss": 0.0, "step": 23132 }, { "epoch": 98.87, "learning_rate": 7.705443078200333e-05, "loss": 0.0001, "step": 23136 }, { "epoch": 98.89, "learning_rate": 7.704462917801376e-05, "loss": 0.0001, "step": 23140 }, { "epoch": 98.91, "learning_rate": 7.703482610471309e-05, "loss": 0.0, "step": 23144 }, { "epoch": 98.92, "learning_rate": 7.70250215626339e-05, "loss": 0.0, "step": 23148 }, { "epoch": 98.94, "learning_rate": 7.701521555230888e-05, "loss": 0.0, "step": 23152 }, { "epoch": 98.96, "learning_rate": 7.700540807427078e-05, "loss": 0.0, "step": 23156 }, { "epoch": 98.97, "learning_rate": 7.699559912905243e-05, "loss": 0.0003, "step": 23160 }, { "epoch": 98.99, "learning_rate": 7.698578871718673e-05, "loss": 0.0, "step": 23164 }, { "epoch": 99.01, "learning_rate": 7.697597683920669e-05, "loss": 0.0006, "step": 23168 }, { "epoch": 99.03, "learning_rate": 7.696616349564539e-05, "loss": 0.0, "step": 23172 }, { "epoch": 99.04, "learning_rate": 7.695634868703594e-05, "loss": 0.0, "step": 23176 }, { "epoch": 99.06, "learning_rate": 7.694653241391161e-05, "loss": 0.0002, "step": 23180 }, { "epoch": 99.08, "learning_rate": 7.693671467680567e-05, "loss": 0.0002, "step": 23184 }, { "epoch": 99.09, "learning_rate": 7.692689547625154e-05, "loss": 0.0, "step": 23188 }, { "epoch": 99.11, "learning_rate": 7.69170748127827e-05, "loss": 0.0001, "step": 23192 }, { "epoch": 99.13, "learning_rate": 7.690725268693266e-05, "loss": 0.0, "step": 23196 }, { "epoch": 99.15, "learning_rate": 7.689742909923505e-05, "loss": 0.0, "step": 23200 }, { "epoch": 99.16, "learning_rate": 7.688760405022362e-05, "loss": 0.0, "step": 23204 }, { "epoch": 99.18, "learning_rate": 7.687777754043212e-05, "loss": 0.0015, "step": 23208 }, { "epoch": 99.2, "learning_rate": 7.686794957039442e-05, "loss": 0.0001, "step": 23212 }, { "epoch": 99.21, "learning_rate": 7.685812014064448e-05, "loss": 0.0, "step": 23216 }, { "epoch": 99.23, "learning_rate": 7.68482892517163e-05, "loss": 0.0, "step": 23220 }, { "epoch": 99.25, "learning_rate": 7.683845690414402e-05, "loss": 0.0002, "step": 23224 }, { "epoch": 99.26, "learning_rate": 7.682862309846177e-05, "loss": 0.0008, "step": 23228 }, { "epoch": 99.28, "learning_rate": 7.681878783520386e-05, "loss": 0.0, "step": 23232 }, { "epoch": 99.3, "learning_rate": 7.680895111490462e-05, "loss": 0.0, "step": 23236 }, { "epoch": 99.32, "learning_rate": 7.679911293809846e-05, "loss": 0.0002, "step": 23240 }, { "epoch": 99.33, "learning_rate": 7.678927330531988e-05, "loss": 0.0001, "step": 23244 }, { "epoch": 99.35, "learning_rate": 7.677943221710346e-05, "loss": 0.0001, "step": 23248 }, { "epoch": 99.37, "learning_rate": 7.676958967398386e-05, "loss": 0.0001, "step": 23252 }, { "epoch": 99.38, "learning_rate": 7.675974567649583e-05, "loss": 0.0001, "step": 23256 }, { "epoch": 99.4, "learning_rate": 7.674990022517417e-05, "loss": 0.0005, "step": 23260 }, { "epoch": 99.42, "learning_rate": 7.674005332055376e-05, "loss": 0.0, "step": 23264 }, { "epoch": 99.44, "learning_rate": 7.673020496316961e-05, "loss": 0.0, "step": 23268 }, { "epoch": 99.45, "learning_rate": 7.672035515355675e-05, "loss": 0.0, "step": 23272 }, { "epoch": 99.47, "learning_rate": 7.67105038922503e-05, "loss": 0.0, "step": 23276 }, { "epoch": 99.49, "learning_rate": 7.670065117978552e-05, "loss": 0.0, "step": 23280 }, { "epoch": 99.5, "learning_rate": 7.669079701669762e-05, "loss": 0.0, "step": 23284 }, { "epoch": 99.52, "learning_rate": 7.668094140352205e-05, "loss": 0.0, "step": 23288 }, { "epoch": 99.54, "learning_rate": 7.667108434079421e-05, "loss": 0.0, "step": 23292 }, { "epoch": 99.56, "learning_rate": 7.666122582904962e-05, "loss": 0.0, "step": 23296 }, { "epoch": 99.57, "learning_rate": 7.665136586882391e-05, "loss": 0.0, "step": 23300 }, { "epoch": 99.59, "learning_rate": 7.664150446065278e-05, "loss": 0.0001, "step": 23304 }, { "epoch": 99.61, "learning_rate": 7.663164160507192e-05, "loss": 0.0, "step": 23308 }, { "epoch": 99.62, "learning_rate": 7.662177730261723e-05, "loss": 0.0, "step": 23312 }, { "epoch": 99.64, "learning_rate": 7.661191155382464e-05, "loss": 0.0001, "step": 23316 }, { "epoch": 99.66, "learning_rate": 7.66020443592301e-05, "loss": 0.0, "step": 23320 }, { "epoch": 99.68, "learning_rate": 7.659217571936969e-05, "loss": 0.0, "step": 23324 }, { "epoch": 99.69, "learning_rate": 7.658230563477962e-05, "loss": 0.0001, "step": 23328 }, { "epoch": 99.71, "learning_rate": 7.657243410599608e-05, "loss": 0.0, "step": 23332 }, { "epoch": 99.73, "learning_rate": 7.656256113355536e-05, "loss": 0.0006, "step": 23336 }, { "epoch": 99.74, "learning_rate": 7.655268671799389e-05, "loss": 0.0, "step": 23340 }, { "epoch": 99.76, "learning_rate": 7.654281085984814e-05, "loss": 0.0, "step": 23344 }, { "epoch": 99.78, "learning_rate": 7.653293355965464e-05, "loss": 0.0, "step": 23348 }, { "epoch": 99.79, "learning_rate": 7.652305481795e-05, "loss": 0.0002, "step": 23352 }, { "epoch": 99.81, "learning_rate": 7.651317463527093e-05, "loss": 0.0001, "step": 23356 }, { "epoch": 99.83, "learning_rate": 7.650329301215424e-05, "loss": 0.0003, "step": 23360 }, { "epoch": 99.85, "learning_rate": 7.649340994913677e-05, "loss": 0.0001, "step": 23364 }, { "epoch": 99.86, "learning_rate": 7.648352544675546e-05, "loss": 0.0, "step": 23368 }, { "epoch": 99.88, "learning_rate": 7.647363950554734e-05, "loss": 0.0001, "step": 23372 }, { "epoch": 99.9, "learning_rate": 7.646375212604949e-05, "loss": 0.0, "step": 23376 }, { "epoch": 99.91, "learning_rate": 7.645386330879906e-05, "loss": 0.0, "step": 23380 }, { "epoch": 99.93, "learning_rate": 7.644397305433334e-05, "loss": 0.0, "step": 23384 }, { "epoch": 99.95, "learning_rate": 7.643408136318964e-05, "loss": 0.0, "step": 23388 }, { "epoch": 99.97, "learning_rate": 7.642418823590538e-05, "loss": 0.0, "step": 23392 }, { "epoch": 99.98, "learning_rate": 7.641429367301802e-05, "loss": 0.0001, "step": 23396 }, { "epoch": 100.0, "learning_rate": 7.640439767506516e-05, "loss": 0.0005, "step": 23400 }, { "epoch": 100.02, "learning_rate": 7.63945002425844e-05, "loss": 0.0, "step": 23404 }, { "epoch": 100.03, "learning_rate": 7.638460137611349e-05, "loss": 0.0005, "step": 23408 }, { "epoch": 100.05, "learning_rate": 7.637470107619021e-05, "loss": 0.0, "step": 23412 }, { "epoch": 100.07, "learning_rate": 7.636479934335243e-05, "loss": 0.0002, "step": 23416 }, { "epoch": 100.09, "learning_rate": 7.635489617813814e-05, "loss": 0.0007, "step": 23420 }, { "epoch": 100.1, "learning_rate": 7.634499158108532e-05, "loss": 0.0, "step": 23424 }, { "epoch": 100.12, "learning_rate": 7.633508555273213e-05, "loss": 0.0, "step": 23428 }, { "epoch": 100.14, "learning_rate": 7.63251780936167e-05, "loss": 0.0, "step": 23432 }, { "epoch": 100.15, "learning_rate": 7.631526920427732e-05, "loss": 0.0, "step": 23436 }, { "epoch": 100.17, "learning_rate": 7.630535888525232e-05, "loss": 0.0001, "step": 23440 }, { "epoch": 100.19, "learning_rate": 7.629544713708016e-05, "loss": 0.0002, "step": 23444 }, { "epoch": 100.21, "learning_rate": 7.62855339602993e-05, "loss": 0.0001, "step": 23448 }, { "epoch": 100.22, "learning_rate": 7.627561935544831e-05, "loss": 0.0, "step": 23452 }, { "epoch": 100.24, "learning_rate": 7.626570332306585e-05, "loss": 0.0, "step": 23456 }, { "epoch": 100.26, "learning_rate": 7.625578586369067e-05, "loss": 0.0, "step": 23460 }, { "epoch": 100.27, "learning_rate": 7.624586697786156e-05, "loss": 0.0, "step": 23464 }, { "epoch": 100.29, "learning_rate": 7.623594666611738e-05, "loss": 0.0, "step": 23468 }, { "epoch": 100.31, "learning_rate": 7.622602492899714e-05, "loss": 0.0, "step": 23472 }, { "epoch": 100.32, "learning_rate": 7.621610176703984e-05, "loss": 0.0, "step": 23476 }, { "epoch": 100.34, "learning_rate": 7.620617718078462e-05, "loss": 0.0001, "step": 23480 }, { "epoch": 100.36, "learning_rate": 7.619625117077067e-05, "loss": 0.0001, "step": 23484 }, { "epoch": 100.38, "learning_rate": 7.618632373753724e-05, "loss": 0.0, "step": 23488 }, { "epoch": 100.39, "learning_rate": 7.617639488162371e-05, "loss": 0.0002, "step": 23492 }, { "epoch": 100.41, "learning_rate": 7.616646460356949e-05, "loss": 0.0, "step": 23496 }, { "epoch": 100.43, "learning_rate": 7.615653290391409e-05, "loss": 0.0001, "step": 23500 }, { "epoch": 100.44, "learning_rate": 7.614659978319709e-05, "loss": 0.0022, "step": 23504 }, { "epoch": 100.46, "learning_rate": 7.613666524195814e-05, "loss": 0.0002, "step": 23508 }, { "epoch": 100.48, "learning_rate": 7.612672928073698e-05, "loss": 0.0004, "step": 23512 }, { "epoch": 100.5, "learning_rate": 7.611679190007342e-05, "loss": 0.001, "step": 23516 }, { "epoch": 100.51, "learning_rate": 7.610685310050733e-05, "loss": 0.0, "step": 23520 }, { "epoch": 100.53, "learning_rate": 7.609691288257872e-05, "loss": 0.0001, "step": 23524 }, { "epoch": 100.55, "learning_rate": 7.608697124682761e-05, "loss": 0.0009, "step": 23528 }, { "epoch": 100.56, "learning_rate": 7.607702819379413e-05, "loss": 0.0, "step": 23532 }, { "epoch": 100.58, "learning_rate": 7.606708372401843e-05, "loss": 0.0, "step": 23536 }, { "epoch": 100.6, "learning_rate": 7.605713783804085e-05, "loss": 0.0002, "step": 23540 }, { "epoch": 100.62, "learning_rate": 7.604719053640172e-05, "loss": 0.0, "step": 23544 }, { "epoch": 100.63, "learning_rate": 7.603724181964144e-05, "loss": 0.0, "step": 23548 }, { "epoch": 100.65, "learning_rate": 7.602729168830056e-05, "loss": 0.0014, "step": 23552 }, { "epoch": 100.67, "learning_rate": 7.601734014291961e-05, "loss": 0.0008, "step": 23556 }, { "epoch": 100.68, "learning_rate": 7.60073871840393e-05, "loss": 0.0003, "step": 23560 }, { "epoch": 100.7, "learning_rate": 7.599743281220033e-05, "loss": 0.0001, "step": 23564 }, { "epoch": 100.72, "learning_rate": 7.598747702794353e-05, "loss": 0.0, "step": 23568 }, { "epoch": 100.74, "learning_rate": 7.59775198318098e-05, "loss": 0.0, "step": 23572 }, { "epoch": 100.75, "learning_rate": 7.596756122434007e-05, "loss": 0.0, "step": 23576 }, { "epoch": 100.77, "learning_rate": 7.59576012060754e-05, "loss": 0.0023, "step": 23580 }, { "epoch": 100.79, "learning_rate": 7.594763977755692e-05, "loss": 0.0008, "step": 23584 }, { "epoch": 100.8, "learning_rate": 7.593767693932583e-05, "loss": 0.0, "step": 23588 }, { "epoch": 100.82, "learning_rate": 7.592771269192339e-05, "loss": 0.0, "step": 23592 }, { "epoch": 100.84, "learning_rate": 7.591774703589092e-05, "loss": 0.0003, "step": 23596 }, { "epoch": 100.85, "learning_rate": 7.59077799717699e-05, "loss": 0.0, "step": 23600 }, { "epoch": 100.87, "learning_rate": 7.58978115001018e-05, "loss": 0.0, "step": 23604 }, { "epoch": 100.89, "learning_rate": 7.588784162142819e-05, "loss": 0.0, "step": 23608 }, { "epoch": 100.91, "learning_rate": 7.587787033629075e-05, "loss": 0.0, "step": 23612 }, { "epoch": 100.92, "learning_rate": 7.586789764523121e-05, "loss": 0.0001, "step": 23616 }, { "epoch": 100.94, "learning_rate": 7.585792354879136e-05, "loss": 0.0, "step": 23620 }, { "epoch": 100.96, "learning_rate": 7.584794804751309e-05, "loss": 0.0001, "step": 23624 }, { "epoch": 100.97, "learning_rate": 7.583797114193836e-05, "loss": 0.0001, "step": 23628 }, { "epoch": 100.99, "learning_rate": 7.582799283260921e-05, "loss": 0.0, "step": 23632 }, { "epoch": 101.01, "learning_rate": 7.581801312006776e-05, "loss": 0.0, "step": 23636 }, { "epoch": 101.03, "learning_rate": 7.580803200485618e-05, "loss": 0.0, "step": 23640 }, { "epoch": 101.04, "learning_rate": 7.579804948751675e-05, "loss": 0.0, "step": 23644 }, { "epoch": 101.06, "learning_rate": 7.57880655685918e-05, "loss": 0.0001, "step": 23648 }, { "epoch": 101.08, "learning_rate": 7.577808024862378e-05, "loss": 0.0, "step": 23652 }, { "epoch": 101.09, "learning_rate": 7.576809352815512e-05, "loss": 0.0, "step": 23656 }, { "epoch": 101.11, "learning_rate": 7.575810540772846e-05, "loss": 0.0, "step": 23660 }, { "epoch": 101.13, "learning_rate": 7.574811588788641e-05, "loss": 0.0, "step": 23664 }, { "epoch": 101.15, "learning_rate": 7.57381249691717e-05, "loss": 0.0001, "step": 23668 }, { "epoch": 101.16, "learning_rate": 7.572813265212711e-05, "loss": 0.0001, "step": 23672 }, { "epoch": 101.18, "learning_rate": 7.571813893729552e-05, "loss": 0.0002, "step": 23676 }, { "epoch": 101.2, "learning_rate": 7.570814382521991e-05, "loss": 0.0, "step": 23680 }, { "epoch": 101.21, "learning_rate": 7.569814731644327e-05, "loss": 0.0013, "step": 23684 }, { "epoch": 101.23, "learning_rate": 7.568814941150873e-05, "loss": 0.0008, "step": 23688 }, { "epoch": 101.25, "learning_rate": 7.567815011095945e-05, "loss": 0.0, "step": 23692 }, { "epoch": 101.26, "learning_rate": 7.566814941533866e-05, "loss": 0.0, "step": 23696 }, { "epoch": 101.28, "learning_rate": 7.565814732518974e-05, "loss": 0.0017, "step": 23700 }, { "epoch": 101.3, "learning_rate": 7.564814384105607e-05, "loss": 0.0, "step": 23704 }, { "epoch": 101.32, "learning_rate": 7.563813896348113e-05, "loss": 0.0, "step": 23708 }, { "epoch": 101.33, "learning_rate": 7.562813269300845e-05, "loss": 0.0001, "step": 23712 }, { "epoch": 101.35, "learning_rate": 7.561812503018172e-05, "loss": 0.0, "step": 23716 }, { "epoch": 101.37, "learning_rate": 7.56081159755446e-05, "loss": 0.0006, "step": 23720 }, { "epoch": 101.38, "learning_rate": 7.559810552964091e-05, "loss": 0.0, "step": 23724 }, { "epoch": 101.4, "learning_rate": 7.558809369301447e-05, "loss": 0.0, "step": 23728 }, { "epoch": 101.42, "learning_rate": 7.557808046620922e-05, "loss": 0.0006, "step": 23732 }, { "epoch": 101.44, "learning_rate": 7.556806584976919e-05, "loss": 0.0, "step": 23736 }, { "epoch": 101.45, "learning_rate": 7.555804984423849e-05, "loss": 0.0003, "step": 23740 }, { "epoch": 101.47, "learning_rate": 7.554803245016123e-05, "loss": 0.0, "step": 23744 }, { "epoch": 101.49, "learning_rate": 7.553801366808165e-05, "loss": 0.0, "step": 23748 }, { "epoch": 101.5, "learning_rate": 7.552799349854408e-05, "loss": 0.0009, "step": 23752 }, { "epoch": 101.52, "learning_rate": 7.551797194209292e-05, "loss": 0.0, "step": 23756 }, { "epoch": 101.54, "learning_rate": 7.550794899927258e-05, "loss": 0.0, "step": 23760 }, { "epoch": 101.56, "learning_rate": 7.549792467062768e-05, "loss": 0.0, "step": 23764 }, { "epoch": 101.57, "learning_rate": 7.548789895670277e-05, "loss": 0.0, "step": 23768 }, { "epoch": 101.59, "learning_rate": 7.547787185804253e-05, "loss": 0.0, "step": 23772 }, { "epoch": 101.61, "learning_rate": 7.546784337519177e-05, "loss": 0.0, "step": 23776 }, { "epoch": 101.62, "learning_rate": 7.545781350869532e-05, "loss": 0.0, "step": 23780 }, { "epoch": 101.64, "learning_rate": 7.544778225909807e-05, "loss": 0.0, "step": 23784 }, { "epoch": 101.66, "learning_rate": 7.543774962694501e-05, "loss": 0.0, "step": 23788 }, { "epoch": 101.68, "learning_rate": 7.542771561278123e-05, "loss": 0.0, "step": 23792 }, { "epoch": 101.69, "learning_rate": 7.541768021715184e-05, "loss": 0.0001, "step": 23796 }, { "epoch": 101.71, "learning_rate": 7.540764344060207e-05, "loss": 0.0, "step": 23800 }, { "epoch": 101.73, "learning_rate": 7.539760528367721e-05, "loss": 0.0, "step": 23804 }, { "epoch": 101.74, "learning_rate": 7.538756574692263e-05, "loss": 0.0, "step": 23808 }, { "epoch": 101.76, "learning_rate": 7.537752483088376e-05, "loss": 0.0, "step": 23812 }, { "epoch": 101.78, "learning_rate": 7.536748253610611e-05, "loss": 0.0, "step": 23816 }, { "epoch": 101.79, "learning_rate": 7.535743886313526e-05, "loss": 0.0001, "step": 23820 }, { "epoch": 101.81, "learning_rate": 7.534739381251691e-05, "loss": 0.0011, "step": 23824 }, { "epoch": 101.83, "learning_rate": 7.533734738479676e-05, "loss": 0.0001, "step": 23828 }, { "epoch": 101.85, "learning_rate": 7.532729958052065e-05, "loss": 0.0, "step": 23832 }, { "epoch": 101.86, "learning_rate": 7.531725040023445e-05, "loss": 0.0, "step": 23836 }, { "epoch": 101.88, "learning_rate": 7.530719984448416e-05, "loss": 0.0, "step": 23840 }, { "epoch": 101.9, "learning_rate": 7.529714791381575e-05, "loss": 0.0, "step": 23844 }, { "epoch": 101.91, "learning_rate": 7.528709460877542e-05, "loss": 0.0, "step": 23848 }, { "epoch": 101.93, "learning_rate": 7.527703992990926e-05, "loss": 0.0, "step": 23852 }, { "epoch": 101.95, "learning_rate": 7.526698387776361e-05, "loss": 0.0004, "step": 23856 }, { "epoch": 101.97, "learning_rate": 7.525692645288477e-05, "loss": 0.0, "step": 23860 }, { "epoch": 101.98, "learning_rate": 7.524686765581916e-05, "loss": 0.0005, "step": 23864 }, { "epoch": 102.0, "learning_rate": 7.523680748711328e-05, "loss": 0.0001, "step": 23868 }, { "epoch": 102.02, "learning_rate": 7.522674594731366e-05, "loss": 0.0, "step": 23872 }, { "epoch": 102.03, "learning_rate": 7.521668303696694e-05, "loss": 0.0, "step": 23876 }, { "epoch": 102.05, "learning_rate": 7.520661875661987e-05, "loss": 0.0, "step": 23880 }, { "epoch": 102.07, "learning_rate": 7.519655310681919e-05, "loss": 0.0001, "step": 23884 }, { "epoch": 102.09, "learning_rate": 7.518648608811176e-05, "loss": 0.0001, "step": 23888 }, { "epoch": 102.1, "learning_rate": 7.517641770104453e-05, "loss": 0.0001, "step": 23892 }, { "epoch": 102.12, "learning_rate": 7.516634794616451e-05, "loss": 0.0, "step": 23896 }, { "epoch": 102.14, "learning_rate": 7.515627682401874e-05, "loss": 0.0, "step": 23900 }, { "epoch": 102.15, "learning_rate": 7.514620433515443e-05, "loss": 0.0003, "step": 23904 }, { "epoch": 102.17, "learning_rate": 7.51361304801188e-05, "loss": 0.0, "step": 23908 }, { "epoch": 102.19, "learning_rate": 7.51260552594591e-05, "loss": 0.0001, "step": 23912 }, { "epoch": 102.21, "learning_rate": 7.511597867372278e-05, "loss": 0.0, "step": 23916 }, { "epoch": 102.22, "learning_rate": 7.510590072345724e-05, "loss": 0.0, "step": 23920 }, { "epoch": 102.24, "learning_rate": 7.509582140921006e-05, "loss": 0.0001, "step": 23924 }, { "epoch": 102.26, "learning_rate": 7.508574073152876e-05, "loss": 0.0004, "step": 23928 }, { "epoch": 102.27, "learning_rate": 7.507565869096109e-05, "loss": 0.0, "step": 23932 }, { "epoch": 102.29, "learning_rate": 7.506557528805476e-05, "loss": 0.0, "step": 23936 }, { "epoch": 102.31, "learning_rate": 7.50554905233576e-05, "loss": 0.0, "step": 23940 }, { "epoch": 102.32, "learning_rate": 7.50454043974175e-05, "loss": 0.0, "step": 23944 }, { "epoch": 102.34, "learning_rate": 7.503531691078245e-05, "loss": 0.0001, "step": 23948 }, { "epoch": 102.36, "learning_rate": 7.50252280640005e-05, "loss": 0.0, "step": 23952 }, { "epoch": 102.38, "learning_rate": 7.501513785761971e-05, "loss": 0.0001, "step": 23956 }, { "epoch": 102.39, "learning_rate": 7.500504629218834e-05, "loss": 0.0, "step": 23960 }, { "epoch": 102.41, "learning_rate": 7.499495336825463e-05, "loss": 0.0006, "step": 23964 }, { "epoch": 102.43, "learning_rate": 7.49848590863669e-05, "loss": 0.0, "step": 23968 }, { "epoch": 102.44, "learning_rate": 7.497476344707359e-05, "loss": 0.0, "step": 23972 }, { "epoch": 102.46, "learning_rate": 7.49646664509232e-05, "loss": 0.0, "step": 23976 }, { "epoch": 102.48, "learning_rate": 7.495456809846424e-05, "loss": 0.0002, "step": 23980 }, { "epoch": 102.5, "learning_rate": 7.494446839024539e-05, "loss": 0.0, "step": 23984 }, { "epoch": 102.51, "learning_rate": 7.493436732681535e-05, "loss": 0.0001, "step": 23988 }, { "epoch": 102.53, "learning_rate": 7.492426490872289e-05, "loss": 0.0001, "step": 23992 }, { "epoch": 102.55, "learning_rate": 7.491416113651686e-05, "loss": 0.0001, "step": 23996 }, { "epoch": 102.56, "learning_rate": 7.49040560107462e-05, "loss": 0.0, "step": 24000 }, { "epoch": 102.56, "eval_exact_match": 0.5343035343035343, "eval_loss": 0.9552536010742188, "eval_runtime": 133.8111, "eval_samples_per_second": 7.189, "step": 24000 } ], "max_steps": 59904, "num_train_epochs": 256, "total_flos": 2.0936137837879296e+17, "trial_name": null, "trial_params": null }