diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18084 @@ +{ + "best_metric": 1.9482014179229736, + "best_model_checkpoint": "saves/Custom/lora/llama2-Medical-Medtext-28-10/checkpoint-14000", + "epoch": 1.9999324529703806, + "eval_steps": 400, + "global_step": 14804, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.99932450689003e-06, + "loss": 2.8148, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.9986490137800594e-06, + "loss": 2.6944, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9979735206700893e-06, + "loss": 2.688, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 1.997298027560119e-06, + "loss": 2.8038, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.9966225344501488e-06, + "loss": 2.8559, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.9959470413401783e-06, + "loss": 2.7153, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.995271548230208e-06, + "loss": 2.7136, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 1.9945960551202377e-06, + "loss": 2.7289, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.9939205620102677e-06, + "loss": 2.7078, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 1.993245068900297e-06, + "loss": 2.6738, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.992569575790327e-06, + "loss": 2.6968, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 1.9918940826803566e-06, + "loss": 2.7469, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.9912185895703866e-06, + "loss": 2.628, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 1.990543096460416e-06, + "loss": 2.7036, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.9898676033504456e-06, + "loss": 2.5699, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 1.9891921102404755e-06, + "loss": 2.6844, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.988516617130505e-06, + "loss": 2.6691, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 1.987841124020535e-06, + "loss": 2.708, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.9871656309105645e-06, + "loss": 2.7602, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 1.9864901378005944e-06, + "loss": 2.8753, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.985814644690624e-06, + "loss": 2.6093, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 1.985139151580654e-06, + "loss": 2.7309, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 1.9844636584706834e-06, + "loss": 2.6666, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 1.9837881653607133e-06, + "loss": 2.6702, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 1.983112672250743e-06, + "loss": 2.7468, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 1.9824371791407724e-06, + "loss": 2.6879, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 1.9817616860308023e-06, + "loss": 2.6141, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 1.981086192920832e-06, + "loss": 2.6581, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 1.9804106998108618e-06, + "loss": 2.5732, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 1.9797352067008917e-06, + "loss": 2.6452, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.9790597135909212e-06, + "loss": 2.6152, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 1.978384220480951e-06, + "loss": 2.5607, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 1.9777087273709807e-06, + "loss": 2.6272, + "step": 165 + }, + { + "epoch": 0.02, + "learning_rate": 1.9770332342610106e-06, + "loss": 2.7238, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 1.97635774115104e-06, + "loss": 2.6797, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 1.97568224804107e-06, + "loss": 2.6734, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 1.9750067549310996e-06, + "loss": 2.5306, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 1.9743312618211295e-06, + "loss": 2.5373, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 1.973655768711159e-06, + "loss": 2.5462, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 1.972980275601189e-06, + "loss": 2.6225, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 1.9723047824912185e-06, + "loss": 2.5914, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 1.9716292893812484e-06, + "loss": 2.5997, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 1.970953796271278e-06, + "loss": 2.5858, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 1.970278303161308e-06, + "loss": 2.6066, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 1.9696028100513374e-06, + "loss": 2.4575, + "step": 225 + }, + { + "epoch": 0.03, + "learning_rate": 1.9689273169413673e-06, + "loss": 2.5655, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 1.968251823831397e-06, + "loss": 2.6101, + "step": 235 + }, + { + "epoch": 0.03, + "learning_rate": 1.9675763307214268e-06, + "loss": 2.4752, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 1.9669008376114563e-06, + "loss": 2.5649, + "step": 245 + }, + { + "epoch": 0.03, + "learning_rate": 1.9662253445014862e-06, + "loss": 2.4658, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 1.9655498513915157e-06, + "loss": 2.6025, + "step": 255 + }, + { + "epoch": 0.04, + "learning_rate": 1.9648743582815453e-06, + "loss": 2.528, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 1.964198865171575e-06, + "loss": 2.4624, + "step": 265 + }, + { + "epoch": 0.04, + "learning_rate": 1.9635233720616047e-06, + "loss": 2.5285, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 1.9628478789516346e-06, + "loss": 2.5167, + "step": 275 + }, + { + "epoch": 0.04, + "learning_rate": 1.962172385841664e-06, + "loss": 2.4256, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 1.961496892731694e-06, + "loss": 2.4909, + "step": 285 + }, + { + "epoch": 0.04, + "learning_rate": 1.9608213996217236e-06, + "loss": 2.4507, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 1.9601459065117535e-06, + "loss": 2.5089, + "step": 295 + }, + { + "epoch": 0.04, + "learning_rate": 1.959470413401783e-06, + "loss": 2.4909, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 1.958794920291813e-06, + "loss": 2.3888, + "step": 305 + }, + { + "epoch": 0.04, + "learning_rate": 1.9581194271818425e-06, + "loss": 2.3279, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 1.9574439340718725e-06, + "loss": 2.3981, + "step": 315 + }, + { + "epoch": 0.04, + "learning_rate": 1.956768440961902e-06, + "loss": 2.4836, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 1.956092947851932e-06, + "loss": 2.4944, + "step": 325 + }, + { + "epoch": 0.04, + "learning_rate": 1.9554174547419614e-06, + "loss": 2.5004, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 1.9547419616319914e-06, + "loss": 2.3797, + "step": 335 + }, + { + "epoch": 0.05, + "learning_rate": 1.954066468522021e-06, + "loss": 2.4652, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 1.953390975412051e-06, + "loss": 2.4376, + "step": 345 + }, + { + "epoch": 0.05, + "learning_rate": 1.9527154823020803e-06, + "loss": 2.4507, + "step": 350 + }, + { + "epoch": 0.05, + "learning_rate": 1.9520399891921103e-06, + "loss": 2.4931, + "step": 355 + }, + { + "epoch": 0.05, + "learning_rate": 1.9513644960821398e-06, + "loss": 2.5258, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 1.9506890029721697e-06, + "loss": 2.3549, + "step": 365 + }, + { + "epoch": 0.05, + "learning_rate": 1.9500135098621992e-06, + "loss": 2.4138, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 1.949338016752229e-06, + "loss": 2.4262, + "step": 375 + }, + { + "epoch": 0.05, + "learning_rate": 1.9486625236422587e-06, + "loss": 2.3892, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 1.9479870305322886e-06, + "loss": 2.275, + "step": 385 + }, + { + "epoch": 0.05, + "learning_rate": 1.947311537422318e-06, + "loss": 2.4148, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 1.946636044312348e-06, + "loss": 2.3715, + "step": 395 + }, + { + "epoch": 0.05, + "learning_rate": 1.9459605512023776e-06, + "loss": 2.3765, + "step": 400 + }, + { + "epoch": 0.05, + "eval_loss": 2.380300521850586, + "eval_runtime": 165.4903, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.453, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 1.9452850580924075e-06, + "loss": 2.4244, + "step": 405 + }, + { + "epoch": 0.06, + "learning_rate": 1.944609564982437e-06, + "loss": 2.3635, + "step": 410 + }, + { + "epoch": 0.06, + "learning_rate": 1.943934071872467e-06, + "loss": 2.3548, + "step": 415 + }, + { + "epoch": 0.06, + "learning_rate": 1.9432585787624965e-06, + "loss": 2.3199, + "step": 420 + }, + { + "epoch": 0.06, + "learning_rate": 1.9425830856525264e-06, + "loss": 2.3976, + "step": 425 + }, + { + "epoch": 0.06, + "learning_rate": 1.941907592542556e-06, + "loss": 2.4299, + "step": 430 + }, + { + "epoch": 0.06, + "learning_rate": 1.941232099432586e-06, + "loss": 2.3214, + "step": 435 + }, + { + "epoch": 0.06, + "learning_rate": 1.9405566063226154e-06, + "loss": 2.3571, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 1.9398811132126453e-06, + "loss": 2.3554, + "step": 445 + }, + { + "epoch": 0.06, + "learning_rate": 1.939205620102675e-06, + "loss": 2.3911, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 1.9385301269927044e-06, + "loss": 2.3898, + "step": 455 + }, + { + "epoch": 0.06, + "learning_rate": 1.9378546338827343e-06, + "loss": 2.3447, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 1.937179140772764e-06, + "loss": 2.3268, + "step": 465 + }, + { + "epoch": 0.06, + "learning_rate": 1.9365036476627938e-06, + "loss": 2.3788, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 1.9358281545528233e-06, + "loss": 2.3091, + "step": 475 + }, + { + "epoch": 0.06, + "learning_rate": 1.935152661442853e-06, + "loss": 2.289, + "step": 480 + }, + { + "epoch": 0.07, + "learning_rate": 1.9344771683328827e-06, + "loss": 2.3099, + "step": 485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9338016752229127e-06, + "loss": 2.2789, + "step": 490 + }, + { + "epoch": 0.07, + "learning_rate": 1.933126182112942e-06, + "loss": 2.2609, + "step": 495 + }, + { + "epoch": 0.07, + "learning_rate": 1.932450689002972e-06, + "loss": 2.3611, + "step": 500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9317751958930016e-06, + "loss": 2.2964, + "step": 505 + }, + { + "epoch": 0.07, + "learning_rate": 1.9310997027830316e-06, + "loss": 2.3029, + "step": 510 + }, + { + "epoch": 0.07, + "learning_rate": 1.930424209673061e-06, + "loss": 2.3109, + "step": 515 + }, + { + "epoch": 0.07, + "learning_rate": 1.929748716563091e-06, + "loss": 2.4029, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9290732234531205e-06, + "loss": 2.2692, + "step": 525 + }, + { + "epoch": 0.07, + "learning_rate": 1.9283977303431505e-06, + "loss": 2.4243, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 1.92772223723318e-06, + "loss": 2.3542, + "step": 535 + }, + { + "epoch": 0.07, + "learning_rate": 1.92704674412321e-06, + "loss": 2.3045, + "step": 540 + }, + { + "epoch": 0.07, + "learning_rate": 1.9263712510132394e-06, + "loss": 2.3004, + "step": 545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9256957579032694e-06, + "loss": 2.359, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 1.925020264793299e-06, + "loss": 2.3057, + "step": 555 + }, + { + "epoch": 0.08, + "learning_rate": 1.924344771683329e-06, + "loss": 2.3865, + "step": 560 + }, + { + "epoch": 0.08, + "learning_rate": 1.9236692785733583e-06, + "loss": 2.2997, + "step": 565 + }, + { + "epoch": 0.08, + "learning_rate": 1.9229937854633883e-06, + "loss": 2.2846, + "step": 570 + }, + { + "epoch": 0.08, + "learning_rate": 1.922318292353418e-06, + "loss": 2.3284, + "step": 575 + }, + { + "epoch": 0.08, + "learning_rate": 1.9216427992434477e-06, + "loss": 2.258, + "step": 580 + }, + { + "epoch": 0.08, + "learning_rate": 1.9209673061334777e-06, + "loss": 2.3458, + "step": 585 + }, + { + "epoch": 0.08, + "learning_rate": 1.920291813023507e-06, + "loss": 2.276, + "step": 590 + }, + { + "epoch": 0.08, + "learning_rate": 1.9196163199135367e-06, + "loss": 2.2503, + "step": 595 + }, + { + "epoch": 0.08, + "learning_rate": 1.9189408268035666e-06, + "loss": 2.2727, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 1.918265333693596e-06, + "loss": 2.289, + "step": 605 + }, + { + "epoch": 0.08, + "learning_rate": 1.917589840583626e-06, + "loss": 2.2968, + "step": 610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9169143474736556e-06, + "loss": 2.3512, + "step": 615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9162388543636855e-06, + "loss": 2.3458, + "step": 620 + }, + { + "epoch": 0.08, + "learning_rate": 1.915563361253715e-06, + "loss": 2.2532, + "step": 625 + }, + { + "epoch": 0.09, + "learning_rate": 1.914887868143745e-06, + "loss": 2.2273, + "step": 630 + }, + { + "epoch": 0.09, + "learning_rate": 1.9142123750337745e-06, + "loss": 2.3108, + "step": 635 + }, + { + "epoch": 0.09, + "learning_rate": 1.9135368819238044e-06, + "loss": 2.3238, + "step": 640 + }, + { + "epoch": 0.09, + "learning_rate": 1.912861388813834e-06, + "loss": 2.2053, + "step": 645 + }, + { + "epoch": 0.09, + "learning_rate": 1.9121858957038635e-06, + "loss": 2.386, + "step": 650 + }, + { + "epoch": 0.09, + "learning_rate": 1.9115104025938934e-06, + "loss": 2.1914, + "step": 655 + }, + { + "epoch": 0.09, + "learning_rate": 1.910834909483923e-06, + "loss": 2.2638, + "step": 660 + }, + { + "epoch": 0.09, + "learning_rate": 1.910159416373953e-06, + "loss": 2.3571, + "step": 665 + }, + { + "epoch": 0.09, + "learning_rate": 1.9094839232639824e-06, + "loss": 2.344, + "step": 670 + }, + { + "epoch": 0.09, + "learning_rate": 1.9088084301540123e-06, + "loss": 2.2765, + "step": 675 + }, + { + "epoch": 0.09, + "learning_rate": 1.908132937044042e-06, + "loss": 2.2643, + "step": 680 + }, + { + "epoch": 0.09, + "learning_rate": 1.9074574439340718e-06, + "loss": 2.2843, + "step": 685 + }, + { + "epoch": 0.09, + "learning_rate": 1.9067819508241015e-06, + "loss": 2.3156, + "step": 690 + }, + { + "epoch": 0.09, + "learning_rate": 1.9061064577141312e-06, + "loss": 2.2365, + "step": 695 + }, + { + "epoch": 0.09, + "learning_rate": 1.905430964604161e-06, + "loss": 2.2396, + "step": 700 + }, + { + "epoch": 0.1, + "learning_rate": 1.9047554714941907e-06, + "loss": 2.3003, + "step": 705 + }, + { + "epoch": 0.1, + "learning_rate": 1.9040799783842202e-06, + "loss": 2.2414, + "step": 710 + }, + { + "epoch": 0.1, + "learning_rate": 1.9034044852742501e-06, + "loss": 2.2087, + "step": 715 + }, + { + "epoch": 0.1, + "learning_rate": 1.9027289921642796e-06, + "loss": 2.3138, + "step": 720 + }, + { + "epoch": 0.1, + "learning_rate": 1.9020534990543096e-06, + "loss": 2.2667, + "step": 725 + }, + { + "epoch": 0.1, + "learning_rate": 1.9013780059443393e-06, + "loss": 2.171, + "step": 730 + }, + { + "epoch": 0.1, + "learning_rate": 1.900702512834369e-06, + "loss": 2.2188, + "step": 735 + }, + { + "epoch": 0.1, + "learning_rate": 1.9000270197243988e-06, + "loss": 2.2388, + "step": 740 + }, + { + "epoch": 0.1, + "learning_rate": 1.8993515266144285e-06, + "loss": 2.2639, + "step": 745 + }, + { + "epoch": 0.1, + "learning_rate": 1.8986760335044582e-06, + "loss": 2.1737, + "step": 750 + }, + { + "epoch": 0.1, + "learning_rate": 1.898000540394488e-06, + "loss": 2.2252, + "step": 755 + }, + { + "epoch": 0.1, + "learning_rate": 1.8973250472845177e-06, + "loss": 2.1938, + "step": 760 + }, + { + "epoch": 0.1, + "learning_rate": 1.8966495541745474e-06, + "loss": 2.2139, + "step": 765 + }, + { + "epoch": 0.1, + "learning_rate": 1.8959740610645771e-06, + "loss": 2.2717, + "step": 770 + }, + { + "epoch": 0.1, + "learning_rate": 1.8952985679546068e-06, + "loss": 2.2766, + "step": 775 + }, + { + "epoch": 0.11, + "learning_rate": 1.8946230748446366e-06, + "loss": 2.2662, + "step": 780 + }, + { + "epoch": 0.11, + "learning_rate": 1.893947581734666e-06, + "loss": 2.1394, + "step": 785 + }, + { + "epoch": 0.11, + "learning_rate": 1.893272088624696e-06, + "loss": 2.187, + "step": 790 + }, + { + "epoch": 0.11, + "learning_rate": 1.8925965955147255e-06, + "loss": 2.2881, + "step": 795 + }, + { + "epoch": 0.11, + "learning_rate": 1.8919211024047555e-06, + "loss": 2.2189, + "step": 800 + }, + { + "epoch": 0.11, + "eval_loss": 2.2260634899139404, + "eval_runtime": 165.4618, + "eval_samples_per_second": 3.602, + "eval_steps_per_second": 0.453, + "step": 800 + }, + { + "epoch": 0.11, + "learning_rate": 1.891245609294785e-06, + "loss": 2.2459, + "step": 805 + }, + { + "epoch": 0.11, + "learning_rate": 1.890570116184815e-06, + "loss": 2.1488, + "step": 810 + }, + { + "epoch": 0.11, + "learning_rate": 1.8898946230748444e-06, + "loss": 2.2133, + "step": 815 + }, + { + "epoch": 0.11, + "learning_rate": 1.8892191299648744e-06, + "loss": 2.2117, + "step": 820 + }, + { + "epoch": 0.11, + "learning_rate": 1.888543636854904e-06, + "loss": 2.1795, + "step": 825 + }, + { + "epoch": 0.11, + "learning_rate": 1.8878681437449338e-06, + "loss": 2.2757, + "step": 830 + }, + { + "epoch": 0.11, + "learning_rate": 1.8871926506349634e-06, + "loss": 2.2471, + "step": 835 + }, + { + "epoch": 0.11, + "learning_rate": 1.8865171575249933e-06, + "loss": 2.3356, + "step": 840 + }, + { + "epoch": 0.11, + "learning_rate": 1.8858416644150228e-06, + "loss": 2.263, + "step": 845 + }, + { + "epoch": 0.11, + "learning_rate": 1.8851661713050525e-06, + "loss": 2.2264, + "step": 850 + }, + { + "epoch": 0.12, + "learning_rate": 1.8844906781950823e-06, + "loss": 2.2703, + "step": 855 + }, + { + "epoch": 0.12, + "learning_rate": 1.883815185085112e-06, + "loss": 2.2639, + "step": 860 + }, + { + "epoch": 0.12, + "learning_rate": 1.8831396919751417e-06, + "loss": 2.2495, + "step": 865 + }, + { + "epoch": 0.12, + "learning_rate": 1.8824641988651714e-06, + "loss": 2.2448, + "step": 870 + }, + { + "epoch": 0.12, + "learning_rate": 1.8817887057552014e-06, + "loss": 2.2363, + "step": 875 + }, + { + "epoch": 0.12, + "learning_rate": 1.8811132126452309e-06, + "loss": 2.1892, + "step": 880 + }, + { + "epoch": 0.12, + "learning_rate": 1.8804377195352608e-06, + "loss": 2.1822, + "step": 885 + }, + { + "epoch": 0.12, + "learning_rate": 1.8797622264252903e-06, + "loss": 2.1871, + "step": 890 + }, + { + "epoch": 0.12, + "learning_rate": 1.8790867333153203e-06, + "loss": 2.1882, + "step": 895 + }, + { + "epoch": 0.12, + "learning_rate": 1.8784112402053498e-06, + "loss": 2.1884, + "step": 900 + }, + { + "epoch": 0.12, + "learning_rate": 1.8777357470953797e-06, + "loss": 2.2064, + "step": 905 + }, + { + "epoch": 0.12, + "learning_rate": 1.8770602539854092e-06, + "loss": 2.249, + "step": 910 + }, + { + "epoch": 0.12, + "learning_rate": 1.876384760875439e-06, + "loss": 2.2484, + "step": 915 + }, + { + "epoch": 0.12, + "learning_rate": 1.8757092677654687e-06, + "loss": 2.1547, + "step": 920 + }, + { + "epoch": 0.12, + "learning_rate": 1.8750337746554984e-06, + "loss": 2.177, + "step": 925 + }, + { + "epoch": 0.13, + "learning_rate": 1.8743582815455281e-06, + "loss": 2.1332, + "step": 930 + }, + { + "epoch": 0.13, + "learning_rate": 1.8736827884355579e-06, + "loss": 2.2354, + "step": 935 + }, + { + "epoch": 0.13, + "learning_rate": 1.8730072953255876e-06, + "loss": 2.1353, + "step": 940 + }, + { + "epoch": 0.13, + "learning_rate": 1.8723318022156173e-06, + "loss": 2.2044, + "step": 945 + }, + { + "epoch": 0.13, + "learning_rate": 1.871656309105647e-06, + "loss": 2.2369, + "step": 950 + }, + { + "epoch": 0.13, + "learning_rate": 1.8709808159956768e-06, + "loss": 2.1856, + "step": 955 + }, + { + "epoch": 0.13, + "learning_rate": 1.8703053228857065e-06, + "loss": 2.1692, + "step": 960 + }, + { + "epoch": 0.13, + "learning_rate": 1.8696298297757362e-06, + "loss": 2.2238, + "step": 965 + }, + { + "epoch": 0.13, + "learning_rate": 1.8689543366657657e-06, + "loss": 2.2798, + "step": 970 + }, + { + "epoch": 0.13, + "learning_rate": 1.8682788435557957e-06, + "loss": 2.2296, + "step": 975 + }, + { + "epoch": 0.13, + "learning_rate": 1.8676033504458252e-06, + "loss": 2.2336, + "step": 980 + }, + { + "epoch": 0.13, + "learning_rate": 1.8669278573358551e-06, + "loss": 2.2287, + "step": 985 + }, + { + "epoch": 0.13, + "learning_rate": 1.8662523642258847e-06, + "loss": 2.244, + "step": 990 + }, + { + "epoch": 0.13, + "learning_rate": 1.8655768711159146e-06, + "loss": 2.1761, + "step": 995 + }, + { + "epoch": 0.14, + "learning_rate": 1.8649013780059441e-06, + "loss": 2.2065, + "step": 1000 + }, + { + "epoch": 0.14, + "learning_rate": 1.864225884895974e-06, + "loss": 2.1739, + "step": 1005 + }, + { + "epoch": 0.14, + "learning_rate": 1.8635503917860036e-06, + "loss": 2.1753, + "step": 1010 + }, + { + "epoch": 0.14, + "learning_rate": 1.8628748986760335e-06, + "loss": 2.3107, + "step": 1015 + }, + { + "epoch": 0.14, + "learning_rate": 1.8621994055660632e-06, + "loss": 2.214, + "step": 1020 + }, + { + "epoch": 0.14, + "learning_rate": 1.861523912456093e-06, + "loss": 2.1209, + "step": 1025 + }, + { + "epoch": 0.14, + "learning_rate": 1.8608484193461227e-06, + "loss": 2.1574, + "step": 1030 + }, + { + "epoch": 0.14, + "learning_rate": 1.8601729262361522e-06, + "loss": 2.2181, + "step": 1035 + }, + { + "epoch": 0.14, + "learning_rate": 1.8594974331261821e-06, + "loss": 2.2101, + "step": 1040 + }, + { + "epoch": 0.14, + "learning_rate": 1.8588219400162116e-06, + "loss": 2.196, + "step": 1045 + }, + { + "epoch": 0.14, + "learning_rate": 1.8581464469062416e-06, + "loss": 2.2107, + "step": 1050 + }, + { + "epoch": 0.14, + "learning_rate": 1.857470953796271e-06, + "loss": 2.1913, + "step": 1055 + }, + { + "epoch": 0.14, + "learning_rate": 1.856795460686301e-06, + "loss": 2.1373, + "step": 1060 + }, + { + "epoch": 0.14, + "learning_rate": 1.8561199675763305e-06, + "loss": 2.0939, + "step": 1065 + }, + { + "epoch": 0.14, + "learning_rate": 1.8554444744663605e-06, + "loss": 2.2936, + "step": 1070 + }, + { + "epoch": 0.15, + "learning_rate": 1.85476898135639e-06, + "loss": 2.11, + "step": 1075 + }, + { + "epoch": 0.15, + "learning_rate": 1.85409348824642e-06, + "loss": 2.2067, + "step": 1080 + }, + { + "epoch": 0.15, + "learning_rate": 1.8534179951364495e-06, + "loss": 2.0939, + "step": 1085 + }, + { + "epoch": 0.15, + "learning_rate": 1.8527425020264794e-06, + "loss": 2.1118, + "step": 1090 + }, + { + "epoch": 0.15, + "learning_rate": 1.852067008916509e-06, + "loss": 2.2529, + "step": 1095 + }, + { + "epoch": 0.15, + "learning_rate": 1.8513915158065388e-06, + "loss": 2.1717, + "step": 1100 + }, + { + "epoch": 0.15, + "learning_rate": 1.8507160226965684e-06, + "loss": 2.1909, + "step": 1105 + }, + { + "epoch": 0.15, + "learning_rate": 1.850040529586598e-06, + "loss": 2.1772, + "step": 1110 + }, + { + "epoch": 0.15, + "learning_rate": 1.8493650364766278e-06, + "loss": 2.1615, + "step": 1115 + }, + { + "epoch": 0.15, + "learning_rate": 1.8486895433666575e-06, + "loss": 2.1519, + "step": 1120 + }, + { + "epoch": 0.15, + "learning_rate": 1.8480140502566873e-06, + "loss": 2.1842, + "step": 1125 + }, + { + "epoch": 0.15, + "learning_rate": 1.847338557146717e-06, + "loss": 2.1566, + "step": 1130 + }, + { + "epoch": 0.15, + "learning_rate": 1.8466630640367467e-06, + "loss": 2.2503, + "step": 1135 + }, + { + "epoch": 0.15, + "learning_rate": 1.8459875709267764e-06, + "loss": 2.1565, + "step": 1140 + }, + { + "epoch": 0.15, + "learning_rate": 1.8453120778168062e-06, + "loss": 2.1437, + "step": 1145 + }, + { + "epoch": 0.16, + "learning_rate": 1.844636584706836e-06, + "loss": 2.1883, + "step": 1150 + }, + { + "epoch": 0.16, + "learning_rate": 1.8439610915968656e-06, + "loss": 2.1408, + "step": 1155 + }, + { + "epoch": 0.16, + "learning_rate": 1.8432855984868953e-06, + "loss": 2.2447, + "step": 1160 + }, + { + "epoch": 0.16, + "learning_rate": 1.8426101053769249e-06, + "loss": 2.1307, + "step": 1165 + }, + { + "epoch": 0.16, + "learning_rate": 1.8419346122669548e-06, + "loss": 2.132, + "step": 1170 + }, + { + "epoch": 0.16, + "learning_rate": 1.8412591191569845e-06, + "loss": 2.2312, + "step": 1175 + }, + { + "epoch": 0.16, + "learning_rate": 1.8405836260470143e-06, + "loss": 2.1841, + "step": 1180 + }, + { + "epoch": 0.16, + "learning_rate": 1.839908132937044e-06, + "loss": 2.1887, + "step": 1185 + }, + { + "epoch": 0.16, + "learning_rate": 1.8392326398270737e-06, + "loss": 2.0504, + "step": 1190 + }, + { + "epoch": 0.16, + "learning_rate": 1.8385571467171034e-06, + "loss": 2.167, + "step": 1195 + }, + { + "epoch": 0.16, + "learning_rate": 1.8378816536071332e-06, + "loss": 2.1824, + "step": 1200 + }, + { + "epoch": 0.16, + "eval_loss": 2.1631150245666504, + "eval_runtime": 165.4999, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.453, + "step": 1200 + }, + { + "epoch": 0.16, + "learning_rate": 1.8372061604971629e-06, + "loss": 2.2233, + "step": 1205 + }, + { + "epoch": 0.16, + "learning_rate": 1.8365306673871926e-06, + "loss": 2.2954, + "step": 1210 + }, + { + "epoch": 0.16, + "learning_rate": 1.8358551742772223e-06, + "loss": 2.2049, + "step": 1215 + }, + { + "epoch": 0.16, + "learning_rate": 1.835179681167252e-06, + "loss": 2.1491, + "step": 1220 + }, + { + "epoch": 0.17, + "learning_rate": 1.8345041880572818e-06, + "loss": 2.1174, + "step": 1225 + }, + { + "epoch": 0.17, + "learning_rate": 1.8338286949473113e-06, + "loss": 2.1562, + "step": 1230 + }, + { + "epoch": 0.17, + "learning_rate": 1.8331532018373412e-06, + "loss": 2.1639, + "step": 1235 + }, + { + "epoch": 0.17, + "learning_rate": 1.8324777087273708e-06, + "loss": 2.1512, + "step": 1240 + }, + { + "epoch": 0.17, + "learning_rate": 1.8318022156174007e-06, + "loss": 2.1613, + "step": 1245 + }, + { + "epoch": 0.17, + "learning_rate": 1.8311267225074302e-06, + "loss": 2.1649, + "step": 1250 + }, + { + "epoch": 0.17, + "learning_rate": 1.8304512293974601e-06, + "loss": 2.1872, + "step": 1255 + }, + { + "epoch": 0.17, + "learning_rate": 1.8297757362874897e-06, + "loss": 2.1949, + "step": 1260 + }, + { + "epoch": 0.17, + "learning_rate": 1.8291002431775196e-06, + "loss": 2.1656, + "step": 1265 + }, + { + "epoch": 0.17, + "learning_rate": 1.8284247500675491e-06, + "loss": 2.1845, + "step": 1270 + }, + { + "epoch": 0.17, + "learning_rate": 1.827749256957579e-06, + "loss": 2.1477, + "step": 1275 + }, + { + "epoch": 0.17, + "learning_rate": 1.8270737638476086e-06, + "loss": 2.2332, + "step": 1280 + }, + { + "epoch": 0.17, + "learning_rate": 1.8263982707376385e-06, + "loss": 2.1762, + "step": 1285 + }, + { + "epoch": 0.17, + "learning_rate": 1.825722777627668e-06, + "loss": 2.2039, + "step": 1290 + }, + { + "epoch": 0.17, + "learning_rate": 1.8250472845176977e-06, + "loss": 2.192, + "step": 1295 + }, + { + "epoch": 0.18, + "learning_rate": 1.8243717914077275e-06, + "loss": 2.2305, + "step": 1300 + }, + { + "epoch": 0.18, + "learning_rate": 1.8236962982977572e-06, + "loss": 2.063, + "step": 1305 + }, + { + "epoch": 0.18, + "learning_rate": 1.823020805187787e-06, + "loss": 2.1416, + "step": 1310 + }, + { + "epoch": 0.18, + "learning_rate": 1.8223453120778166e-06, + "loss": 2.082, + "step": 1315 + }, + { + "epoch": 0.18, + "learning_rate": 1.8216698189678466e-06, + "loss": 2.2055, + "step": 1320 + }, + { + "epoch": 0.18, + "learning_rate": 1.820994325857876e-06, + "loss": 2.2108, + "step": 1325 + }, + { + "epoch": 0.18, + "learning_rate": 1.820318832747906e-06, + "loss": 2.2147, + "step": 1330 + }, + { + "epoch": 0.18, + "learning_rate": 1.8196433396379356e-06, + "loss": 2.2602, + "step": 1335 + }, + { + "epoch": 0.18, + "learning_rate": 1.8189678465279655e-06, + "loss": 2.1456, + "step": 1340 + }, + { + "epoch": 0.18, + "learning_rate": 1.818292353417995e-06, + "loss": 2.1897, + "step": 1345 + }, + { + "epoch": 0.18, + "learning_rate": 1.817616860308025e-06, + "loss": 2.1242, + "step": 1350 + }, + { + "epoch": 0.18, + "learning_rate": 1.8169413671980545e-06, + "loss": 2.1346, + "step": 1355 + }, + { + "epoch": 0.18, + "learning_rate": 1.8162658740880844e-06, + "loss": 2.1532, + "step": 1360 + }, + { + "epoch": 0.18, + "learning_rate": 1.815590380978114e-06, + "loss": 2.1337, + "step": 1365 + }, + { + "epoch": 0.19, + "learning_rate": 1.8149148878681436e-06, + "loss": 2.1347, + "step": 1370 + }, + { + "epoch": 0.19, + "learning_rate": 1.8142393947581734e-06, + "loss": 2.1518, + "step": 1375 + }, + { + "epoch": 0.19, + "learning_rate": 1.813563901648203e-06, + "loss": 2.085, + "step": 1380 + }, + { + "epoch": 0.19, + "learning_rate": 1.8128884085382328e-06, + "loss": 2.2308, + "step": 1385 + }, + { + "epoch": 0.19, + "learning_rate": 1.8122129154282625e-06, + "loss": 2.189, + "step": 1390 + }, + { + "epoch": 0.19, + "learning_rate": 1.8115374223182923e-06, + "loss": 2.1436, + "step": 1395 + }, + { + "epoch": 0.19, + "learning_rate": 1.810861929208322e-06, + "loss": 2.1599, + "step": 1400 + }, + { + "epoch": 0.19, + "learning_rate": 1.8101864360983517e-06, + "loss": 2.1162, + "step": 1405 + }, + { + "epoch": 0.19, + "learning_rate": 1.8095109429883814e-06, + "loss": 2.1438, + "step": 1410 + }, + { + "epoch": 0.19, + "learning_rate": 1.8088354498784112e-06, + "loss": 2.1114, + "step": 1415 + }, + { + "epoch": 0.19, + "learning_rate": 1.808159956768441e-06, + "loss": 2.1274, + "step": 1420 + }, + { + "epoch": 0.19, + "learning_rate": 1.8074844636584704e-06, + "loss": 2.0931, + "step": 1425 + }, + { + "epoch": 0.19, + "learning_rate": 1.8068089705485004e-06, + "loss": 2.1427, + "step": 1430 + }, + { + "epoch": 0.19, + "learning_rate": 1.8061334774385299e-06, + "loss": 2.1255, + "step": 1435 + }, + { + "epoch": 0.19, + "learning_rate": 1.8054579843285598e-06, + "loss": 2.2059, + "step": 1440 + }, + { + "epoch": 0.2, + "learning_rate": 1.8047824912185893e-06, + "loss": 2.1694, + "step": 1445 + }, + { + "epoch": 0.2, + "learning_rate": 1.8041069981086193e-06, + "loss": 2.0896, + "step": 1450 + }, + { + "epoch": 0.2, + "learning_rate": 1.8034315049986488e-06, + "loss": 2.1429, + "step": 1455 + }, + { + "epoch": 0.2, + "learning_rate": 1.8027560118886787e-06, + "loss": 2.2017, + "step": 1460 + }, + { + "epoch": 0.2, + "learning_rate": 1.8020805187787084e-06, + "loss": 2.1535, + "step": 1465 + }, + { + "epoch": 0.2, + "learning_rate": 1.8014050256687382e-06, + "loss": 2.1342, + "step": 1470 + }, + { + "epoch": 0.2, + "learning_rate": 1.8007295325587679e-06, + "loss": 2.1528, + "step": 1475 + }, + { + "epoch": 0.2, + "learning_rate": 1.8000540394487976e-06, + "loss": 2.1151, + "step": 1480 + }, + { + "epoch": 0.2, + "learning_rate": 1.7993785463388273e-06, + "loss": 2.1482, + "step": 1485 + }, + { + "epoch": 0.2, + "learning_rate": 1.7987030532288569e-06, + "loss": 2.0806, + "step": 1490 + }, + { + "epoch": 0.2, + "learning_rate": 1.7980275601188868e-06, + "loss": 2.1843, + "step": 1495 + }, + { + "epoch": 0.2, + "learning_rate": 1.7973520670089163e-06, + "loss": 2.1359, + "step": 1500 + }, + { + "epoch": 0.2, + "learning_rate": 1.7966765738989462e-06, + "loss": 2.1303, + "step": 1505 + }, + { + "epoch": 0.2, + "learning_rate": 1.7960010807889758e-06, + "loss": 2.1373, + "step": 1510 + }, + { + "epoch": 0.2, + "learning_rate": 1.7953255876790057e-06, + "loss": 2.1413, + "step": 1515 + }, + { + "epoch": 0.21, + "learning_rate": 1.7946500945690352e-06, + "loss": 2.1424, + "step": 1520 + }, + { + "epoch": 0.21, + "learning_rate": 1.7939746014590652e-06, + "loss": 2.1073, + "step": 1525 + }, + { + "epoch": 0.21, + "learning_rate": 1.7932991083490947e-06, + "loss": 2.0807, + "step": 1530 + }, + { + "epoch": 0.21, + "learning_rate": 1.7926236152391246e-06, + "loss": 2.1708, + "step": 1535 + }, + { + "epoch": 0.21, + "learning_rate": 1.7919481221291541e-06, + "loss": 2.1456, + "step": 1540 + }, + { + "epoch": 0.21, + "learning_rate": 1.791272629019184e-06, + "loss": 2.2062, + "step": 1545 + }, + { + "epoch": 0.21, + "learning_rate": 1.7905971359092136e-06, + "loss": 2.1719, + "step": 1550 + }, + { + "epoch": 0.21, + "learning_rate": 1.7899216427992433e-06, + "loss": 2.1313, + "step": 1555 + }, + { + "epoch": 0.21, + "learning_rate": 1.789246149689273e-06, + "loss": 2.0904, + "step": 1560 + }, + { + "epoch": 0.21, + "learning_rate": 1.7885706565793028e-06, + "loss": 2.1181, + "step": 1565 + }, + { + "epoch": 0.21, + "learning_rate": 1.7878951634693325e-06, + "loss": 2.236, + "step": 1570 + }, + { + "epoch": 0.21, + "learning_rate": 1.7872196703593622e-06, + "loss": 2.2107, + "step": 1575 + }, + { + "epoch": 0.21, + "learning_rate": 1.786544177249392e-06, + "loss": 2.1614, + "step": 1580 + }, + { + "epoch": 0.21, + "learning_rate": 1.7858686841394217e-06, + "loss": 2.0653, + "step": 1585 + }, + { + "epoch": 0.21, + "learning_rate": 1.7851931910294514e-06, + "loss": 2.1127, + "step": 1590 + }, + { + "epoch": 0.22, + "learning_rate": 1.7845176979194811e-06, + "loss": 2.1073, + "step": 1595 + }, + { + "epoch": 0.22, + "learning_rate": 1.7838422048095108e-06, + "loss": 2.0995, + "step": 1600 + }, + { + "epoch": 0.22, + "eval_loss": 2.1323931217193604, + "eval_runtime": 165.5394, + "eval_samples_per_second": 3.6, + "eval_steps_per_second": 0.453, + "step": 1600 + }, + { + "epoch": 0.22, + "learning_rate": 1.7831667116995406e-06, + "loss": 2.1629, + "step": 1605 + }, + { + "epoch": 0.22, + "learning_rate": 1.7824912185895705e-06, + "loss": 2.1419, + "step": 1610 + }, + { + "epoch": 0.22, + "learning_rate": 1.7818157254796e-06, + "loss": 2.1061, + "step": 1615 + }, + { + "epoch": 0.22, + "learning_rate": 1.78114023236963e-06, + "loss": 2.1188, + "step": 1620 + }, + { + "epoch": 0.22, + "learning_rate": 1.7804647392596595e-06, + "loss": 2.1862, + "step": 1625 + }, + { + "epoch": 0.22, + "learning_rate": 1.7797892461496892e-06, + "loss": 2.153, + "step": 1630 + }, + { + "epoch": 0.22, + "learning_rate": 1.779113753039719e-06, + "loss": 2.0605, + "step": 1635 + }, + { + "epoch": 0.22, + "learning_rate": 1.7784382599297486e-06, + "loss": 2.2079, + "step": 1640 + }, + { + "epoch": 0.22, + "learning_rate": 1.7777627668197784e-06, + "loss": 2.1516, + "step": 1645 + }, + { + "epoch": 0.22, + "learning_rate": 1.777087273709808e-06, + "loss": 2.1234, + "step": 1650 + }, + { + "epoch": 0.22, + "learning_rate": 1.7764117805998378e-06, + "loss": 2.231, + "step": 1655 + }, + { + "epoch": 0.22, + "learning_rate": 1.7757362874898676e-06, + "loss": 2.1475, + "step": 1660 + }, + { + "epoch": 0.22, + "learning_rate": 1.7750607943798973e-06, + "loss": 2.1289, + "step": 1665 + }, + { + "epoch": 0.23, + "learning_rate": 1.774385301269927e-06, + "loss": 2.0901, + "step": 1670 + }, + { + "epoch": 0.23, + "learning_rate": 1.7737098081599567e-06, + "loss": 2.17, + "step": 1675 + }, + { + "epoch": 0.23, + "learning_rate": 1.7730343150499865e-06, + "loss": 2.2153, + "step": 1680 + }, + { + "epoch": 0.23, + "learning_rate": 1.772358821940016e-06, + "loss": 2.1964, + "step": 1685 + }, + { + "epoch": 0.23, + "learning_rate": 1.771683328830046e-06, + "loss": 2.2056, + "step": 1690 + }, + { + "epoch": 0.23, + "learning_rate": 1.7710078357200754e-06, + "loss": 2.0911, + "step": 1695 + }, + { + "epoch": 0.23, + "learning_rate": 1.7703323426101054e-06, + "loss": 2.1362, + "step": 1700 + }, + { + "epoch": 0.23, + "learning_rate": 1.7696568495001349e-06, + "loss": 2.1819, + "step": 1705 + }, + { + "epoch": 0.23, + "learning_rate": 1.7689813563901648e-06, + "loss": 2.155, + "step": 1710 + }, + { + "epoch": 0.23, + "learning_rate": 1.7683058632801943e-06, + "loss": 2.0689, + "step": 1715 + }, + { + "epoch": 0.23, + "learning_rate": 1.7676303701702243e-06, + "loss": 2.1661, + "step": 1720 + }, + { + "epoch": 0.23, + "learning_rate": 1.7669548770602538e-06, + "loss": 2.094, + "step": 1725 + }, + { + "epoch": 0.23, + "learning_rate": 1.7662793839502837e-06, + "loss": 2.061, + "step": 1730 + }, + { + "epoch": 0.23, + "learning_rate": 1.7656038908403132e-06, + "loss": 2.185, + "step": 1735 + }, + { + "epoch": 0.24, + "learning_rate": 1.7649283977303432e-06, + "loss": 2.1631, + "step": 1740 + }, + { + "epoch": 0.24, + "learning_rate": 1.7642529046203727e-06, + "loss": 2.1535, + "step": 1745 + }, + { + "epoch": 0.24, + "learning_rate": 1.7635774115104024e-06, + "loss": 2.2405, + "step": 1750 + }, + { + "epoch": 0.24, + "learning_rate": 1.7629019184004324e-06, + "loss": 2.1495, + "step": 1755 + }, + { + "epoch": 0.24, + "learning_rate": 1.7622264252904619e-06, + "loss": 2.1073, + "step": 1760 + }, + { + "epoch": 0.24, + "learning_rate": 1.7615509321804918e-06, + "loss": 2.1346, + "step": 1765 + }, + { + "epoch": 0.24, + "learning_rate": 1.7608754390705213e-06, + "loss": 2.0585, + "step": 1770 + }, + { + "epoch": 0.24, + "learning_rate": 1.7601999459605513e-06, + "loss": 2.1566, + "step": 1775 + }, + { + "epoch": 0.24, + "learning_rate": 1.7595244528505808e-06, + "loss": 2.1382, + "step": 1780 + }, + { + "epoch": 0.24, + "learning_rate": 1.7588489597406107e-06, + "loss": 2.1801, + "step": 1785 + }, + { + "epoch": 0.24, + "learning_rate": 1.7581734666306402e-06, + "loss": 2.1169, + "step": 1790 + }, + { + "epoch": 0.24, + "learning_rate": 1.7574979735206702e-06, + "loss": 2.1017, + "step": 1795 + }, + { + "epoch": 0.24, + "learning_rate": 1.7568224804106997e-06, + "loss": 2.1697, + "step": 1800 + }, + { + "epoch": 0.24, + "learning_rate": 1.7561469873007296e-06, + "loss": 2.0574, + "step": 1805 + }, + { + "epoch": 0.24, + "learning_rate": 1.7554714941907591e-06, + "loss": 2.1086, + "step": 1810 + }, + { + "epoch": 0.25, + "learning_rate": 1.7547960010807889e-06, + "loss": 2.1359, + "step": 1815 + }, + { + "epoch": 0.25, + "learning_rate": 1.7541205079708186e-06, + "loss": 2.178, + "step": 1820 + }, + { + "epoch": 0.25, + "learning_rate": 1.7534450148608483e-06, + "loss": 2.1483, + "step": 1825 + }, + { + "epoch": 0.25, + "learning_rate": 1.752769521750878e-06, + "loss": 2.1726, + "step": 1830 + }, + { + "epoch": 0.25, + "learning_rate": 1.7520940286409078e-06, + "loss": 2.0898, + "step": 1835 + }, + { + "epoch": 0.25, + "learning_rate": 1.7514185355309375e-06, + "loss": 2.0657, + "step": 1840 + }, + { + "epoch": 0.25, + "learning_rate": 1.7507430424209672e-06, + "loss": 2.1823, + "step": 1845 + }, + { + "epoch": 0.25, + "learning_rate": 1.750067549310997e-06, + "loss": 2.0883, + "step": 1850 + }, + { + "epoch": 0.25, + "learning_rate": 1.7493920562010267e-06, + "loss": 2.1784, + "step": 1855 + }, + { + "epoch": 0.25, + "learning_rate": 1.7487165630910564e-06, + "loss": 2.0749, + "step": 1860 + }, + { + "epoch": 0.25, + "learning_rate": 1.7480410699810861e-06, + "loss": 2.1461, + "step": 1865 + }, + { + "epoch": 0.25, + "learning_rate": 1.7473655768711158e-06, + "loss": 2.2457, + "step": 1870 + }, + { + "epoch": 0.25, + "learning_rate": 1.7466900837611456e-06, + "loss": 2.1692, + "step": 1875 + }, + { + "epoch": 0.25, + "learning_rate": 1.746014590651175e-06, + "loss": 2.1989, + "step": 1880 + }, + { + "epoch": 0.25, + "learning_rate": 1.745339097541205e-06, + "loss": 2.194, + "step": 1885 + }, + { + "epoch": 0.26, + "learning_rate": 1.7446636044312345e-06, + "loss": 2.1568, + "step": 1890 + }, + { + "epoch": 0.26, + "learning_rate": 1.7439881113212645e-06, + "loss": 2.0265, + "step": 1895 + }, + { + "epoch": 0.26, + "learning_rate": 1.7433126182112942e-06, + "loss": 2.165, + "step": 1900 + }, + { + "epoch": 0.26, + "learning_rate": 1.742637125101324e-06, + "loss": 2.1315, + "step": 1905 + }, + { + "epoch": 0.26, + "learning_rate": 1.7419616319913537e-06, + "loss": 2.052, + "step": 1910 + }, + { + "epoch": 0.26, + "learning_rate": 1.7412861388813834e-06, + "loss": 2.151, + "step": 1915 + }, + { + "epoch": 0.26, + "learning_rate": 1.740610645771413e-06, + "loss": 2.1216, + "step": 1920 + }, + { + "epoch": 0.26, + "learning_rate": 1.7399351526614428e-06, + "loss": 2.2173, + "step": 1925 + }, + { + "epoch": 0.26, + "learning_rate": 1.7392596595514726e-06, + "loss": 2.0417, + "step": 1930 + }, + { + "epoch": 0.26, + "learning_rate": 1.7385841664415023e-06, + "loss": 2.1326, + "step": 1935 + }, + { + "epoch": 0.26, + "learning_rate": 1.737908673331532e-06, + "loss": 2.0473, + "step": 1940 + }, + { + "epoch": 0.26, + "learning_rate": 1.7372331802215615e-06, + "loss": 2.1349, + "step": 1945 + }, + { + "epoch": 0.26, + "learning_rate": 1.7365576871115915e-06, + "loss": 2.0019, + "step": 1950 + }, + { + "epoch": 0.26, + "learning_rate": 1.735882194001621e-06, + "loss": 2.1083, + "step": 1955 + }, + { + "epoch": 0.26, + "learning_rate": 1.735206700891651e-06, + "loss": 2.0901, + "step": 1960 + }, + { + "epoch": 0.27, + "learning_rate": 1.7345312077816804e-06, + "loss": 2.1184, + "step": 1965 + }, + { + "epoch": 0.27, + "learning_rate": 1.7338557146717104e-06, + "loss": 2.121, + "step": 1970 + }, + { + "epoch": 0.27, + "learning_rate": 1.7331802215617399e-06, + "loss": 1.9879, + "step": 1975 + }, + { + "epoch": 0.27, + "learning_rate": 1.7325047284517698e-06, + "loss": 2.1164, + "step": 1980 + }, + { + "epoch": 0.27, + "learning_rate": 1.7318292353417993e-06, + "loss": 2.118, + "step": 1985 + }, + { + "epoch": 0.27, + "learning_rate": 1.7311537422318293e-06, + "loss": 2.1429, + "step": 1990 + }, + { + "epoch": 0.27, + "learning_rate": 1.7304782491218588e-06, + "loss": 2.1996, + "step": 1995 + }, + { + "epoch": 0.27, + "learning_rate": 1.7298027560118887e-06, + "loss": 2.1266, + "step": 2000 + }, + { + "epoch": 0.27, + "eval_loss": 2.1100127696990967, + "eval_runtime": 165.3022, + "eval_samples_per_second": 3.606, + "eval_steps_per_second": 0.454, + "step": 2000 + }, + { + "epoch": 0.27, + "learning_rate": 1.7291272629019182e-06, + "loss": 2.2143, + "step": 2005 + }, + { + "epoch": 0.27, + "learning_rate": 1.728451769791948e-06, + "loss": 2.1678, + "step": 2010 + }, + { + "epoch": 0.27, + "learning_rate": 1.7277762766819777e-06, + "loss": 2.1279, + "step": 2015 + }, + { + "epoch": 0.27, + "learning_rate": 1.7271007835720074e-06, + "loss": 2.2299, + "step": 2020 + }, + { + "epoch": 0.27, + "learning_rate": 1.7264252904620371e-06, + "loss": 2.1529, + "step": 2025 + }, + { + "epoch": 0.27, + "learning_rate": 1.7257497973520669e-06, + "loss": 2.1269, + "step": 2030 + }, + { + "epoch": 0.27, + "learning_rate": 1.7250743042420966e-06, + "loss": 2.1651, + "step": 2035 + }, + { + "epoch": 0.28, + "learning_rate": 1.7243988111321263e-06, + "loss": 2.0818, + "step": 2040 + }, + { + "epoch": 0.28, + "learning_rate": 1.7237233180221563e-06, + "loss": 2.0841, + "step": 2045 + }, + { + "epoch": 0.28, + "learning_rate": 1.7230478249121858e-06, + "loss": 2.1054, + "step": 2050 + }, + { + "epoch": 0.28, + "learning_rate": 1.7223723318022157e-06, + "loss": 2.1279, + "step": 2055 + }, + { + "epoch": 0.28, + "learning_rate": 1.7216968386922452e-06, + "loss": 2.1461, + "step": 2060 + }, + { + "epoch": 0.28, + "learning_rate": 1.7210213455822752e-06, + "loss": 2.2039, + "step": 2065 + }, + { + "epoch": 0.28, + "learning_rate": 1.7203458524723047e-06, + "loss": 2.2047, + "step": 2070 + }, + { + "epoch": 0.28, + "learning_rate": 1.7196703593623344e-06, + "loss": 2.11, + "step": 2075 + }, + { + "epoch": 0.28, + "learning_rate": 1.7189948662523641e-06, + "loss": 2.1663, + "step": 2080 + }, + { + "epoch": 0.28, + "learning_rate": 1.7183193731423939e-06, + "loss": 2.0122, + "step": 2085 + }, + { + "epoch": 0.28, + "learning_rate": 1.7176438800324236e-06, + "loss": 2.1258, + "step": 2090 + }, + { + "epoch": 0.28, + "learning_rate": 1.7169683869224533e-06, + "loss": 2.103, + "step": 2095 + }, + { + "epoch": 0.28, + "learning_rate": 1.716292893812483e-06, + "loss": 2.181, + "step": 2100 + }, + { + "epoch": 0.28, + "learning_rate": 1.7156174007025128e-06, + "loss": 2.1502, + "step": 2105 + }, + { + "epoch": 0.29, + "learning_rate": 1.7149419075925425e-06, + "loss": 2.1905, + "step": 2110 + }, + { + "epoch": 0.29, + "learning_rate": 1.7142664144825722e-06, + "loss": 2.1424, + "step": 2115 + }, + { + "epoch": 0.29, + "learning_rate": 1.713590921372602e-06, + "loss": 2.1408, + "step": 2120 + }, + { + "epoch": 0.29, + "learning_rate": 1.7129154282626317e-06, + "loss": 2.0876, + "step": 2125 + }, + { + "epoch": 0.29, + "learning_rate": 1.7122399351526614e-06, + "loss": 2.1308, + "step": 2130 + }, + { + "epoch": 0.29, + "learning_rate": 1.7115644420426911e-06, + "loss": 2.13, + "step": 2135 + }, + { + "epoch": 0.29, + "learning_rate": 1.7108889489327206e-06, + "loss": 2.1297, + "step": 2140 + }, + { + "epoch": 0.29, + "learning_rate": 1.7102134558227506e-06, + "loss": 2.146, + "step": 2145 + }, + { + "epoch": 0.29, + "learning_rate": 1.70953796271278e-06, + "loss": 2.1063, + "step": 2150 + }, + { + "epoch": 0.29, + "learning_rate": 1.70886246960281e-06, + "loss": 2.1605, + "step": 2155 + }, + { + "epoch": 0.29, + "learning_rate": 1.7081869764928395e-06, + "loss": 2.1093, + "step": 2160 + }, + { + "epoch": 0.29, + "learning_rate": 1.7075114833828695e-06, + "loss": 2.1206, + "step": 2165 + }, + { + "epoch": 0.29, + "learning_rate": 1.706835990272899e-06, + "loss": 2.076, + "step": 2170 + }, + { + "epoch": 0.29, + "learning_rate": 1.706160497162929e-06, + "loss": 2.1505, + "step": 2175 + }, + { + "epoch": 0.29, + "learning_rate": 1.7054850040529584e-06, + "loss": 2.1548, + "step": 2180 + }, + { + "epoch": 0.3, + "learning_rate": 1.7048095109429884e-06, + "loss": 2.098, + "step": 2185 + }, + { + "epoch": 0.3, + "learning_rate": 1.7041340178330181e-06, + "loss": 2.1067, + "step": 2190 + }, + { + "epoch": 0.3, + "learning_rate": 1.7034585247230478e-06, + "loss": 2.0598, + "step": 2195 + }, + { + "epoch": 0.3, + "learning_rate": 1.7027830316130776e-06, + "loss": 2.1151, + "step": 2200 + }, + { + "epoch": 0.3, + "learning_rate": 1.702107538503107e-06, + "loss": 2.0832, + "step": 2205 + }, + { + "epoch": 0.3, + "learning_rate": 1.701432045393137e-06, + "loss": 2.1308, + "step": 2210 + }, + { + "epoch": 0.3, + "learning_rate": 1.7007565522831665e-06, + "loss": 2.1276, + "step": 2215 + }, + { + "epoch": 0.3, + "learning_rate": 1.7000810591731965e-06, + "loss": 2.0819, + "step": 2220 + }, + { + "epoch": 0.3, + "learning_rate": 1.699405566063226e-06, + "loss": 2.113, + "step": 2225 + }, + { + "epoch": 0.3, + "learning_rate": 1.698730072953256e-06, + "loss": 2.1185, + "step": 2230 + }, + { + "epoch": 0.3, + "learning_rate": 1.6980545798432854e-06, + "loss": 2.1705, + "step": 2235 + }, + { + "epoch": 0.3, + "learning_rate": 1.6973790867333154e-06, + "loss": 2.1164, + "step": 2240 + }, + { + "epoch": 0.3, + "learning_rate": 1.6967035936233449e-06, + "loss": 2.0782, + "step": 2245 + }, + { + "epoch": 0.3, + "learning_rate": 1.6960281005133748e-06, + "loss": 2.1195, + "step": 2250 + }, + { + "epoch": 0.3, + "learning_rate": 1.6953526074034043e-06, + "loss": 2.0733, + "step": 2255 + }, + { + "epoch": 0.31, + "learning_rate": 1.6946771142934343e-06, + "loss": 2.1518, + "step": 2260 + }, + { + "epoch": 0.31, + "learning_rate": 1.6940016211834638e-06, + "loss": 2.0086, + "step": 2265 + }, + { + "epoch": 0.31, + "learning_rate": 1.6933261280734935e-06, + "loss": 2.0694, + "step": 2270 + }, + { + "epoch": 0.31, + "learning_rate": 1.6926506349635232e-06, + "loss": 2.1871, + "step": 2275 + }, + { + "epoch": 0.31, + "learning_rate": 1.691975141853553e-06, + "loss": 2.1151, + "step": 2280 + }, + { + "epoch": 0.31, + "learning_rate": 1.6912996487435827e-06, + "loss": 2.1302, + "step": 2285 + }, + { + "epoch": 0.31, + "learning_rate": 1.6906241556336124e-06, + "loss": 2.0942, + "step": 2290 + }, + { + "epoch": 0.31, + "learning_rate": 1.6899486625236422e-06, + "loss": 2.1311, + "step": 2295 + }, + { + "epoch": 0.31, + "learning_rate": 1.6892731694136719e-06, + "loss": 2.0993, + "step": 2300 + }, + { + "epoch": 0.31, + "learning_rate": 1.6885976763037016e-06, + "loss": 2.1066, + "step": 2305 + }, + { + "epoch": 0.31, + "learning_rate": 1.6879221831937313e-06, + "loss": 2.1216, + "step": 2310 + }, + { + "epoch": 0.31, + "learning_rate": 1.687246690083761e-06, + "loss": 2.0656, + "step": 2315 + }, + { + "epoch": 0.31, + "learning_rate": 1.6865711969737908e-06, + "loss": 2.0993, + "step": 2320 + }, + { + "epoch": 0.31, + "learning_rate": 1.6858957038638205e-06, + "loss": 2.0928, + "step": 2325 + }, + { + "epoch": 0.31, + "learning_rate": 1.6852202107538502e-06, + "loss": 2.1319, + "step": 2330 + }, + { + "epoch": 0.32, + "learning_rate": 1.68454471764388e-06, + "loss": 2.1125, + "step": 2335 + }, + { + "epoch": 0.32, + "learning_rate": 1.6838692245339097e-06, + "loss": 1.9319, + "step": 2340 + }, + { + "epoch": 0.32, + "learning_rate": 1.6831937314239394e-06, + "loss": 2.1942, + "step": 2345 + }, + { + "epoch": 0.32, + "learning_rate": 1.6825182383139691e-06, + "loss": 2.1001, + "step": 2350 + }, + { + "epoch": 0.32, + "learning_rate": 1.6818427452039989e-06, + "loss": 2.1189, + "step": 2355 + }, + { + "epoch": 0.32, + "learning_rate": 1.6811672520940286e-06, + "loss": 2.1774, + "step": 2360 + }, + { + "epoch": 0.32, + "learning_rate": 1.6804917589840583e-06, + "loss": 2.1566, + "step": 2365 + }, + { + "epoch": 0.32, + "learning_rate": 1.679816265874088e-06, + "loss": 2.1077, + "step": 2370 + }, + { + "epoch": 0.32, + "learning_rate": 1.6791407727641178e-06, + "loss": 1.994, + "step": 2375 + }, + { + "epoch": 0.32, + "learning_rate": 1.6784652796541475e-06, + "loss": 2.1114, + "step": 2380 + }, + { + "epoch": 0.32, + "learning_rate": 1.6777897865441772e-06, + "loss": 2.1286, + "step": 2385 + }, + { + "epoch": 0.32, + "learning_rate": 1.677114293434207e-06, + "loss": 2.1445, + "step": 2390 + }, + { + "epoch": 0.32, + "learning_rate": 1.6764388003242367e-06, + "loss": 2.0908, + "step": 2395 + }, + { + "epoch": 0.32, + "learning_rate": 1.6757633072142662e-06, + "loss": 2.0279, + "step": 2400 + }, + { + "epoch": 0.32, + "eval_loss": 2.090486764907837, + "eval_runtime": 165.4313, + "eval_samples_per_second": 3.603, + "eval_steps_per_second": 0.453, + "step": 2400 + }, + { + "epoch": 0.32, + "learning_rate": 1.6750878141042961e-06, + "loss": 2.0959, + "step": 2405 + }, + { + "epoch": 0.33, + "learning_rate": 1.6744123209943256e-06, + "loss": 2.1049, + "step": 2410 + }, + { + "epoch": 0.33, + "learning_rate": 1.6737368278843556e-06, + "loss": 2.1089, + "step": 2415 + }, + { + "epoch": 0.33, + "learning_rate": 1.673061334774385e-06, + "loss": 2.1387, + "step": 2420 + }, + { + "epoch": 0.33, + "learning_rate": 1.672385841664415e-06, + "loss": 2.1712, + "step": 2425 + }, + { + "epoch": 0.33, + "learning_rate": 1.6717103485544446e-06, + "loss": 2.0841, + "step": 2430 + }, + { + "epoch": 0.33, + "learning_rate": 1.6710348554444745e-06, + "loss": 2.1344, + "step": 2435 + }, + { + "epoch": 0.33, + "learning_rate": 1.670359362334504e-06, + "loss": 2.0958, + "step": 2440 + }, + { + "epoch": 0.33, + "learning_rate": 1.669683869224534e-06, + "loss": 2.0917, + "step": 2445 + }, + { + "epoch": 0.33, + "learning_rate": 1.6690083761145635e-06, + "loss": 2.0386, + "step": 2450 + }, + { + "epoch": 0.33, + "learning_rate": 1.6683328830045934e-06, + "loss": 2.0025, + "step": 2455 + }, + { + "epoch": 0.33, + "learning_rate": 1.667657389894623e-06, + "loss": 2.1355, + "step": 2460 + }, + { + "epoch": 0.33, + "learning_rate": 1.6669818967846526e-06, + "loss": 2.1008, + "step": 2465 + }, + { + "epoch": 0.33, + "learning_rate": 1.6663064036746824e-06, + "loss": 2.0849, + "step": 2470 + }, + { + "epoch": 0.33, + "learning_rate": 1.665630910564712e-06, + "loss": 2.1699, + "step": 2475 + }, + { + "epoch": 0.34, + "learning_rate": 1.664955417454742e-06, + "loss": 2.0264, + "step": 2480 + }, + { + "epoch": 0.34, + "learning_rate": 1.6642799243447715e-06, + "loss": 2.0421, + "step": 2485 + }, + { + "epoch": 0.34, + "learning_rate": 1.6636044312348015e-06, + "loss": 2.1003, + "step": 2490 + }, + { + "epoch": 0.34, + "learning_rate": 1.662928938124831e-06, + "loss": 2.1109, + "step": 2495 + }, + { + "epoch": 0.34, + "learning_rate": 1.662253445014861e-06, + "loss": 2.1525, + "step": 2500 + }, + { + "epoch": 0.34, + "learning_rate": 1.6615779519048904e-06, + "loss": 2.0433, + "step": 2505 + }, + { + "epoch": 0.34, + "learning_rate": 1.6609024587949204e-06, + "loss": 2.0748, + "step": 2510 + }, + { + "epoch": 0.34, + "learning_rate": 1.66022696568495e-06, + "loss": 2.1392, + "step": 2515 + }, + { + "epoch": 0.34, + "learning_rate": 1.6595514725749798e-06, + "loss": 2.0972, + "step": 2520 + }, + { + "epoch": 0.34, + "learning_rate": 1.6588759794650093e-06, + "loss": 2.1862, + "step": 2525 + }, + { + "epoch": 0.34, + "learning_rate": 1.658200486355039e-06, + "loss": 2.1236, + "step": 2530 + }, + { + "epoch": 0.34, + "learning_rate": 1.6575249932450688e-06, + "loss": 2.19, + "step": 2535 + }, + { + "epoch": 0.34, + "learning_rate": 1.6568495001350985e-06, + "loss": 2.1086, + "step": 2540 + }, + { + "epoch": 0.34, + "learning_rate": 1.6561740070251283e-06, + "loss": 2.133, + "step": 2545 + }, + { + "epoch": 0.34, + "learning_rate": 1.655498513915158e-06, + "loss": 2.0417, + "step": 2550 + }, + { + "epoch": 0.35, + "learning_rate": 1.6548230208051877e-06, + "loss": 2.1212, + "step": 2555 + }, + { + "epoch": 0.35, + "learning_rate": 1.6541475276952174e-06, + "loss": 2.0668, + "step": 2560 + }, + { + "epoch": 0.35, + "learning_rate": 1.6534720345852472e-06, + "loss": 2.1242, + "step": 2565 + }, + { + "epoch": 0.35, + "learning_rate": 1.6527965414752769e-06, + "loss": 2.0738, + "step": 2570 + }, + { + "epoch": 0.35, + "learning_rate": 1.6521210483653066e-06, + "loss": 2.1083, + "step": 2575 + }, + { + "epoch": 0.35, + "learning_rate": 1.6514455552553363e-06, + "loss": 2.0711, + "step": 2580 + }, + { + "epoch": 0.35, + "learning_rate": 1.650770062145366e-06, + "loss": 2.0547, + "step": 2585 + }, + { + "epoch": 0.35, + "learning_rate": 1.6500945690353958e-06, + "loss": 2.0984, + "step": 2590 + }, + { + "epoch": 0.35, + "learning_rate": 1.6494190759254253e-06, + "loss": 2.0657, + "step": 2595 + }, + { + "epoch": 0.35, + "learning_rate": 1.6487435828154552e-06, + "loss": 2.1342, + "step": 2600 + }, + { + "epoch": 0.35, + "learning_rate": 1.6480680897054848e-06, + "loss": 2.1238, + "step": 2605 + }, + { + "epoch": 0.35, + "learning_rate": 1.6473925965955147e-06, + "loss": 2.0945, + "step": 2610 + }, + { + "epoch": 0.35, + "learning_rate": 1.6467171034855442e-06, + "loss": 2.1569, + "step": 2615 + }, + { + "epoch": 0.35, + "learning_rate": 1.6460416103755741e-06, + "loss": 2.055, + "step": 2620 + }, + { + "epoch": 0.35, + "learning_rate": 1.6453661172656039e-06, + "loss": 2.1235, + "step": 2625 + }, + { + "epoch": 0.36, + "learning_rate": 1.6446906241556336e-06, + "loss": 2.0776, + "step": 2630 + }, + { + "epoch": 0.36, + "learning_rate": 1.6440151310456633e-06, + "loss": 2.1014, + "step": 2635 + }, + { + "epoch": 0.36, + "learning_rate": 1.643339637935693e-06, + "loss": 2.0771, + "step": 2640 + }, + { + "epoch": 0.36, + "learning_rate": 1.6426641448257228e-06, + "loss": 2.0169, + "step": 2645 + }, + { + "epoch": 0.36, + "learning_rate": 1.6419886517157525e-06, + "loss": 2.0718, + "step": 2650 + }, + { + "epoch": 0.36, + "learning_rate": 1.6413131586057822e-06, + "loss": 2.1339, + "step": 2655 + }, + { + "epoch": 0.36, + "learning_rate": 1.6406376654958117e-06, + "loss": 2.1413, + "step": 2660 + }, + { + "epoch": 0.36, + "learning_rate": 1.6399621723858417e-06, + "loss": 2.1967, + "step": 2665 + }, + { + "epoch": 0.36, + "learning_rate": 1.6392866792758712e-06, + "loss": 2.0654, + "step": 2670 + }, + { + "epoch": 0.36, + "learning_rate": 1.6386111861659011e-06, + "loss": 2.0067, + "step": 2675 + }, + { + "epoch": 0.36, + "learning_rate": 1.6379356930559307e-06, + "loss": 1.9827, + "step": 2680 + }, + { + "epoch": 0.36, + "learning_rate": 1.6372601999459606e-06, + "loss": 2.1348, + "step": 2685 + }, + { + "epoch": 0.36, + "learning_rate": 1.63658470683599e-06, + "loss": 2.0717, + "step": 2690 + }, + { + "epoch": 0.36, + "learning_rate": 1.63590921372602e-06, + "loss": 2.0616, + "step": 2695 + }, + { + "epoch": 0.36, + "learning_rate": 1.6352337206160496e-06, + "loss": 2.1373, + "step": 2700 + }, + { + "epoch": 0.37, + "learning_rate": 1.6345582275060795e-06, + "loss": 2.07, + "step": 2705 + }, + { + "epoch": 0.37, + "learning_rate": 1.633882734396109e-06, + "loss": 2.0335, + "step": 2710 + }, + { + "epoch": 0.37, + "learning_rate": 1.633207241286139e-06, + "loss": 2.119, + "step": 2715 + }, + { + "epoch": 0.37, + "learning_rate": 1.6325317481761685e-06, + "loss": 2.0989, + "step": 2720 + }, + { + "epoch": 0.37, + "learning_rate": 1.6318562550661982e-06, + "loss": 2.1273, + "step": 2725 + }, + { + "epoch": 0.37, + "learning_rate": 1.631180761956228e-06, + "loss": 2.0868, + "step": 2730 + }, + { + "epoch": 0.37, + "learning_rate": 1.6305052688462576e-06, + "loss": 2.0753, + "step": 2735 + }, + { + "epoch": 0.37, + "learning_rate": 1.6298297757362874e-06, + "loss": 2.0779, + "step": 2740 + }, + { + "epoch": 0.37, + "learning_rate": 1.629154282626317e-06, + "loss": 1.9635, + "step": 2745 + }, + { + "epoch": 0.37, + "learning_rate": 1.6284787895163468e-06, + "loss": 2.0501, + "step": 2750 + }, + { + "epoch": 0.37, + "learning_rate": 1.6278032964063765e-06, + "loss": 2.0689, + "step": 2755 + }, + { + "epoch": 0.37, + "learning_rate": 1.6271278032964063e-06, + "loss": 2.0779, + "step": 2760 + }, + { + "epoch": 0.37, + "learning_rate": 1.626452310186436e-06, + "loss": 2.0272, + "step": 2765 + }, + { + "epoch": 0.37, + "learning_rate": 1.625776817076466e-06, + "loss": 2.1481, + "step": 2770 + }, + { + "epoch": 0.37, + "learning_rate": 1.6251013239664955e-06, + "loss": 2.0285, + "step": 2775 + }, + { + "epoch": 0.38, + "learning_rate": 1.6244258308565254e-06, + "loss": 2.013, + "step": 2780 + }, + { + "epoch": 0.38, + "learning_rate": 1.623750337746555e-06, + "loss": 2.1225, + "step": 2785 + }, + { + "epoch": 0.38, + "learning_rate": 1.6230748446365846e-06, + "loss": 2.067, + "step": 2790 + }, + { + "epoch": 0.38, + "learning_rate": 1.6223993515266144e-06, + "loss": 2.1046, + "step": 2795 + }, + { + "epoch": 0.38, + "learning_rate": 1.621723858416644e-06, + "loss": 2.0699, + "step": 2800 + }, + { + "epoch": 0.38, + "eval_loss": 2.0739200115203857, + "eval_runtime": 165.5539, + "eval_samples_per_second": 3.6, + "eval_steps_per_second": 0.453, + "step": 2800 + }, + { + "epoch": 0.38, + "learning_rate": 1.6210483653066738e-06, + "loss": 2.119, + "step": 2805 + }, + { + "epoch": 0.38, + "learning_rate": 1.6203728721967035e-06, + "loss": 2.0534, + "step": 2810 + }, + { + "epoch": 0.38, + "learning_rate": 1.6196973790867333e-06, + "loss": 2.1377, + "step": 2815 + }, + { + "epoch": 0.38, + "learning_rate": 1.619021885976763e-06, + "loss": 2.0943, + "step": 2820 + }, + { + "epoch": 0.38, + "learning_rate": 1.6183463928667927e-06, + "loss": 2.1415, + "step": 2825 + }, + { + "epoch": 0.38, + "learning_rate": 1.6176708997568224e-06, + "loss": 2.0923, + "step": 2830 + }, + { + "epoch": 0.38, + "learning_rate": 1.6169954066468522e-06, + "loss": 2.0528, + "step": 2835 + }, + { + "epoch": 0.38, + "learning_rate": 1.6163199135368819e-06, + "loss": 2.0935, + "step": 2840 + }, + { + "epoch": 0.38, + "learning_rate": 1.6156444204269116e-06, + "loss": 2.1159, + "step": 2845 + }, + { + "epoch": 0.39, + "learning_rate": 1.6149689273169413e-06, + "loss": 2.0728, + "step": 2850 + }, + { + "epoch": 0.39, + "learning_rate": 1.6142934342069709e-06, + "loss": 2.0722, + "step": 2855 + }, + { + "epoch": 0.39, + "learning_rate": 1.6136179410970008e-06, + "loss": 2.0461, + "step": 2860 + }, + { + "epoch": 0.39, + "learning_rate": 1.6129424479870303e-06, + "loss": 2.1287, + "step": 2865 + }, + { + "epoch": 0.39, + "learning_rate": 1.6122669548770603e-06, + "loss": 2.0648, + "step": 2870 + }, + { + "epoch": 0.39, + "learning_rate": 1.6115914617670898e-06, + "loss": 2.0533, + "step": 2875 + }, + { + "epoch": 0.39, + "learning_rate": 1.6109159686571197e-06, + "loss": 2.0866, + "step": 2880 + }, + { + "epoch": 0.39, + "learning_rate": 1.6102404755471492e-06, + "loss": 2.1042, + "step": 2885 + }, + { + "epoch": 0.39, + "learning_rate": 1.6095649824371792e-06, + "loss": 2.1186, + "step": 2890 + }, + { + "epoch": 0.39, + "learning_rate": 1.6088894893272087e-06, + "loss": 2.1474, + "step": 2895 + }, + { + "epoch": 0.39, + "learning_rate": 1.6082139962172386e-06, + "loss": 2.1159, + "step": 2900 + }, + { + "epoch": 0.39, + "learning_rate": 1.6075385031072681e-06, + "loss": 2.0443, + "step": 2905 + }, + { + "epoch": 0.39, + "learning_rate": 1.606863009997298e-06, + "loss": 2.0029, + "step": 2910 + }, + { + "epoch": 0.39, + "learning_rate": 1.6061875168873278e-06, + "loss": 2.0684, + "step": 2915 + }, + { + "epoch": 0.39, + "learning_rate": 1.6055120237773573e-06, + "loss": 1.9991, + "step": 2920 + }, + { + "epoch": 0.4, + "learning_rate": 1.6048365306673872e-06, + "loss": 2.0945, + "step": 2925 + }, + { + "epoch": 0.4, + "learning_rate": 1.6041610375574168e-06, + "loss": 2.0717, + "step": 2930 + }, + { + "epoch": 0.4, + "learning_rate": 1.6034855444474467e-06, + "loss": 2.0839, + "step": 2935 + }, + { + "epoch": 0.4, + "learning_rate": 1.6028100513374762e-06, + "loss": 2.1329, + "step": 2940 + }, + { + "epoch": 0.4, + "learning_rate": 1.6021345582275061e-06, + "loss": 2.0077, + "step": 2945 + }, + { + "epoch": 0.4, + "learning_rate": 1.6014590651175357e-06, + "loss": 2.0517, + "step": 2950 + }, + { + "epoch": 0.4, + "learning_rate": 1.6007835720075656e-06, + "loss": 2.0322, + "step": 2955 + }, + { + "epoch": 0.4, + "learning_rate": 1.6001080788975951e-06, + "loss": 2.055, + "step": 2960 + }, + { + "epoch": 0.4, + "learning_rate": 1.599432585787625e-06, + "loss": 2.1037, + "step": 2965 + }, + { + "epoch": 0.4, + "learning_rate": 1.5987570926776546e-06, + "loss": 2.059, + "step": 2970 + }, + { + "epoch": 0.4, + "learning_rate": 1.5980815995676845e-06, + "loss": 2.0551, + "step": 2975 + }, + { + "epoch": 0.4, + "learning_rate": 1.597406106457714e-06, + "loss": 2.0848, + "step": 2980 + }, + { + "epoch": 0.4, + "learning_rate": 1.5967306133477437e-06, + "loss": 2.0801, + "step": 2985 + }, + { + "epoch": 0.4, + "learning_rate": 1.5960551202377735e-06, + "loss": 2.0898, + "step": 2990 + }, + { + "epoch": 0.4, + "learning_rate": 1.5953796271278032e-06, + "loss": 2.0975, + "step": 2995 + }, + { + "epoch": 0.41, + "learning_rate": 1.594704134017833e-06, + "loss": 2.0278, + "step": 3000 + }, + { + "epoch": 0.41, + "learning_rate": 1.5940286409078626e-06, + "loss": 2.0342, + "step": 3005 + }, + { + "epoch": 0.41, + "learning_rate": 1.5933531477978924e-06, + "loss": 2.068, + "step": 3010 + }, + { + "epoch": 0.41, + "learning_rate": 1.592677654687922e-06, + "loss": 2.0488, + "step": 3015 + }, + { + "epoch": 0.41, + "learning_rate": 1.5920021615779518e-06, + "loss": 2.0348, + "step": 3020 + }, + { + "epoch": 0.41, + "learning_rate": 1.5913266684679816e-06, + "loss": 2.0646, + "step": 3025 + }, + { + "epoch": 0.41, + "learning_rate": 1.5906511753580113e-06, + "loss": 2.0238, + "step": 3030 + }, + { + "epoch": 0.41, + "learning_rate": 1.589975682248041e-06, + "loss": 2.1214, + "step": 3035 + }, + { + "epoch": 0.41, + "learning_rate": 1.5893001891380705e-06, + "loss": 2.0474, + "step": 3040 + }, + { + "epoch": 0.41, + "learning_rate": 1.5886246960281005e-06, + "loss": 2.0923, + "step": 3045 + }, + { + "epoch": 0.41, + "learning_rate": 1.58794920291813e-06, + "loss": 2.0824, + "step": 3050 + }, + { + "epoch": 0.41, + "learning_rate": 1.58727370980816e-06, + "loss": 2.0408, + "step": 3055 + }, + { + "epoch": 0.41, + "learning_rate": 1.5865982166981896e-06, + "loss": 2.0531, + "step": 3060 + }, + { + "epoch": 0.41, + "learning_rate": 1.5859227235882194e-06, + "loss": 2.0216, + "step": 3065 + }, + { + "epoch": 0.41, + "learning_rate": 1.585247230478249e-06, + "loss": 2.1115, + "step": 3070 + }, + { + "epoch": 0.42, + "learning_rate": 1.5845717373682788e-06, + "loss": 2.0333, + "step": 3075 + }, + { + "epoch": 0.42, + "learning_rate": 1.5838962442583085e-06, + "loss": 2.0941, + "step": 3080 + }, + { + "epoch": 0.42, + "learning_rate": 1.5832207511483383e-06, + "loss": 2.0995, + "step": 3085 + }, + { + "epoch": 0.42, + "learning_rate": 1.582545258038368e-06, + "loss": 2.1471, + "step": 3090 + }, + { + "epoch": 0.42, + "learning_rate": 1.5818697649283977e-06, + "loss": 1.9764, + "step": 3095 + }, + { + "epoch": 0.42, + "learning_rate": 1.5811942718184274e-06, + "loss": 2.0089, + "step": 3100 + }, + { + "epoch": 0.42, + "learning_rate": 1.580518778708457e-06, + "loss": 2.1237, + "step": 3105 + }, + { + "epoch": 0.42, + "learning_rate": 1.579843285598487e-06, + "loss": 2.1105, + "step": 3110 + }, + { + "epoch": 0.42, + "learning_rate": 1.5791677924885164e-06, + "loss": 2.1469, + "step": 3115 + }, + { + "epoch": 0.42, + "learning_rate": 1.5784922993785464e-06, + "loss": 2.0442, + "step": 3120 + }, + { + "epoch": 0.42, + "learning_rate": 1.5778168062685759e-06, + "loss": 2.194, + "step": 3125 + }, + { + "epoch": 0.42, + "learning_rate": 1.5771413131586058e-06, + "loss": 2.0288, + "step": 3130 + }, + { + "epoch": 0.42, + "learning_rate": 1.5764658200486353e-06, + "loss": 2.0379, + "step": 3135 + }, + { + "epoch": 0.42, + "learning_rate": 1.5757903269386653e-06, + "loss": 2.07, + "step": 3140 + }, + { + "epoch": 0.42, + "learning_rate": 1.5751148338286948e-06, + "loss": 2.0844, + "step": 3145 + }, + { + "epoch": 0.43, + "learning_rate": 1.5744393407187247e-06, + "loss": 2.0518, + "step": 3150 + }, + { + "epoch": 0.43, + "learning_rate": 1.5737638476087542e-06, + "loss": 2.0537, + "step": 3155 + }, + { + "epoch": 0.43, + "learning_rate": 1.5730883544987842e-06, + "loss": 2.0722, + "step": 3160 + }, + { + "epoch": 0.43, + "learning_rate": 1.5724128613888137e-06, + "loss": 2.1186, + "step": 3165 + }, + { + "epoch": 0.43, + "learning_rate": 1.5717373682788436e-06, + "loss": 2.0531, + "step": 3170 + }, + { + "epoch": 0.43, + "learning_rate": 1.5710618751688731e-06, + "loss": 2.0955, + "step": 3175 + }, + { + "epoch": 0.43, + "learning_rate": 1.5703863820589029e-06, + "loss": 2.0703, + "step": 3180 + }, + { + "epoch": 0.43, + "learning_rate": 1.5697108889489326e-06, + "loss": 2.1432, + "step": 3185 + }, + { + "epoch": 0.43, + "learning_rate": 1.5690353958389623e-06, + "loss": 2.0921, + "step": 3190 + }, + { + "epoch": 0.43, + "learning_rate": 1.568359902728992e-06, + "loss": 2.0203, + "step": 3195 + }, + { + "epoch": 0.43, + "learning_rate": 1.5676844096190218e-06, + "loss": 2.048, + "step": 3200 + }, + { + "epoch": 0.43, + "eval_loss": 2.060811758041382, + "eval_runtime": 165.4321, + "eval_samples_per_second": 3.603, + "eval_steps_per_second": 0.453, + "step": 3200 + }, + { + "epoch": 0.43, + "learning_rate": 1.5670089165090515e-06, + "loss": 2.0813, + "step": 3205 + }, + { + "epoch": 0.43, + "learning_rate": 1.5663334233990812e-06, + "loss": 2.1681, + "step": 3210 + }, + { + "epoch": 0.43, + "learning_rate": 1.5656579302891112e-06, + "loss": 2.009, + "step": 3215 + }, + { + "epoch": 0.44, + "learning_rate": 1.5649824371791407e-06, + "loss": 2.1227, + "step": 3220 + }, + { + "epoch": 0.44, + "learning_rate": 1.5643069440691706e-06, + "loss": 1.9925, + "step": 3225 + }, + { + "epoch": 0.44, + "learning_rate": 1.5636314509592001e-06, + "loss": 2.0469, + "step": 3230 + }, + { + "epoch": 0.44, + "learning_rate": 1.56295595784923e-06, + "loss": 2.0438, + "step": 3235 + }, + { + "epoch": 0.44, + "learning_rate": 1.5622804647392596e-06, + "loss": 2.0806, + "step": 3240 + }, + { + "epoch": 0.44, + "learning_rate": 1.5616049716292893e-06, + "loss": 2.1108, + "step": 3245 + }, + { + "epoch": 0.44, + "learning_rate": 1.560929478519319e-06, + "loss": 2.0461, + "step": 3250 + }, + { + "epoch": 0.44, + "learning_rate": 1.5602539854093488e-06, + "loss": 2.13, + "step": 3255 + }, + { + "epoch": 0.44, + "learning_rate": 1.5595784922993785e-06, + "loss": 2.1125, + "step": 3260 + }, + { + "epoch": 0.44, + "learning_rate": 1.5589029991894082e-06, + "loss": 2.0172, + "step": 3265 + }, + { + "epoch": 0.44, + "learning_rate": 1.558227506079438e-06, + "loss": 2.1299, + "step": 3270 + }, + { + "epoch": 0.44, + "learning_rate": 1.5575520129694677e-06, + "loss": 2.0406, + "step": 3275 + }, + { + "epoch": 0.44, + "learning_rate": 1.5568765198594974e-06, + "loss": 2.0501, + "step": 3280 + }, + { + "epoch": 0.44, + "learning_rate": 1.5562010267495271e-06, + "loss": 2.0377, + "step": 3285 + }, + { + "epoch": 0.44, + "learning_rate": 1.5555255336395568e-06, + "loss": 2.0649, + "step": 3290 + }, + { + "epoch": 0.45, + "learning_rate": 1.5548500405295866e-06, + "loss": 2.0212, + "step": 3295 + }, + { + "epoch": 0.45, + "learning_rate": 1.554174547419616e-06, + "loss": 2.0358, + "step": 3300 + }, + { + "epoch": 0.45, + "learning_rate": 1.553499054309646e-06, + "loss": 2.1017, + "step": 3305 + }, + { + "epoch": 0.45, + "learning_rate": 1.5528235611996755e-06, + "loss": 1.9907, + "step": 3310 + }, + { + "epoch": 0.45, + "learning_rate": 1.5521480680897055e-06, + "loss": 2.0097, + "step": 3315 + }, + { + "epoch": 0.45, + "learning_rate": 1.551472574979735e-06, + "loss": 2.0266, + "step": 3320 + }, + { + "epoch": 0.45, + "learning_rate": 1.550797081869765e-06, + "loss": 2.1016, + "step": 3325 + }, + { + "epoch": 0.45, + "learning_rate": 1.5501215887597944e-06, + "loss": 2.162, + "step": 3330 + }, + { + "epoch": 0.45, + "learning_rate": 1.5494460956498244e-06, + "loss": 2.0395, + "step": 3335 + }, + { + "epoch": 0.45, + "learning_rate": 1.5487706025398539e-06, + "loss": 2.0559, + "step": 3340 + }, + { + "epoch": 0.45, + "learning_rate": 1.5480951094298838e-06, + "loss": 2.1894, + "step": 3345 + }, + { + "epoch": 0.45, + "learning_rate": 1.5474196163199133e-06, + "loss": 2.0609, + "step": 3350 + }, + { + "epoch": 0.45, + "learning_rate": 1.5467441232099433e-06, + "loss": 2.1246, + "step": 3355 + }, + { + "epoch": 0.45, + "learning_rate": 1.546068630099973e-06, + "loss": 2.125, + "step": 3360 + }, + { + "epoch": 0.45, + "learning_rate": 1.5453931369900025e-06, + "loss": 2.1067, + "step": 3365 + }, + { + "epoch": 0.46, + "learning_rate": 1.5447176438800325e-06, + "loss": 1.9872, + "step": 3370 + }, + { + "epoch": 0.46, + "learning_rate": 1.544042150770062e-06, + "loss": 2.1292, + "step": 3375 + }, + { + "epoch": 0.46, + "learning_rate": 1.543366657660092e-06, + "loss": 2.1035, + "step": 3380 + }, + { + "epoch": 0.46, + "learning_rate": 1.5426911645501214e-06, + "loss": 2.1686, + "step": 3385 + }, + { + "epoch": 0.46, + "learning_rate": 1.5420156714401514e-06, + "loss": 2.0928, + "step": 3390 + }, + { + "epoch": 0.46, + "learning_rate": 1.5413401783301809e-06, + "loss": 2.0584, + "step": 3395 + }, + { + "epoch": 0.46, + "learning_rate": 1.5406646852202108e-06, + "loss": 2.0046, + "step": 3400 + }, + { + "epoch": 0.46, + "learning_rate": 1.5399891921102403e-06, + "loss": 2.0863, + "step": 3405 + }, + { + "epoch": 0.46, + "learning_rate": 1.5393136990002703e-06, + "loss": 2.0954, + "step": 3410 + }, + { + "epoch": 0.46, + "learning_rate": 1.5386382058902998e-06, + "loss": 2.0727, + "step": 3415 + }, + { + "epoch": 0.46, + "learning_rate": 1.5379627127803297e-06, + "loss": 2.1576, + "step": 3420 + }, + { + "epoch": 0.46, + "learning_rate": 1.5372872196703592e-06, + "loss": 2.046, + "step": 3425 + }, + { + "epoch": 0.46, + "learning_rate": 1.5366117265603892e-06, + "loss": 2.0489, + "step": 3430 + }, + { + "epoch": 0.46, + "learning_rate": 1.5359362334504187e-06, + "loss": 2.0607, + "step": 3435 + }, + { + "epoch": 0.46, + "learning_rate": 1.5352607403404484e-06, + "loss": 1.995, + "step": 3440 + }, + { + "epoch": 0.47, + "learning_rate": 1.5345852472304781e-06, + "loss": 2.0571, + "step": 3445 + }, + { + "epoch": 0.47, + "learning_rate": 1.5339097541205079e-06, + "loss": 2.0897, + "step": 3450 + }, + { + "epoch": 0.47, + "learning_rate": 1.5332342610105376e-06, + "loss": 2.0928, + "step": 3455 + }, + { + "epoch": 0.47, + "learning_rate": 1.5325587679005673e-06, + "loss": 2.0929, + "step": 3460 + }, + { + "epoch": 0.47, + "learning_rate": 1.531883274790597e-06, + "loss": 2.123, + "step": 3465 + }, + { + "epoch": 0.47, + "learning_rate": 1.5312077816806268e-06, + "loss": 2.1284, + "step": 3470 + }, + { + "epoch": 0.47, + "learning_rate": 1.5305322885706565e-06, + "loss": 2.0668, + "step": 3475 + }, + { + "epoch": 0.47, + "learning_rate": 1.5298567954606862e-06, + "loss": 2.038, + "step": 3480 + }, + { + "epoch": 0.47, + "learning_rate": 1.529181302350716e-06, + "loss": 2.0609, + "step": 3485 + }, + { + "epoch": 0.47, + "learning_rate": 1.5285058092407457e-06, + "loss": 2.1102, + "step": 3490 + }, + { + "epoch": 0.47, + "learning_rate": 1.5278303161307752e-06, + "loss": 2.0901, + "step": 3495 + }, + { + "epoch": 0.47, + "learning_rate": 1.5271548230208051e-06, + "loss": 2.0164, + "step": 3500 + }, + { + "epoch": 0.47, + "learning_rate": 1.5264793299108349e-06, + "loss": 2.0328, + "step": 3505 + }, + { + "epoch": 0.47, + "learning_rate": 1.5258038368008646e-06, + "loss": 2.131, + "step": 3510 + }, + { + "epoch": 0.47, + "learning_rate": 1.5251283436908943e-06, + "loss": 2.1453, + "step": 3515 + }, + { + "epoch": 0.48, + "learning_rate": 1.524452850580924e-06, + "loss": 2.0078, + "step": 3520 + }, + { + "epoch": 0.48, + "learning_rate": 1.5237773574709538e-06, + "loss": 2.0995, + "step": 3525 + }, + { + "epoch": 0.48, + "learning_rate": 1.5231018643609835e-06, + "loss": 2.0006, + "step": 3530 + }, + { + "epoch": 0.48, + "learning_rate": 1.5224263712510132e-06, + "loss": 2.1118, + "step": 3535 + }, + { + "epoch": 0.48, + "learning_rate": 1.521750878141043e-06, + "loss": 2.1054, + "step": 3540 + }, + { + "epoch": 0.48, + "learning_rate": 1.5210753850310727e-06, + "loss": 2.0164, + "step": 3545 + }, + { + "epoch": 0.48, + "learning_rate": 1.5203998919211024e-06, + "loss": 2.146, + "step": 3550 + }, + { + "epoch": 0.48, + "learning_rate": 1.5197243988111321e-06, + "loss": 2.0721, + "step": 3555 + }, + { + "epoch": 0.48, + "learning_rate": 1.5190489057011616e-06, + "loss": 2.0081, + "step": 3560 + }, + { + "epoch": 0.48, + "learning_rate": 1.5183734125911916e-06, + "loss": 2.1044, + "step": 3565 + }, + { + "epoch": 0.48, + "learning_rate": 1.517697919481221e-06, + "loss": 2.097, + "step": 3570 + }, + { + "epoch": 0.48, + "learning_rate": 1.517022426371251e-06, + "loss": 2.0469, + "step": 3575 + }, + { + "epoch": 0.48, + "learning_rate": 1.5163469332612805e-06, + "loss": 2.0239, + "step": 3580 + }, + { + "epoch": 0.48, + "learning_rate": 1.5156714401513105e-06, + "loss": 2.0602, + "step": 3585 + }, + { + "epoch": 0.48, + "learning_rate": 1.51499594704134e-06, + "loss": 2.0721, + "step": 3590 + }, + { + "epoch": 0.49, + "learning_rate": 1.51432045393137e-06, + "loss": 2.0337, + "step": 3595 + }, + { + "epoch": 0.49, + "learning_rate": 1.5136449608213994e-06, + "loss": 1.8914, + "step": 3600 + }, + { + "epoch": 0.49, + "eval_loss": 2.049622058868408, + "eval_runtime": 165.1588, + "eval_samples_per_second": 3.609, + "eval_steps_per_second": 0.454, + "step": 3600 + }, + { + "epoch": 0.49, + "learning_rate": 1.5129694677114294e-06, + "loss": 1.9725, + "step": 3605 + }, + { + "epoch": 0.49, + "learning_rate": 1.5122939746014589e-06, + "loss": 1.9695, + "step": 3610 + }, + { + "epoch": 0.49, + "learning_rate": 1.5116184814914888e-06, + "loss": 2.0596, + "step": 3615 + }, + { + "epoch": 0.49, + "learning_rate": 1.5109429883815183e-06, + "loss": 1.993, + "step": 3620 + }, + { + "epoch": 0.49, + "learning_rate": 1.510267495271548e-06, + "loss": 2.0617, + "step": 3625 + }, + { + "epoch": 0.49, + "learning_rate": 1.5095920021615778e-06, + "loss": 2.0145, + "step": 3630 + }, + { + "epoch": 0.49, + "learning_rate": 1.5089165090516075e-06, + "loss": 2.1149, + "step": 3635 + }, + { + "epoch": 0.49, + "learning_rate": 1.5082410159416373e-06, + "loss": 2.0825, + "step": 3640 + }, + { + "epoch": 0.49, + "learning_rate": 1.507565522831667e-06, + "loss": 2.0762, + "step": 3645 + }, + { + "epoch": 0.49, + "learning_rate": 1.506890029721697e-06, + "loss": 2.1251, + "step": 3650 + }, + { + "epoch": 0.49, + "learning_rate": 1.5062145366117264e-06, + "loss": 2.0812, + "step": 3655 + }, + { + "epoch": 0.49, + "learning_rate": 1.5055390435017564e-06, + "loss": 2.0488, + "step": 3660 + }, + { + "epoch": 0.5, + "learning_rate": 1.5048635503917859e-06, + "loss": 2.0906, + "step": 3665 + }, + { + "epoch": 0.5, + "learning_rate": 1.5041880572818158e-06, + "loss": 2.0587, + "step": 3670 + }, + { + "epoch": 0.5, + "learning_rate": 1.5035125641718453e-06, + "loss": 2.1736, + "step": 3675 + }, + { + "epoch": 0.5, + "learning_rate": 1.5028370710618753e-06, + "loss": 2.0042, + "step": 3680 + }, + { + "epoch": 0.5, + "learning_rate": 1.5021615779519048e-06, + "loss": 2.0003, + "step": 3685 + }, + { + "epoch": 0.5, + "learning_rate": 1.5014860848419347e-06, + "loss": 2.0667, + "step": 3690 + }, + { + "epoch": 0.5, + "learning_rate": 1.5008105917319642e-06, + "loss": 2.111, + "step": 3695 + }, + { + "epoch": 0.5, + "learning_rate": 1.500135098621994e-06, + "loss": 2.0688, + "step": 3700 + }, + { + "epoch": 0.5, + "learning_rate": 1.4994596055120237e-06, + "loss": 1.9983, + "step": 3705 + }, + { + "epoch": 0.5, + "learning_rate": 1.4987841124020534e-06, + "loss": 2.0898, + "step": 3710 + }, + { + "epoch": 0.5, + "learning_rate": 1.4981086192920831e-06, + "loss": 2.0828, + "step": 3715 + }, + { + "epoch": 0.5, + "learning_rate": 1.4974331261821129e-06, + "loss": 2.0781, + "step": 3720 + }, + { + "epoch": 0.5, + "learning_rate": 1.4967576330721426e-06, + "loss": 2.0078, + "step": 3725 + }, + { + "epoch": 0.5, + "learning_rate": 1.4960821399621723e-06, + "loss": 2.0798, + "step": 3730 + }, + { + "epoch": 0.5, + "learning_rate": 1.495406646852202e-06, + "loss": 2.0405, + "step": 3735 + }, + { + "epoch": 0.51, + "learning_rate": 1.4947311537422318e-06, + "loss": 2.0513, + "step": 3740 + }, + { + "epoch": 0.51, + "learning_rate": 1.4940556606322615e-06, + "loss": 2.0381, + "step": 3745 + }, + { + "epoch": 0.51, + "learning_rate": 1.4933801675222912e-06, + "loss": 2.0066, + "step": 3750 + }, + { + "epoch": 0.51, + "learning_rate": 1.4927046744123207e-06, + "loss": 2.0412, + "step": 3755 + }, + { + "epoch": 0.51, + "learning_rate": 1.4920291813023507e-06, + "loss": 2.0252, + "step": 3760 + }, + { + "epoch": 0.51, + "learning_rate": 1.4913536881923802e-06, + "loss": 2.1475, + "step": 3765 + }, + { + "epoch": 0.51, + "learning_rate": 1.4906781950824101e-06, + "loss": 2.0673, + "step": 3770 + }, + { + "epoch": 0.51, + "learning_rate": 1.4900027019724396e-06, + "loss": 2.0293, + "step": 3775 + }, + { + "epoch": 0.51, + "learning_rate": 1.4893272088624696e-06, + "loss": 2.0817, + "step": 3780 + }, + { + "epoch": 0.51, + "learning_rate": 1.488651715752499e-06, + "loss": 2.08, + "step": 3785 + }, + { + "epoch": 0.51, + "learning_rate": 1.487976222642529e-06, + "loss": 2.044, + "step": 3790 + }, + { + "epoch": 0.51, + "learning_rate": 1.4873007295325588e-06, + "loss": 2.0545, + "step": 3795 + }, + { + "epoch": 0.51, + "learning_rate": 1.4866252364225885e-06, + "loss": 2.0712, + "step": 3800 + }, + { + "epoch": 0.51, + "learning_rate": 1.4859497433126182e-06, + "loss": 2.0178, + "step": 3805 + }, + { + "epoch": 0.51, + "learning_rate": 1.485274250202648e-06, + "loss": 2.0222, + "step": 3810 + }, + { + "epoch": 0.52, + "learning_rate": 1.4845987570926777e-06, + "loss": 2.0417, + "step": 3815 + }, + { + "epoch": 0.52, + "learning_rate": 1.4839232639827072e-06, + "loss": 2.0045, + "step": 3820 + }, + { + "epoch": 0.52, + "learning_rate": 1.4832477708727371e-06, + "loss": 2.048, + "step": 3825 + }, + { + "epoch": 0.52, + "learning_rate": 1.4825722777627666e-06, + "loss": 2.1043, + "step": 3830 + }, + { + "epoch": 0.52, + "learning_rate": 1.4818967846527966e-06, + "loss": 2.0242, + "step": 3835 + }, + { + "epoch": 0.52, + "learning_rate": 1.481221291542826e-06, + "loss": 2.1304, + "step": 3840 + }, + { + "epoch": 0.52, + "learning_rate": 1.480545798432856e-06, + "loss": 2.0436, + "step": 3845 + }, + { + "epoch": 0.52, + "learning_rate": 1.4798703053228855e-06, + "loss": 2.0306, + "step": 3850 + }, + { + "epoch": 0.52, + "learning_rate": 1.4791948122129155e-06, + "loss": 1.9944, + "step": 3855 + }, + { + "epoch": 0.52, + "learning_rate": 1.478519319102945e-06, + "loss": 2.0746, + "step": 3860 + }, + { + "epoch": 0.52, + "learning_rate": 1.477843825992975e-06, + "loss": 2.0468, + "step": 3865 + }, + { + "epoch": 0.52, + "learning_rate": 1.4771683328830044e-06, + "loss": 2.0807, + "step": 3870 + }, + { + "epoch": 0.52, + "learning_rate": 1.4764928397730344e-06, + "loss": 2.1531, + "step": 3875 + }, + { + "epoch": 0.52, + "learning_rate": 1.475817346663064e-06, + "loss": 2.0949, + "step": 3880 + }, + { + "epoch": 0.52, + "learning_rate": 1.4751418535530936e-06, + "loss": 1.9704, + "step": 3885 + }, + { + "epoch": 0.53, + "learning_rate": 1.4744663604431234e-06, + "loss": 1.9872, + "step": 3890 + }, + { + "epoch": 0.53, + "learning_rate": 1.473790867333153e-06, + "loss": 1.997, + "step": 3895 + }, + { + "epoch": 0.53, + "learning_rate": 1.4731153742231828e-06, + "loss": 2.0673, + "step": 3900 + }, + { + "epoch": 0.53, + "learning_rate": 1.4724398811132125e-06, + "loss": 2.0324, + "step": 3905 + }, + { + "epoch": 0.53, + "learning_rate": 1.4717643880032423e-06, + "loss": 2.0306, + "step": 3910 + }, + { + "epoch": 0.53, + "learning_rate": 1.471088894893272e-06, + "loss": 2.0718, + "step": 3915 + }, + { + "epoch": 0.53, + "learning_rate": 1.4704134017833017e-06, + "loss": 2.1204, + "step": 3920 + }, + { + "epoch": 0.53, + "learning_rate": 1.4697379086733314e-06, + "loss": 1.9834, + "step": 3925 + }, + { + "epoch": 0.53, + "learning_rate": 1.4690624155633612e-06, + "loss": 2.0658, + "step": 3930 + }, + { + "epoch": 0.53, + "learning_rate": 1.4683869224533909e-06, + "loss": 2.0699, + "step": 3935 + }, + { + "epoch": 0.53, + "learning_rate": 1.4677114293434208e-06, + "loss": 2.0704, + "step": 3940 + }, + { + "epoch": 0.53, + "learning_rate": 1.4670359362334503e-06, + "loss": 2.0563, + "step": 3945 + }, + { + "epoch": 0.53, + "learning_rate": 1.46636044312348e-06, + "loss": 2.0176, + "step": 3950 + }, + { + "epoch": 0.53, + "learning_rate": 1.4656849500135098e-06, + "loss": 2.067, + "step": 3955 + }, + { + "epoch": 0.53, + "learning_rate": 1.4650094569035395e-06, + "loss": 2.1244, + "step": 3960 + }, + { + "epoch": 0.54, + "learning_rate": 1.4643339637935692e-06, + "loss": 2.0181, + "step": 3965 + }, + { + "epoch": 0.54, + "learning_rate": 1.463658470683599e-06, + "loss": 2.0596, + "step": 3970 + }, + { + "epoch": 0.54, + "learning_rate": 1.4629829775736287e-06, + "loss": 2.0484, + "step": 3975 + }, + { + "epoch": 0.54, + "learning_rate": 1.4623074844636584e-06, + "loss": 2.0901, + "step": 3980 + }, + { + "epoch": 0.54, + "learning_rate": 1.4616319913536882e-06, + "loss": 1.9073, + "step": 3985 + }, + { + "epoch": 0.54, + "learning_rate": 1.4609564982437179e-06, + "loss": 2.0358, + "step": 3990 + }, + { + "epoch": 0.54, + "learning_rate": 1.4602810051337476e-06, + "loss": 2.0291, + "step": 3995 + }, + { + "epoch": 0.54, + "learning_rate": 1.4596055120237773e-06, + "loss": 2.0276, + "step": 4000 + }, + { + "epoch": 0.54, + "eval_loss": 2.039377212524414, + "eval_runtime": 165.26, + "eval_samples_per_second": 3.606, + "eval_steps_per_second": 0.454, + "step": 4000 + }, + { + "epoch": 0.54, + "learning_rate": 1.458930018913807e-06, + "loss": 2.1331, + "step": 4005 + }, + { + "epoch": 0.54, + "learning_rate": 1.4582545258038368e-06, + "loss": 2.0162, + "step": 4010 + }, + { + "epoch": 0.54, + "learning_rate": 1.4575790326938663e-06, + "loss": 1.9811, + "step": 4015 + }, + { + "epoch": 0.54, + "learning_rate": 1.4569035395838962e-06, + "loss": 1.9574, + "step": 4020 + }, + { + "epoch": 0.54, + "learning_rate": 1.4562280464739258e-06, + "loss": 1.9547, + "step": 4025 + }, + { + "epoch": 0.54, + "learning_rate": 1.4555525533639557e-06, + "loss": 2.0814, + "step": 4030 + }, + { + "epoch": 0.55, + "learning_rate": 1.4548770602539852e-06, + "loss": 2.0517, + "step": 4035 + }, + { + "epoch": 0.55, + "learning_rate": 1.4542015671440151e-06, + "loss": 2.0873, + "step": 4040 + }, + { + "epoch": 0.55, + "learning_rate": 1.4535260740340447e-06, + "loss": 2.0353, + "step": 4045 + }, + { + "epoch": 0.55, + "learning_rate": 1.4528505809240746e-06, + "loss": 2.0967, + "step": 4050 + }, + { + "epoch": 0.55, + "learning_rate": 1.4521750878141041e-06, + "loss": 1.9676, + "step": 4055 + }, + { + "epoch": 0.55, + "learning_rate": 1.451499594704134e-06, + "loss": 2.0532, + "step": 4060 + }, + { + "epoch": 0.55, + "learning_rate": 1.4508241015941636e-06, + "loss": 2.0872, + "step": 4065 + }, + { + "epoch": 0.55, + "learning_rate": 1.4501486084841935e-06, + "loss": 2.092, + "step": 4070 + }, + { + "epoch": 0.55, + "learning_rate": 1.449473115374223e-06, + "loss": 1.9767, + "step": 4075 + }, + { + "epoch": 0.55, + "learning_rate": 1.4487976222642527e-06, + "loss": 2.0484, + "step": 4080 + }, + { + "epoch": 0.55, + "learning_rate": 1.4481221291542827e-06, + "loss": 2.0448, + "step": 4085 + }, + { + "epoch": 0.55, + "learning_rate": 1.4474466360443122e-06, + "loss": 2.0531, + "step": 4090 + }, + { + "epoch": 0.55, + "learning_rate": 1.4467711429343421e-06, + "loss": 2.014, + "step": 4095 + }, + { + "epoch": 0.55, + "learning_rate": 1.4460956498243716e-06, + "loss": 2.0123, + "step": 4100 + }, + { + "epoch": 0.55, + "learning_rate": 1.4454201567144016e-06, + "loss": 2.0659, + "step": 4105 + }, + { + "epoch": 0.56, + "learning_rate": 1.444744663604431e-06, + "loss": 2.0284, + "step": 4110 + }, + { + "epoch": 0.56, + "learning_rate": 1.444069170494461e-06, + "loss": 2.1546, + "step": 4115 + }, + { + "epoch": 0.56, + "learning_rate": 1.4433936773844905e-06, + "loss": 2.0555, + "step": 4120 + }, + { + "epoch": 0.56, + "learning_rate": 1.4427181842745205e-06, + "loss": 2.0946, + "step": 4125 + }, + { + "epoch": 0.56, + "learning_rate": 1.44204269116455e-06, + "loss": 2.037, + "step": 4130 + }, + { + "epoch": 0.56, + "learning_rate": 1.44136719805458e-06, + "loss": 2.0163, + "step": 4135 + }, + { + "epoch": 0.56, + "learning_rate": 1.4406917049446095e-06, + "loss": 2.0224, + "step": 4140 + }, + { + "epoch": 0.56, + "learning_rate": 1.4400162118346392e-06, + "loss": 2.0542, + "step": 4145 + }, + { + "epoch": 0.56, + "learning_rate": 1.439340718724669e-06, + "loss": 2.0628, + "step": 4150 + }, + { + "epoch": 0.56, + "learning_rate": 1.4386652256146986e-06, + "loss": 2.0527, + "step": 4155 + }, + { + "epoch": 0.56, + "learning_rate": 1.4379897325047284e-06, + "loss": 1.9956, + "step": 4160 + }, + { + "epoch": 0.56, + "learning_rate": 1.437314239394758e-06, + "loss": 1.9983, + "step": 4165 + }, + { + "epoch": 0.56, + "learning_rate": 1.4366387462847878e-06, + "loss": 2.0351, + "step": 4170 + }, + { + "epoch": 0.56, + "learning_rate": 1.4359632531748175e-06, + "loss": 1.9065, + "step": 4175 + }, + { + "epoch": 0.56, + "learning_rate": 1.4352877600648473e-06, + "loss": 2.0242, + "step": 4180 + }, + { + "epoch": 0.57, + "learning_rate": 1.434612266954877e-06, + "loss": 2.0076, + "step": 4185 + }, + { + "epoch": 0.57, + "learning_rate": 1.4339367738449067e-06, + "loss": 2.0707, + "step": 4190 + }, + { + "epoch": 0.57, + "learning_rate": 1.4332612807349364e-06, + "loss": 2.0493, + "step": 4195 + }, + { + "epoch": 0.57, + "learning_rate": 1.4325857876249662e-06, + "loss": 2.0358, + "step": 4200 + }, + { + "epoch": 0.57, + "learning_rate": 1.431910294514996e-06, + "loss": 1.9821, + "step": 4205 + }, + { + "epoch": 0.57, + "learning_rate": 1.4312348014050254e-06, + "loss": 2.0288, + "step": 4210 + }, + { + "epoch": 0.57, + "learning_rate": 1.4305593082950553e-06, + "loss": 2.0614, + "step": 4215 + }, + { + "epoch": 0.57, + "learning_rate": 1.4298838151850849e-06, + "loss": 2.1316, + "step": 4220 + }, + { + "epoch": 0.57, + "learning_rate": 1.4292083220751148e-06, + "loss": 1.9865, + "step": 4225 + }, + { + "epoch": 0.57, + "learning_rate": 1.4285328289651445e-06, + "loss": 2.1231, + "step": 4230 + }, + { + "epoch": 0.57, + "learning_rate": 1.4278573358551743e-06, + "loss": 2.0609, + "step": 4235 + }, + { + "epoch": 0.57, + "learning_rate": 1.427181842745204e-06, + "loss": 1.995, + "step": 4240 + }, + { + "epoch": 0.57, + "learning_rate": 1.4265063496352337e-06, + "loss": 2.0347, + "step": 4245 + }, + { + "epoch": 0.57, + "learning_rate": 1.4258308565252634e-06, + "loss": 2.0714, + "step": 4250 + }, + { + "epoch": 0.57, + "learning_rate": 1.4251553634152932e-06, + "loss": 1.9985, + "step": 4255 + }, + { + "epoch": 0.58, + "learning_rate": 1.4244798703053229e-06, + "loss": 2.0541, + "step": 4260 + }, + { + "epoch": 0.58, + "learning_rate": 1.4238043771953526e-06, + "loss": 2.0663, + "step": 4265 + }, + { + "epoch": 0.58, + "learning_rate": 1.4231288840853823e-06, + "loss": 2.0048, + "step": 4270 + }, + { + "epoch": 0.58, + "learning_rate": 1.4224533909754119e-06, + "loss": 2.0742, + "step": 4275 + }, + { + "epoch": 0.58, + "learning_rate": 1.4217778978654418e-06, + "loss": 2.0085, + "step": 4280 + }, + { + "epoch": 0.58, + "learning_rate": 1.4211024047554713e-06, + "loss": 1.9951, + "step": 4285 + }, + { + "epoch": 0.58, + "learning_rate": 1.4204269116455012e-06, + "loss": 2.0308, + "step": 4290 + }, + { + "epoch": 0.58, + "learning_rate": 1.4197514185355308e-06, + "loss": 2.1408, + "step": 4295 + }, + { + "epoch": 0.58, + "learning_rate": 1.4190759254255607e-06, + "loss": 1.9765, + "step": 4300 + }, + { + "epoch": 0.58, + "learning_rate": 1.4184004323155902e-06, + "loss": 1.9744, + "step": 4305 + }, + { + "epoch": 0.58, + "learning_rate": 1.4177249392056201e-06, + "loss": 2.0597, + "step": 4310 + }, + { + "epoch": 0.58, + "learning_rate": 1.4170494460956497e-06, + "loss": 2.0787, + "step": 4315 + }, + { + "epoch": 0.58, + "learning_rate": 1.4163739529856796e-06, + "loss": 1.9937, + "step": 4320 + }, + { + "epoch": 0.58, + "learning_rate": 1.4156984598757091e-06, + "loss": 2.0415, + "step": 4325 + }, + { + "epoch": 0.58, + "learning_rate": 1.415022966765739e-06, + "loss": 1.9378, + "step": 4330 + }, + { + "epoch": 0.59, + "learning_rate": 1.4143474736557686e-06, + "loss": 2.0868, + "step": 4335 + }, + { + "epoch": 0.59, + "learning_rate": 1.4136719805457983e-06, + "loss": 2.0547, + "step": 4340 + }, + { + "epoch": 0.59, + "learning_rate": 1.412996487435828e-06, + "loss": 2.0683, + "step": 4345 + }, + { + "epoch": 0.59, + "learning_rate": 1.4123209943258577e-06, + "loss": 2.1531, + "step": 4350 + }, + { + "epoch": 0.59, + "learning_rate": 1.4116455012158875e-06, + "loss": 2.0331, + "step": 4355 + }, + { + "epoch": 0.59, + "learning_rate": 1.4109700081059172e-06, + "loss": 2.0184, + "step": 4360 + }, + { + "epoch": 0.59, + "learning_rate": 1.410294514995947e-06, + "loss": 2.0473, + "step": 4365 + }, + { + "epoch": 0.59, + "learning_rate": 1.4096190218859767e-06, + "loss": 1.9568, + "step": 4370 + }, + { + "epoch": 0.59, + "learning_rate": 1.4089435287760066e-06, + "loss": 2.031, + "step": 4375 + }, + { + "epoch": 0.59, + "learning_rate": 1.408268035666036e-06, + "loss": 1.9944, + "step": 4380 + }, + { + "epoch": 0.59, + "learning_rate": 1.407592542556066e-06, + "loss": 2.1136, + "step": 4385 + }, + { + "epoch": 0.59, + "learning_rate": 1.4069170494460956e-06, + "loss": 2.0197, + "step": 4390 + }, + { + "epoch": 0.59, + "learning_rate": 1.4062415563361255e-06, + "loss": 2.1447, + "step": 4395 + }, + { + "epoch": 0.59, + "learning_rate": 1.405566063226155e-06, + "loss": 1.9879, + "step": 4400 + }, + { + "epoch": 0.59, + "eval_loss": 2.0300517082214355, + "eval_runtime": 165.4272, + "eval_samples_per_second": 3.603, + "eval_steps_per_second": 0.453, + "step": 4400 + }, + { + "epoch": 0.6, + "learning_rate": 1.4048905701161847e-06, + "loss": 2.1061, + "step": 4405 + }, + { + "epoch": 0.6, + "learning_rate": 1.4042150770062145e-06, + "loss": 2.0397, + "step": 4410 + }, + { + "epoch": 0.6, + "learning_rate": 1.4035395838962442e-06, + "loss": 1.943, + "step": 4415 + }, + { + "epoch": 0.6, + "learning_rate": 1.402864090786274e-06, + "loss": 2.0362, + "step": 4420 + }, + { + "epoch": 0.6, + "learning_rate": 1.4021885976763036e-06, + "loss": 2.0861, + "step": 4425 + }, + { + "epoch": 0.6, + "learning_rate": 1.4015131045663334e-06, + "loss": 2.0146, + "step": 4430 + }, + { + "epoch": 0.6, + "learning_rate": 1.400837611456363e-06, + "loss": 2.0927, + "step": 4435 + }, + { + "epoch": 0.6, + "learning_rate": 1.4001621183463928e-06, + "loss": 2.0839, + "step": 4440 + }, + { + "epoch": 0.6, + "learning_rate": 1.3994866252364225e-06, + "loss": 2.0464, + "step": 4445 + }, + { + "epoch": 0.6, + "learning_rate": 1.3988111321264523e-06, + "loss": 2.0034, + "step": 4450 + }, + { + "epoch": 0.6, + "learning_rate": 1.398135639016482e-06, + "loss": 2.0405, + "step": 4455 + }, + { + "epoch": 0.6, + "learning_rate": 1.3974601459065117e-06, + "loss": 1.9895, + "step": 4460 + }, + { + "epoch": 0.6, + "learning_rate": 1.3967846527965415e-06, + "loss": 2.058, + "step": 4465 + }, + { + "epoch": 0.6, + "learning_rate": 1.396109159686571e-06, + "loss": 2.0722, + "step": 4470 + }, + { + "epoch": 0.6, + "learning_rate": 1.395433666576601e-06, + "loss": 1.9765, + "step": 4475 + }, + { + "epoch": 0.61, + "learning_rate": 1.3947581734666304e-06, + "loss": 1.9971, + "step": 4480 + }, + { + "epoch": 0.61, + "learning_rate": 1.3940826803566604e-06, + "loss": 2.0599, + "step": 4485 + }, + { + "epoch": 0.61, + "learning_rate": 1.3934071872466899e-06, + "loss": 2.0489, + "step": 4490 + }, + { + "epoch": 0.61, + "learning_rate": 1.3927316941367198e-06, + "loss": 1.9994, + "step": 4495 + }, + { + "epoch": 0.61, + "learning_rate": 1.3920562010267493e-06, + "loss": 2.0765, + "step": 4500 + }, + { + "epoch": 0.61, + "learning_rate": 1.3913807079167793e-06, + "loss": 2.1492, + "step": 4505 + }, + { + "epoch": 0.61, + "learning_rate": 1.3907052148068088e-06, + "loss": 2.0749, + "step": 4510 + }, + { + "epoch": 0.61, + "learning_rate": 1.3900297216968387e-06, + "loss": 1.9798, + "step": 4515 + }, + { + "epoch": 0.61, + "learning_rate": 1.3893542285868684e-06, + "loss": 2.0888, + "step": 4520 + }, + { + "epoch": 0.61, + "learning_rate": 1.3886787354768982e-06, + "loss": 1.9906, + "step": 4525 + }, + { + "epoch": 0.61, + "learning_rate": 1.3880032423669279e-06, + "loss": 2.0952, + "step": 4530 + }, + { + "epoch": 0.61, + "learning_rate": 1.3873277492569574e-06, + "loss": 2.0239, + "step": 4535 + }, + { + "epoch": 0.61, + "learning_rate": 1.3866522561469873e-06, + "loss": 2.0986, + "step": 4540 + }, + { + "epoch": 0.61, + "learning_rate": 1.3859767630370169e-06, + "loss": 2.1158, + "step": 4545 + }, + { + "epoch": 0.61, + "learning_rate": 1.3853012699270468e-06, + "loss": 1.9589, + "step": 4550 + }, + { + "epoch": 0.62, + "learning_rate": 1.3846257768170763e-06, + "loss": 2.0019, + "step": 4555 + }, + { + "epoch": 0.62, + "learning_rate": 1.3839502837071062e-06, + "loss": 2.0844, + "step": 4560 + }, + { + "epoch": 0.62, + "learning_rate": 1.3832747905971358e-06, + "loss": 2.0934, + "step": 4565 + }, + { + "epoch": 0.62, + "learning_rate": 1.3825992974871657e-06, + "loss": 2.007, + "step": 4570 + }, + { + "epoch": 0.62, + "learning_rate": 1.3819238043771952e-06, + "loss": 1.9538, + "step": 4575 + }, + { + "epoch": 0.62, + "learning_rate": 1.3812483112672252e-06, + "loss": 2.0167, + "step": 4580 + }, + { + "epoch": 0.62, + "learning_rate": 1.3805728181572547e-06, + "loss": 2.0112, + "step": 4585 + }, + { + "epoch": 0.62, + "learning_rate": 1.3798973250472846e-06, + "loss": 2.0415, + "step": 4590 + }, + { + "epoch": 0.62, + "learning_rate": 1.3792218319373141e-06, + "loss": 2.0616, + "step": 4595 + }, + { + "epoch": 0.62, + "learning_rate": 1.3785463388273438e-06, + "loss": 2.0704, + "step": 4600 + }, + { + "epoch": 0.62, + "learning_rate": 1.3778708457173736e-06, + "loss": 2.1166, + "step": 4605 + }, + { + "epoch": 0.62, + "learning_rate": 1.3771953526074033e-06, + "loss": 2.0665, + "step": 4610 + }, + { + "epoch": 0.62, + "learning_rate": 1.376519859497433e-06, + "loss": 2.0999, + "step": 4615 + }, + { + "epoch": 0.62, + "learning_rate": 1.3758443663874628e-06, + "loss": 2.0465, + "step": 4620 + }, + { + "epoch": 0.62, + "learning_rate": 1.3751688732774925e-06, + "loss": 2.0234, + "step": 4625 + }, + { + "epoch": 0.63, + "learning_rate": 1.3744933801675222e-06, + "loss": 2.0602, + "step": 4630 + }, + { + "epoch": 0.63, + "learning_rate": 1.373817887057552e-06, + "loss": 2.0471, + "step": 4635 + }, + { + "epoch": 0.63, + "learning_rate": 1.3731423939475817e-06, + "loss": 2.0579, + "step": 4640 + }, + { + "epoch": 0.63, + "learning_rate": 1.3724669008376114e-06, + "loss": 2.0118, + "step": 4645 + }, + { + "epoch": 0.63, + "learning_rate": 1.3717914077276411e-06, + "loss": 1.9907, + "step": 4650 + }, + { + "epoch": 0.63, + "learning_rate": 1.3711159146176708e-06, + "loss": 1.9865, + "step": 4655 + }, + { + "epoch": 0.63, + "learning_rate": 1.3704404215077006e-06, + "loss": 2.0415, + "step": 4660 + }, + { + "epoch": 0.63, + "learning_rate": 1.3697649283977303e-06, + "loss": 2.0445, + "step": 4665 + }, + { + "epoch": 0.63, + "learning_rate": 1.36908943528776e-06, + "loss": 1.9549, + "step": 4670 + }, + { + "epoch": 0.63, + "learning_rate": 1.3684139421777897e-06, + "loss": 1.9776, + "step": 4675 + }, + { + "epoch": 0.63, + "learning_rate": 1.3677384490678195e-06, + "loss": 2.0224, + "step": 4680 + }, + { + "epoch": 0.63, + "learning_rate": 1.3670629559578492e-06, + "loss": 1.9824, + "step": 4685 + }, + { + "epoch": 0.63, + "learning_rate": 1.366387462847879e-06, + "loss": 1.9642, + "step": 4690 + }, + { + "epoch": 0.63, + "learning_rate": 1.3657119697379086e-06, + "loss": 2.0882, + "step": 4695 + }, + { + "epoch": 0.63, + "learning_rate": 1.3650364766279384e-06, + "loss": 2.098, + "step": 4700 + }, + { + "epoch": 0.64, + "learning_rate": 1.364360983517968e-06, + "loss": 2.0735, + "step": 4705 + }, + { + "epoch": 0.64, + "learning_rate": 1.3636854904079978e-06, + "loss": 2.011, + "step": 4710 + }, + { + "epoch": 0.64, + "learning_rate": 1.3630099972980276e-06, + "loss": 1.9934, + "step": 4715 + }, + { + "epoch": 0.64, + "learning_rate": 1.3623345041880573e-06, + "loss": 2.0129, + "step": 4720 + }, + { + "epoch": 0.64, + "learning_rate": 1.361659011078087e-06, + "loss": 2.0225, + "step": 4725 + }, + { + "epoch": 0.64, + "learning_rate": 1.3609835179681165e-06, + "loss": 2.1099, + "step": 4730 + }, + { + "epoch": 0.64, + "learning_rate": 1.3603080248581465e-06, + "loss": 1.9598, + "step": 4735 + }, + { + "epoch": 0.64, + "learning_rate": 1.359632531748176e-06, + "loss": 1.9245, + "step": 4740 + }, + { + "epoch": 0.64, + "learning_rate": 1.358957038638206e-06, + "loss": 2.0642, + "step": 4745 + }, + { + "epoch": 0.64, + "learning_rate": 1.3582815455282354e-06, + "loss": 2.1169, + "step": 4750 + }, + { + "epoch": 0.64, + "learning_rate": 1.3576060524182654e-06, + "loss": 2.0505, + "step": 4755 + }, + { + "epoch": 0.64, + "learning_rate": 1.3569305593082949e-06, + "loss": 2.0364, + "step": 4760 + }, + { + "epoch": 0.64, + "learning_rate": 1.3562550661983248e-06, + "loss": 2.1029, + "step": 4765 + }, + { + "epoch": 0.64, + "learning_rate": 1.3555795730883543e-06, + "loss": 2.136, + "step": 4770 + }, + { + "epoch": 0.65, + "learning_rate": 1.3549040799783843e-06, + "loss": 1.9615, + "step": 4775 + }, + { + "epoch": 0.65, + "learning_rate": 1.3542285868684138e-06, + "loss": 1.9855, + "step": 4780 + }, + { + "epoch": 0.65, + "learning_rate": 1.3535530937584437e-06, + "loss": 1.9529, + "step": 4785 + }, + { + "epoch": 0.65, + "learning_rate": 1.3528776006484732e-06, + "loss": 2.0281, + "step": 4790 + }, + { + "epoch": 0.65, + "learning_rate": 1.352202107538503e-06, + "loss": 1.9702, + "step": 4795 + }, + { + "epoch": 0.65, + "learning_rate": 1.3515266144285327e-06, + "loss": 2.0656, + "step": 4800 + }, + { + "epoch": 0.65, + "eval_loss": 2.021768808364868, + "eval_runtime": 165.3452, + "eval_samples_per_second": 3.605, + "eval_steps_per_second": 0.454, + "step": 4800 + }, + { + "epoch": 0.65, + "learning_rate": 1.3508511213185624e-06, + "loss": 1.9761, + "step": 4805 + }, + { + "epoch": 0.65, + "learning_rate": 1.3501756282085924e-06, + "loss": 2.043, + "step": 4810 + }, + { + "epoch": 0.65, + "learning_rate": 1.3495001350986219e-06, + "loss": 2.0883, + "step": 4815 + }, + { + "epoch": 0.65, + "learning_rate": 1.3488246419886518e-06, + "loss": 2.0304, + "step": 4820 + }, + { + "epoch": 0.65, + "learning_rate": 1.3481491488786813e-06, + "loss": 2.0323, + "step": 4825 + }, + { + "epoch": 0.65, + "learning_rate": 1.3474736557687113e-06, + "loss": 2.032, + "step": 4830 + }, + { + "epoch": 0.65, + "learning_rate": 1.3467981626587408e-06, + "loss": 2.0289, + "step": 4835 + }, + { + "epoch": 0.65, + "learning_rate": 1.3461226695487707e-06, + "loss": 1.9711, + "step": 4840 + }, + { + "epoch": 0.65, + "learning_rate": 1.3454471764388002e-06, + "loss": 2.0748, + "step": 4845 + }, + { + "epoch": 0.66, + "learning_rate": 1.3447716833288302e-06, + "loss": 2.0305, + "step": 4850 + }, + { + "epoch": 0.66, + "learning_rate": 1.3440961902188597e-06, + "loss": 2.1066, + "step": 4855 + }, + { + "epoch": 0.66, + "learning_rate": 1.3434206971088894e-06, + "loss": 2.009, + "step": 4860 + }, + { + "epoch": 0.66, + "learning_rate": 1.3427452039989191e-06, + "loss": 1.9678, + "step": 4865 + }, + { + "epoch": 0.66, + "learning_rate": 1.3420697108889489e-06, + "loss": 2.1183, + "step": 4870 + }, + { + "epoch": 0.66, + "learning_rate": 1.3413942177789786e-06, + "loss": 2.0144, + "step": 4875 + }, + { + "epoch": 0.66, + "learning_rate": 1.3407187246690083e-06, + "loss": 2.1348, + "step": 4880 + }, + { + "epoch": 0.66, + "learning_rate": 1.340043231559038e-06, + "loss": 1.955, + "step": 4885 + }, + { + "epoch": 0.66, + "learning_rate": 1.3393677384490678e-06, + "loss": 1.9506, + "step": 4890 + }, + { + "epoch": 0.66, + "learning_rate": 1.3386922453390975e-06, + "loss": 2.0105, + "step": 4895 + }, + { + "epoch": 0.66, + "learning_rate": 1.3380167522291272e-06, + "loss": 1.9965, + "step": 4900 + }, + { + "epoch": 0.66, + "learning_rate": 1.337341259119157e-06, + "loss": 1.9777, + "step": 4905 + }, + { + "epoch": 0.66, + "learning_rate": 1.3366657660091867e-06, + "loss": 2.0761, + "step": 4910 + }, + { + "epoch": 0.66, + "learning_rate": 1.3359902728992164e-06, + "loss": 2.0911, + "step": 4915 + }, + { + "epoch": 0.66, + "learning_rate": 1.3353147797892461e-06, + "loss": 1.9092, + "step": 4920 + }, + { + "epoch": 0.67, + "learning_rate": 1.3346392866792756e-06, + "loss": 2.006, + "step": 4925 + }, + { + "epoch": 0.67, + "learning_rate": 1.3339637935693056e-06, + "loss": 1.9245, + "step": 4930 + }, + { + "epoch": 0.67, + "learning_rate": 1.333288300459335e-06, + "loss": 2.0058, + "step": 4935 + }, + { + "epoch": 0.67, + "learning_rate": 1.332612807349365e-06, + "loss": 2.0073, + "step": 4940 + }, + { + "epoch": 0.67, + "learning_rate": 1.3319373142393945e-06, + "loss": 1.9605, + "step": 4945 + }, + { + "epoch": 0.67, + "learning_rate": 1.3312618211294245e-06, + "loss": 1.9436, + "step": 4950 + }, + { + "epoch": 0.67, + "learning_rate": 1.3305863280194542e-06, + "loss": 2.0926, + "step": 4955 + }, + { + "epoch": 0.67, + "learning_rate": 1.329910834909484e-06, + "loss": 2.0783, + "step": 4960 + }, + { + "epoch": 0.67, + "learning_rate": 1.3292353417995137e-06, + "loss": 1.9943, + "step": 4965 + }, + { + "epoch": 0.67, + "learning_rate": 1.3285598486895434e-06, + "loss": 2.0203, + "step": 4970 + }, + { + "epoch": 0.67, + "learning_rate": 1.3278843555795731e-06, + "loss": 2.0214, + "step": 4975 + }, + { + "epoch": 0.67, + "learning_rate": 1.3272088624696028e-06, + "loss": 2.1185, + "step": 4980 + }, + { + "epoch": 0.67, + "learning_rate": 1.3265333693596326e-06, + "loss": 2.0785, + "step": 4985 + }, + { + "epoch": 0.67, + "learning_rate": 1.325857876249662e-06, + "loss": 1.9431, + "step": 4990 + }, + { + "epoch": 0.67, + "learning_rate": 1.325182383139692e-06, + "loss": 2.0834, + "step": 4995 + }, + { + "epoch": 0.68, + "learning_rate": 1.3245068900297215e-06, + "loss": 1.8992, + "step": 5000 + }, + { + "epoch": 0.68, + "learning_rate": 1.3238313969197515e-06, + "loss": 2.0508, + "step": 5005 + }, + { + "epoch": 0.68, + "learning_rate": 1.323155903809781e-06, + "loss": 2.0358, + "step": 5010 + }, + { + "epoch": 0.68, + "learning_rate": 1.322480410699811e-06, + "loss": 2.0482, + "step": 5015 + }, + { + "epoch": 0.68, + "learning_rate": 1.3218049175898404e-06, + "loss": 1.9552, + "step": 5020 + }, + { + "epoch": 0.68, + "learning_rate": 1.3211294244798704e-06, + "loss": 1.9477, + "step": 5025 + }, + { + "epoch": 0.68, + "learning_rate": 1.3204539313698999e-06, + "loss": 2.0353, + "step": 5030 + }, + { + "epoch": 0.68, + "learning_rate": 1.3197784382599298e-06, + "loss": 1.9951, + "step": 5035 + }, + { + "epoch": 0.68, + "learning_rate": 1.3191029451499593e-06, + "loss": 2.0656, + "step": 5040 + }, + { + "epoch": 0.68, + "learning_rate": 1.3184274520399893e-06, + "loss": 2.0767, + "step": 5045 + }, + { + "epoch": 0.68, + "learning_rate": 1.3177519589300188e-06, + "loss": 2.0943, + "step": 5050 + }, + { + "epoch": 0.68, + "learning_rate": 1.3170764658200485e-06, + "loss": 2.0578, + "step": 5055 + }, + { + "epoch": 0.68, + "learning_rate": 1.3164009727100782e-06, + "loss": 2.0385, + "step": 5060 + }, + { + "epoch": 0.68, + "learning_rate": 1.315725479600108e-06, + "loss": 1.9904, + "step": 5065 + }, + { + "epoch": 0.68, + "learning_rate": 1.3150499864901377e-06, + "loss": 1.9545, + "step": 5070 + }, + { + "epoch": 0.69, + "learning_rate": 1.3143744933801674e-06, + "loss": 2.0289, + "step": 5075 + }, + { + "epoch": 0.69, + "learning_rate": 1.3136990002701971e-06, + "loss": 1.9972, + "step": 5080 + }, + { + "epoch": 0.69, + "learning_rate": 1.3130235071602269e-06, + "loss": 1.9992, + "step": 5085 + }, + { + "epoch": 0.69, + "learning_rate": 1.3123480140502566e-06, + "loss": 2.0014, + "step": 5090 + }, + { + "epoch": 0.69, + "learning_rate": 1.3116725209402863e-06, + "loss": 2.0328, + "step": 5095 + }, + { + "epoch": 0.69, + "learning_rate": 1.3109970278303163e-06, + "loss": 2.0877, + "step": 5100 + }, + { + "epoch": 0.69, + "learning_rate": 1.3103215347203458e-06, + "loss": 2.0689, + "step": 5105 + }, + { + "epoch": 0.69, + "learning_rate": 1.3096460416103757e-06, + "loss": 1.9704, + "step": 5110 + }, + { + "epoch": 0.69, + "learning_rate": 1.3089705485004052e-06, + "loss": 1.9833, + "step": 5115 + }, + { + "epoch": 0.69, + "learning_rate": 1.308295055390435e-06, + "loss": 2.0617, + "step": 5120 + }, + { + "epoch": 0.69, + "learning_rate": 1.3076195622804647e-06, + "loss": 1.995, + "step": 5125 + }, + { + "epoch": 0.69, + "learning_rate": 1.3069440691704944e-06, + "loss": 1.9955, + "step": 5130 + }, + { + "epoch": 0.69, + "learning_rate": 1.3062685760605241e-06, + "loss": 2.0641, + "step": 5135 + }, + { + "epoch": 0.69, + "learning_rate": 1.3055930829505539e-06, + "loss": 2.0867, + "step": 5140 + }, + { + "epoch": 0.7, + "learning_rate": 1.3049175898405836e-06, + "loss": 2.017, + "step": 5145 + }, + { + "epoch": 0.7, + "learning_rate": 1.3042420967306133e-06, + "loss": 2.114, + "step": 5150 + }, + { + "epoch": 0.7, + "learning_rate": 1.303566603620643e-06, + "loss": 2.0112, + "step": 5155 + }, + { + "epoch": 0.7, + "learning_rate": 1.3028911105106728e-06, + "loss": 2.037, + "step": 5160 + }, + { + "epoch": 0.7, + "learning_rate": 1.3022156174007025e-06, + "loss": 2.0592, + "step": 5165 + }, + { + "epoch": 0.7, + "learning_rate": 1.3015401242907322e-06, + "loss": 2.0331, + "step": 5170 + }, + { + "epoch": 0.7, + "learning_rate": 1.3008646311807617e-06, + "loss": 2.0045, + "step": 5175 + }, + { + "epoch": 0.7, + "learning_rate": 1.3001891380707917e-06, + "loss": 2.0046, + "step": 5180 + }, + { + "epoch": 0.7, + "learning_rate": 1.2995136449608212e-06, + "loss": 2.0658, + "step": 5185 + }, + { + "epoch": 0.7, + "learning_rate": 1.2988381518508511e-06, + "loss": 2.038, + "step": 5190 + }, + { + "epoch": 0.7, + "learning_rate": 1.2981626587408806e-06, + "loss": 1.9584, + "step": 5195 + }, + { + "epoch": 0.7, + "learning_rate": 1.2974871656309106e-06, + "loss": 2.1201, + "step": 5200 + }, + { + "epoch": 0.7, + "eval_loss": 2.0141761302948, + "eval_runtime": 165.1921, + "eval_samples_per_second": 3.608, + "eval_steps_per_second": 0.454, + "step": 5200 + }, + { + "epoch": 0.7, + "learning_rate": 1.29681167252094e-06, + "loss": 1.9123, + "step": 5205 + }, + { + "epoch": 0.7, + "learning_rate": 1.29613617941097e-06, + "loss": 1.9701, + "step": 5210 + }, + { + "epoch": 0.7, + "learning_rate": 1.2954606863009995e-06, + "loss": 2.0273, + "step": 5215 + }, + { + "epoch": 0.71, + "learning_rate": 1.2947851931910295e-06, + "loss": 2.0312, + "step": 5220 + }, + { + "epoch": 0.71, + "learning_rate": 1.294109700081059e-06, + "loss": 2.0207, + "step": 5225 + }, + { + "epoch": 0.71, + "learning_rate": 1.293434206971089e-06, + "loss": 2.0797, + "step": 5230 + }, + { + "epoch": 0.71, + "learning_rate": 1.2927587138611185e-06, + "loss": 1.9935, + "step": 5235 + }, + { + "epoch": 0.71, + "learning_rate": 1.2920832207511484e-06, + "loss": 2.0066, + "step": 5240 + }, + { + "epoch": 0.71, + "learning_rate": 1.291407727641178e-06, + "loss": 2.0568, + "step": 5245 + }, + { + "epoch": 0.71, + "learning_rate": 1.2907322345312076e-06, + "loss": 2.1398, + "step": 5250 + }, + { + "epoch": 0.71, + "learning_rate": 1.2900567414212376e-06, + "loss": 1.9884, + "step": 5255 + }, + { + "epoch": 0.71, + "learning_rate": 1.289381248311267e-06, + "loss": 2.0353, + "step": 5260 + }, + { + "epoch": 0.71, + "learning_rate": 1.288705755201297e-06, + "loss": 1.9529, + "step": 5265 + }, + { + "epoch": 0.71, + "learning_rate": 1.2880302620913265e-06, + "loss": 2.0423, + "step": 5270 + }, + { + "epoch": 0.71, + "learning_rate": 1.2873547689813565e-06, + "loss": 2.0891, + "step": 5275 + }, + { + "epoch": 0.71, + "learning_rate": 1.286679275871386e-06, + "loss": 2.0047, + "step": 5280 + }, + { + "epoch": 0.71, + "learning_rate": 1.286003782761416e-06, + "loss": 2.0324, + "step": 5285 + }, + { + "epoch": 0.71, + "learning_rate": 1.2853282896514454e-06, + "loss": 2.0379, + "step": 5290 + }, + { + "epoch": 0.72, + "learning_rate": 1.2846527965414754e-06, + "loss": 2.0345, + "step": 5295 + }, + { + "epoch": 0.72, + "learning_rate": 1.2839773034315049e-06, + "loss": 1.9509, + "step": 5300 + }, + { + "epoch": 0.72, + "learning_rate": 1.2833018103215348e-06, + "loss": 2.076, + "step": 5305 + }, + { + "epoch": 0.72, + "learning_rate": 1.2826263172115643e-06, + "loss": 2.0107, + "step": 5310 + }, + { + "epoch": 0.72, + "learning_rate": 1.281950824101594e-06, + "loss": 1.9542, + "step": 5315 + }, + { + "epoch": 0.72, + "learning_rate": 1.2812753309916238e-06, + "loss": 1.9767, + "step": 5320 + }, + { + "epoch": 0.72, + "learning_rate": 1.2805998378816535e-06, + "loss": 1.9998, + "step": 5325 + }, + { + "epoch": 0.72, + "learning_rate": 1.2799243447716832e-06, + "loss": 1.9633, + "step": 5330 + }, + { + "epoch": 0.72, + "learning_rate": 1.279248851661713e-06, + "loss": 2.006, + "step": 5335 + }, + { + "epoch": 0.72, + "learning_rate": 1.2785733585517427e-06, + "loss": 2.0871, + "step": 5340 + }, + { + "epoch": 0.72, + "learning_rate": 1.2778978654417724e-06, + "loss": 2.0124, + "step": 5345 + }, + { + "epoch": 0.72, + "learning_rate": 1.2772223723318022e-06, + "loss": 1.9771, + "step": 5350 + }, + { + "epoch": 0.72, + "learning_rate": 1.2765468792218319e-06, + "loss": 2.0389, + "step": 5355 + }, + { + "epoch": 0.72, + "learning_rate": 1.2758713861118616e-06, + "loss": 2.1118, + "step": 5360 + }, + { + "epoch": 0.72, + "learning_rate": 1.2751958930018913e-06, + "loss": 2.0358, + "step": 5365 + }, + { + "epoch": 0.73, + "learning_rate": 1.2745203998919208e-06, + "loss": 2.0398, + "step": 5370 + }, + { + "epoch": 0.73, + "learning_rate": 1.2738449067819508e-06, + "loss": 1.9582, + "step": 5375 + }, + { + "epoch": 0.73, + "learning_rate": 1.2731694136719803e-06, + "loss": 2.0692, + "step": 5380 + }, + { + "epoch": 0.73, + "learning_rate": 1.2724939205620102e-06, + "loss": 2.0527, + "step": 5385 + }, + { + "epoch": 0.73, + "learning_rate": 1.2718184274520398e-06, + "loss": 2.077, + "step": 5390 + }, + { + "epoch": 0.73, + "learning_rate": 1.2711429343420697e-06, + "loss": 2.0804, + "step": 5395 + }, + { + "epoch": 0.73, + "learning_rate": 1.2704674412320994e-06, + "loss": 1.9849, + "step": 5400 + }, + { + "epoch": 0.73, + "learning_rate": 1.2697919481221291e-06, + "loss": 2.0286, + "step": 5405 + }, + { + "epoch": 0.73, + "learning_rate": 1.2691164550121589e-06, + "loss": 2.0492, + "step": 5410 + }, + { + "epoch": 0.73, + "learning_rate": 1.2684409619021886e-06, + "loss": 1.9443, + "step": 5415 + }, + { + "epoch": 0.73, + "learning_rate": 1.2677654687922183e-06, + "loss": 1.9515, + "step": 5420 + }, + { + "epoch": 0.73, + "learning_rate": 1.267089975682248e-06, + "loss": 2.044, + "step": 5425 + }, + { + "epoch": 0.73, + "learning_rate": 1.2664144825722778e-06, + "loss": 2.0567, + "step": 5430 + }, + { + "epoch": 0.73, + "learning_rate": 1.2657389894623073e-06, + "loss": 1.9627, + "step": 5435 + }, + { + "epoch": 0.73, + "learning_rate": 1.2650634963523372e-06, + "loss": 2.0867, + "step": 5440 + }, + { + "epoch": 0.74, + "learning_rate": 1.2643880032423667e-06, + "loss": 1.9958, + "step": 5445 + }, + { + "epoch": 0.74, + "learning_rate": 1.2637125101323967e-06, + "loss": 1.9795, + "step": 5450 + }, + { + "epoch": 0.74, + "learning_rate": 1.2630370170224262e-06, + "loss": 2.1329, + "step": 5455 + }, + { + "epoch": 0.74, + "learning_rate": 1.2623615239124561e-06, + "loss": 1.9491, + "step": 5460 + }, + { + "epoch": 0.74, + "learning_rate": 1.2616860308024856e-06, + "loss": 1.9966, + "step": 5465 + }, + { + "epoch": 0.74, + "learning_rate": 1.2610105376925156e-06, + "loss": 2.0714, + "step": 5470 + }, + { + "epoch": 0.74, + "learning_rate": 1.260335044582545e-06, + "loss": 2.047, + "step": 5475 + }, + { + "epoch": 0.74, + "learning_rate": 1.259659551472575e-06, + "loss": 1.9376, + "step": 5480 + }, + { + "epoch": 0.74, + "learning_rate": 1.2589840583626046e-06, + "loss": 2.012, + "step": 5485 + }, + { + "epoch": 0.74, + "learning_rate": 1.2583085652526345e-06, + "loss": 2.0617, + "step": 5490 + }, + { + "epoch": 0.74, + "learning_rate": 1.257633072142664e-06, + "loss": 1.9983, + "step": 5495 + }, + { + "epoch": 0.74, + "learning_rate": 1.256957579032694e-06, + "loss": 2.0385, + "step": 5500 + }, + { + "epoch": 0.74, + "learning_rate": 1.2562820859227235e-06, + "loss": 1.9452, + "step": 5505 + }, + { + "epoch": 0.74, + "learning_rate": 1.2556065928127532e-06, + "loss": 2.0028, + "step": 5510 + }, + { + "epoch": 0.75, + "learning_rate": 1.254931099702783e-06, + "loss": 2.0326, + "step": 5515 + }, + { + "epoch": 0.75, + "learning_rate": 1.2542556065928126e-06, + "loss": 1.9992, + "step": 5520 + }, + { + "epoch": 0.75, + "learning_rate": 1.2535801134828424e-06, + "loss": 1.9506, + "step": 5525 + }, + { + "epoch": 0.75, + "learning_rate": 1.252904620372872e-06, + "loss": 2.039, + "step": 5530 + }, + { + "epoch": 0.75, + "learning_rate": 1.2522291272629018e-06, + "loss": 2.072, + "step": 5535 + }, + { + "epoch": 0.75, + "learning_rate": 1.2515536341529315e-06, + "loss": 2.0191, + "step": 5540 + }, + { + "epoch": 0.75, + "learning_rate": 1.2508781410429615e-06, + "loss": 2.0545, + "step": 5545 + }, + { + "epoch": 0.75, + "learning_rate": 1.250202647932991e-06, + "loss": 1.9955, + "step": 5550 + }, + { + "epoch": 0.75, + "learning_rate": 1.249527154823021e-06, + "loss": 1.9908, + "step": 5555 + }, + { + "epoch": 0.75, + "learning_rate": 1.2488516617130504e-06, + "loss": 1.9835, + "step": 5560 + }, + { + "epoch": 0.75, + "learning_rate": 1.2481761686030804e-06, + "loss": 2.054, + "step": 5565 + }, + { + "epoch": 0.75, + "learning_rate": 1.24750067549311e-06, + "loss": 2.0544, + "step": 5570 + }, + { + "epoch": 0.75, + "learning_rate": 1.2468251823831396e-06, + "loss": 2.0558, + "step": 5575 + }, + { + "epoch": 0.75, + "learning_rate": 1.2461496892731694e-06, + "loss": 1.9398, + "step": 5580 + }, + { + "epoch": 0.75, + "learning_rate": 1.245474196163199e-06, + "loss": 2.1002, + "step": 5585 + }, + { + "epoch": 0.76, + "learning_rate": 1.2447987030532288e-06, + "loss": 2.1542, + "step": 5590 + }, + { + "epoch": 0.76, + "learning_rate": 1.2441232099432585e-06, + "loss": 2.0554, + "step": 5595 + }, + { + "epoch": 0.76, + "learning_rate": 1.2434477168332883e-06, + "loss": 2.0143, + "step": 5600 + }, + { + "epoch": 0.76, + "eval_loss": 2.007206916809082, + "eval_runtime": 165.3065, + "eval_samples_per_second": 3.605, + "eval_steps_per_second": 0.454, + "step": 5600 + }, + { + "epoch": 0.76, + "learning_rate": 1.242772223723318e-06, + "loss": 2.093, + "step": 5605 + }, + { + "epoch": 0.76, + "learning_rate": 1.2420967306133477e-06, + "loss": 1.9739, + "step": 5610 + }, + { + "epoch": 0.76, + "learning_rate": 1.2414212375033774e-06, + "loss": 2.1094, + "step": 5615 + }, + { + "epoch": 0.76, + "learning_rate": 1.2407457443934072e-06, + "loss": 2.1694, + "step": 5620 + }, + { + "epoch": 0.76, + "learning_rate": 1.2400702512834369e-06, + "loss": 2.0056, + "step": 5625 + }, + { + "epoch": 0.76, + "learning_rate": 1.2393947581734664e-06, + "loss": 1.9875, + "step": 5630 + }, + { + "epoch": 0.76, + "learning_rate": 1.2387192650634963e-06, + "loss": 2.0466, + "step": 5635 + }, + { + "epoch": 0.76, + "learning_rate": 1.2380437719535259e-06, + "loss": 1.9781, + "step": 5640 + }, + { + "epoch": 0.76, + "learning_rate": 1.2373682788435558e-06, + "loss": 2.044, + "step": 5645 + }, + { + "epoch": 0.76, + "learning_rate": 1.2366927857335853e-06, + "loss": 1.9391, + "step": 5650 + }, + { + "epoch": 0.76, + "learning_rate": 1.2360172926236152e-06, + "loss": 1.9503, + "step": 5655 + }, + { + "epoch": 0.76, + "learning_rate": 1.2353417995136448e-06, + "loss": 2.1022, + "step": 5660 + }, + { + "epoch": 0.77, + "learning_rate": 1.2346663064036747e-06, + "loss": 2.1214, + "step": 5665 + }, + { + "epoch": 0.77, + "learning_rate": 1.2339908132937042e-06, + "loss": 2.0368, + "step": 5670 + }, + { + "epoch": 0.77, + "learning_rate": 1.2333153201837342e-06, + "loss": 2.0938, + "step": 5675 + }, + { + "epoch": 0.77, + "learning_rate": 1.2326398270737637e-06, + "loss": 1.9393, + "step": 5680 + }, + { + "epoch": 0.77, + "learning_rate": 1.2319643339637936e-06, + "loss": 1.9626, + "step": 5685 + }, + { + "epoch": 0.77, + "learning_rate": 1.2312888408538233e-06, + "loss": 2.0078, + "step": 5690 + }, + { + "epoch": 0.77, + "learning_rate": 1.2306133477438528e-06, + "loss": 1.9386, + "step": 5695 + }, + { + "epoch": 0.77, + "learning_rate": 1.2299378546338828e-06, + "loss": 2.0948, + "step": 5700 + }, + { + "epoch": 0.77, + "learning_rate": 1.2292623615239123e-06, + "loss": 2.013, + "step": 5705 + }, + { + "epoch": 0.77, + "learning_rate": 1.2285868684139422e-06, + "loss": 2.0048, + "step": 5710 + }, + { + "epoch": 0.77, + "learning_rate": 1.2279113753039717e-06, + "loss": 2.0154, + "step": 5715 + }, + { + "epoch": 0.77, + "learning_rate": 1.2272358821940017e-06, + "loss": 2.0012, + "step": 5720 + }, + { + "epoch": 0.77, + "learning_rate": 1.2265603890840312e-06, + "loss": 1.9525, + "step": 5725 + }, + { + "epoch": 0.77, + "learning_rate": 1.2258848959740611e-06, + "loss": 2.0631, + "step": 5730 + }, + { + "epoch": 0.77, + "learning_rate": 1.2252094028640907e-06, + "loss": 2.0146, + "step": 5735 + }, + { + "epoch": 0.78, + "learning_rate": 1.2245339097541206e-06, + "loss": 1.9761, + "step": 5740 + }, + { + "epoch": 0.78, + "learning_rate": 1.2238584166441501e-06, + "loss": 2.0205, + "step": 5745 + }, + { + "epoch": 0.78, + "learning_rate": 1.22318292353418e-06, + "loss": 2.0304, + "step": 5750 + }, + { + "epoch": 0.78, + "learning_rate": 1.2225074304242096e-06, + "loss": 2.0341, + "step": 5755 + }, + { + "epoch": 0.78, + "learning_rate": 1.2218319373142395e-06, + "loss": 2.0402, + "step": 5760 + }, + { + "epoch": 0.78, + "learning_rate": 1.221156444204269e-06, + "loss": 2.0297, + "step": 5765 + }, + { + "epoch": 0.78, + "learning_rate": 1.2204809510942987e-06, + "loss": 2.0116, + "step": 5770 + }, + { + "epoch": 0.78, + "learning_rate": 1.2198054579843285e-06, + "loss": 2.0157, + "step": 5775 + }, + { + "epoch": 0.78, + "learning_rate": 1.2191299648743582e-06, + "loss": 2.0291, + "step": 5780 + }, + { + "epoch": 0.78, + "learning_rate": 1.218454471764388e-06, + "loss": 2.0338, + "step": 5785 + }, + { + "epoch": 0.78, + "learning_rate": 1.2177789786544176e-06, + "loss": 2.0364, + "step": 5790 + }, + { + "epoch": 0.78, + "learning_rate": 1.2171034855444474e-06, + "loss": 1.9964, + "step": 5795 + }, + { + "epoch": 0.78, + "learning_rate": 1.216427992434477e-06, + "loss": 2.135, + "step": 5800 + }, + { + "epoch": 0.78, + "learning_rate": 1.2157524993245068e-06, + "loss": 1.8928, + "step": 5805 + }, + { + "epoch": 0.78, + "learning_rate": 1.2150770062145365e-06, + "loss": 1.9855, + "step": 5810 + }, + { + "epoch": 0.79, + "learning_rate": 1.2144015131045663e-06, + "loss": 1.9905, + "step": 5815 + }, + { + "epoch": 0.79, + "learning_rate": 1.213726019994596e-06, + "loss": 1.9884, + "step": 5820 + }, + { + "epoch": 0.79, + "learning_rate": 1.2130505268846255e-06, + "loss": 2.0023, + "step": 5825 + }, + { + "epoch": 0.79, + "learning_rate": 1.2123750337746555e-06, + "loss": 2.0608, + "step": 5830 + }, + { + "epoch": 0.79, + "learning_rate": 1.2116995406646852e-06, + "loss": 2.0036, + "step": 5835 + }, + { + "epoch": 0.79, + "learning_rate": 1.211024047554715e-06, + "loss": 2.0105, + "step": 5840 + }, + { + "epoch": 0.79, + "learning_rate": 1.2103485544447446e-06, + "loss": 1.9843, + "step": 5845 + }, + { + "epoch": 0.79, + "learning_rate": 1.2096730613347744e-06, + "loss": 1.9958, + "step": 5850 + }, + { + "epoch": 0.79, + "learning_rate": 1.208997568224804e-06, + "loss": 2.0423, + "step": 5855 + }, + { + "epoch": 0.79, + "learning_rate": 1.2083220751148338e-06, + "loss": 1.9651, + "step": 5860 + }, + { + "epoch": 0.79, + "learning_rate": 1.2076465820048635e-06, + "loss": 2.0644, + "step": 5865 + }, + { + "epoch": 0.79, + "learning_rate": 1.2069710888948933e-06, + "loss": 1.9557, + "step": 5870 + }, + { + "epoch": 0.79, + "learning_rate": 1.206295595784923e-06, + "loss": 2.0243, + "step": 5875 + }, + { + "epoch": 0.79, + "learning_rate": 1.2056201026749527e-06, + "loss": 1.9895, + "step": 5880 + }, + { + "epoch": 0.8, + "learning_rate": 1.2049446095649824e-06, + "loss": 1.8585, + "step": 5885 + }, + { + "epoch": 0.8, + "learning_rate": 1.204269116455012e-06, + "loss": 2.0616, + "step": 5890 + }, + { + "epoch": 0.8, + "learning_rate": 1.203593623345042e-06, + "loss": 2.0316, + "step": 5895 + }, + { + "epoch": 0.8, + "learning_rate": 1.2029181302350714e-06, + "loss": 2.0547, + "step": 5900 + }, + { + "epoch": 0.8, + "learning_rate": 1.2022426371251013e-06, + "loss": 2.0177, + "step": 5905 + }, + { + "epoch": 0.8, + "learning_rate": 1.2015671440151309e-06, + "loss": 1.9315, + "step": 5910 + }, + { + "epoch": 0.8, + "learning_rate": 1.2008916509051608e-06, + "loss": 2.0497, + "step": 5915 + }, + { + "epoch": 0.8, + "learning_rate": 1.2002161577951903e-06, + "loss": 2.0856, + "step": 5920 + }, + { + "epoch": 0.8, + "learning_rate": 1.1995406646852203e-06, + "loss": 1.9031, + "step": 5925 + }, + { + "epoch": 0.8, + "learning_rate": 1.1988651715752498e-06, + "loss": 2.0394, + "step": 5930 + }, + { + "epoch": 0.8, + "learning_rate": 1.1981896784652797e-06, + "loss": 1.9831, + "step": 5935 + }, + { + "epoch": 0.8, + "learning_rate": 1.1975141853553092e-06, + "loss": 1.9886, + "step": 5940 + }, + { + "epoch": 0.8, + "learning_rate": 1.1968386922453392e-06, + "loss": 2.0037, + "step": 5945 + }, + { + "epoch": 0.8, + "learning_rate": 1.1961631991353687e-06, + "loss": 2.018, + "step": 5950 + }, + { + "epoch": 0.8, + "learning_rate": 1.1954877060253984e-06, + "loss": 2.1561, + "step": 5955 + }, + { + "epoch": 0.81, + "learning_rate": 1.1948122129154281e-06, + "loss": 2.0146, + "step": 5960 + }, + { + "epoch": 0.81, + "learning_rate": 1.1941367198054579e-06, + "loss": 2.0204, + "step": 5965 + }, + { + "epoch": 0.81, + "learning_rate": 1.1934612266954876e-06, + "loss": 1.9847, + "step": 5970 + }, + { + "epoch": 0.81, + "learning_rate": 1.1927857335855173e-06, + "loss": 1.9988, + "step": 5975 + }, + { + "epoch": 0.81, + "learning_rate": 1.1921102404755472e-06, + "loss": 2.0426, + "step": 5980 + }, + { + "epoch": 0.81, + "learning_rate": 1.1914347473655768e-06, + "loss": 1.9307, + "step": 5985 + }, + { + "epoch": 0.81, + "learning_rate": 1.1907592542556067e-06, + "loss": 1.9322, + "step": 5990 + }, + { + "epoch": 0.81, + "learning_rate": 1.1900837611456362e-06, + "loss": 2.0565, + "step": 5995 + }, + { + "epoch": 0.81, + "learning_rate": 1.1894082680356661e-06, + "loss": 1.9926, + "step": 6000 + }, + { + "epoch": 0.81, + "eval_loss": 2.0011448860168457, + "eval_runtime": 165.1917, + "eval_samples_per_second": 3.608, + "eval_steps_per_second": 0.454, + "step": 6000 + }, + { + "epoch": 0.81, + "learning_rate": 1.1887327749256957e-06, + "loss": 2.0437, + "step": 6005 + }, + { + "epoch": 0.81, + "learning_rate": 1.1880572818157256e-06, + "loss": 1.925, + "step": 6010 + }, + { + "epoch": 0.81, + "learning_rate": 1.1873817887057551e-06, + "loss": 2.0602, + "step": 6015 + }, + { + "epoch": 0.81, + "learning_rate": 1.1867062955957848e-06, + "loss": 2.025, + "step": 6020 + }, + { + "epoch": 0.81, + "learning_rate": 1.1860308024858146e-06, + "loss": 1.9937, + "step": 6025 + }, + { + "epoch": 0.81, + "learning_rate": 1.1853553093758443e-06, + "loss": 2.0074, + "step": 6030 + }, + { + "epoch": 0.82, + "learning_rate": 1.184679816265874e-06, + "loss": 2.0228, + "step": 6035 + }, + { + "epoch": 0.82, + "learning_rate": 1.1840043231559037e-06, + "loss": 2.0472, + "step": 6040 + }, + { + "epoch": 0.82, + "learning_rate": 1.1833288300459335e-06, + "loss": 2.0226, + "step": 6045 + }, + { + "epoch": 0.82, + "learning_rate": 1.1826533369359632e-06, + "loss": 1.9945, + "step": 6050 + }, + { + "epoch": 0.82, + "learning_rate": 1.181977843825993e-06, + "loss": 1.9856, + "step": 6055 + }, + { + "epoch": 0.82, + "learning_rate": 1.1813023507160227e-06, + "loss": 2.0037, + "step": 6060 + }, + { + "epoch": 0.82, + "learning_rate": 1.1806268576060524e-06, + "loss": 1.9873, + "step": 6065 + }, + { + "epoch": 0.82, + "learning_rate": 1.179951364496082e-06, + "loss": 2.0066, + "step": 6070 + }, + { + "epoch": 0.82, + "learning_rate": 1.1792758713861118e-06, + "loss": 2.0181, + "step": 6075 + }, + { + "epoch": 0.82, + "learning_rate": 1.1786003782761416e-06, + "loss": 1.9531, + "step": 6080 + }, + { + "epoch": 0.82, + "learning_rate": 1.177924885166171e-06, + "loss": 2.0259, + "step": 6085 + }, + { + "epoch": 0.82, + "learning_rate": 1.177249392056201e-06, + "loss": 1.9526, + "step": 6090 + }, + { + "epoch": 0.82, + "learning_rate": 1.1765738989462305e-06, + "loss": 1.9071, + "step": 6095 + }, + { + "epoch": 0.82, + "learning_rate": 1.1758984058362605e-06, + "loss": 2.0754, + "step": 6100 + }, + { + "epoch": 0.82, + "learning_rate": 1.17522291272629e-06, + "loss": 2.0043, + "step": 6105 + }, + { + "epoch": 0.83, + "learning_rate": 1.17454741961632e-06, + "loss": 1.986, + "step": 6110 + }, + { + "epoch": 0.83, + "learning_rate": 1.1738719265063494e-06, + "loss": 2.0672, + "step": 6115 + }, + { + "epoch": 0.83, + "learning_rate": 1.1731964333963794e-06, + "loss": 2.0446, + "step": 6120 + }, + { + "epoch": 0.83, + "learning_rate": 1.172520940286409e-06, + "loss": 2.0913, + "step": 6125 + }, + { + "epoch": 0.83, + "learning_rate": 1.1718454471764388e-06, + "loss": 2.0568, + "step": 6130 + }, + { + "epoch": 0.83, + "learning_rate": 1.1711699540664685e-06, + "loss": 1.9685, + "step": 6135 + }, + { + "epoch": 0.83, + "learning_rate": 1.1704944609564983e-06, + "loss": 2.033, + "step": 6140 + }, + { + "epoch": 0.83, + "learning_rate": 1.169818967846528e-06, + "loss": 1.9966, + "step": 6145 + }, + { + "epoch": 0.83, + "learning_rate": 1.1691434747365575e-06, + "loss": 1.9999, + "step": 6150 + }, + { + "epoch": 0.83, + "learning_rate": 1.1684679816265874e-06, + "loss": 2.0394, + "step": 6155 + }, + { + "epoch": 0.83, + "learning_rate": 1.167792488516617e-06, + "loss": 1.9596, + "step": 6160 + }, + { + "epoch": 0.83, + "learning_rate": 1.167116995406647e-06, + "loss": 2.0409, + "step": 6165 + }, + { + "epoch": 0.83, + "learning_rate": 1.1664415022966764e-06, + "loss": 1.9961, + "step": 6170 + }, + { + "epoch": 0.83, + "learning_rate": 1.1657660091867064e-06, + "loss": 2.025, + "step": 6175 + }, + { + "epoch": 0.83, + "learning_rate": 1.1650905160767359e-06, + "loss": 2.0427, + "step": 6180 + }, + { + "epoch": 0.84, + "learning_rate": 1.1644150229667658e-06, + "loss": 1.9939, + "step": 6185 + }, + { + "epoch": 0.84, + "learning_rate": 1.1637395298567953e-06, + "loss": 2.061, + "step": 6190 + }, + { + "epoch": 0.84, + "learning_rate": 1.1630640367468253e-06, + "loss": 2.0645, + "step": 6195 + }, + { + "epoch": 0.84, + "learning_rate": 1.1623885436368548e-06, + "loss": 2.0212, + "step": 6200 + }, + { + "epoch": 0.84, + "learning_rate": 1.1617130505268847e-06, + "loss": 2.0376, + "step": 6205 + }, + { + "epoch": 0.84, + "learning_rate": 1.1610375574169142e-06, + "loss": 1.9781, + "step": 6210 + }, + { + "epoch": 0.84, + "learning_rate": 1.160362064306944e-06, + "loss": 2.0583, + "step": 6215 + }, + { + "epoch": 0.84, + "learning_rate": 1.1596865711969737e-06, + "loss": 2.0362, + "step": 6220 + }, + { + "epoch": 0.84, + "learning_rate": 1.1590110780870034e-06, + "loss": 2.0105, + "step": 6225 + }, + { + "epoch": 0.84, + "learning_rate": 1.1583355849770331e-06, + "loss": 2.0859, + "step": 6230 + }, + { + "epoch": 0.84, + "learning_rate": 1.1576600918670629e-06, + "loss": 1.9903, + "step": 6235 + }, + { + "epoch": 0.84, + "learning_rate": 1.1569845987570926e-06, + "loss": 1.9948, + "step": 6240 + }, + { + "epoch": 0.84, + "learning_rate": 1.1563091056471223e-06, + "loss": 1.9782, + "step": 6245 + }, + { + "epoch": 0.84, + "learning_rate": 1.155633612537152e-06, + "loss": 1.9493, + "step": 6250 + }, + { + "epoch": 0.85, + "learning_rate": 1.1549581194271818e-06, + "loss": 2.0101, + "step": 6255 + }, + { + "epoch": 0.85, + "learning_rate": 1.1542826263172115e-06, + "loss": 2.0458, + "step": 6260 + }, + { + "epoch": 0.85, + "learning_rate": 1.1536071332072412e-06, + "loss": 1.8974, + "step": 6265 + }, + { + "epoch": 0.85, + "learning_rate": 1.1529316400972712e-06, + "loss": 1.9465, + "step": 6270 + }, + { + "epoch": 0.85, + "learning_rate": 1.1522561469873007e-06, + "loss": 1.9905, + "step": 6275 + }, + { + "epoch": 0.85, + "learning_rate": 1.1515806538773304e-06, + "loss": 2.0912, + "step": 6280 + }, + { + "epoch": 0.85, + "learning_rate": 1.1509051607673601e-06, + "loss": 1.9464, + "step": 6285 + }, + { + "epoch": 0.85, + "learning_rate": 1.1502296676573898e-06, + "loss": 1.9497, + "step": 6290 + }, + { + "epoch": 0.85, + "learning_rate": 1.1495541745474196e-06, + "loss": 2.0405, + "step": 6295 + }, + { + "epoch": 0.85, + "learning_rate": 1.1488786814374493e-06, + "loss": 1.9306, + "step": 6300 + }, + { + "epoch": 0.85, + "learning_rate": 1.148203188327479e-06, + "loss": 2.1404, + "step": 6305 + }, + { + "epoch": 0.85, + "learning_rate": 1.1475276952175088e-06, + "loss": 1.906, + "step": 6310 + }, + { + "epoch": 0.85, + "learning_rate": 1.1468522021075385e-06, + "loss": 2.0527, + "step": 6315 + }, + { + "epoch": 0.85, + "learning_rate": 1.1461767089975682e-06, + "loss": 2.057, + "step": 6320 + }, + { + "epoch": 0.85, + "learning_rate": 1.145501215887598e-06, + "loss": 1.948, + "step": 6325 + }, + { + "epoch": 0.86, + "learning_rate": 1.1448257227776277e-06, + "loss": 2.0469, + "step": 6330 + }, + { + "epoch": 0.86, + "learning_rate": 1.1441502296676574e-06, + "loss": 2.0718, + "step": 6335 + }, + { + "epoch": 0.86, + "learning_rate": 1.1434747365576871e-06, + "loss": 2.0162, + "step": 6340 + }, + { + "epoch": 0.86, + "learning_rate": 1.1427992434477166e-06, + "loss": 2.0367, + "step": 6345 + }, + { + "epoch": 0.86, + "learning_rate": 1.1421237503377466e-06, + "loss": 1.9805, + "step": 6350 + }, + { + "epoch": 0.86, + "learning_rate": 1.141448257227776e-06, + "loss": 1.975, + "step": 6355 + }, + { + "epoch": 0.86, + "learning_rate": 1.140772764117806e-06, + "loss": 2.1027, + "step": 6360 + }, + { + "epoch": 0.86, + "learning_rate": 1.1400972710078355e-06, + "loss": 2.0534, + "step": 6365 + }, + { + "epoch": 0.86, + "learning_rate": 1.1394217778978655e-06, + "loss": 2.0852, + "step": 6370 + }, + { + "epoch": 0.86, + "learning_rate": 1.138746284787895e-06, + "loss": 2.0087, + "step": 6375 + }, + { + "epoch": 0.86, + "learning_rate": 1.138070791677925e-06, + "loss": 1.9716, + "step": 6380 + }, + { + "epoch": 0.86, + "learning_rate": 1.1373952985679544e-06, + "loss": 1.9873, + "step": 6385 + }, + { + "epoch": 0.86, + "learning_rate": 1.1367198054579844e-06, + "loss": 2.0416, + "step": 6390 + }, + { + "epoch": 0.86, + "learning_rate": 1.1360443123480139e-06, + "loss": 2.0387, + "step": 6395 + }, + { + "epoch": 0.86, + "learning_rate": 1.1353688192380438e-06, + "loss": 1.9927, + "step": 6400 + }, + { + "epoch": 0.86, + "eval_loss": 1.9952620267868042, + "eval_runtime": 165.2045, + "eval_samples_per_second": 3.608, + "eval_steps_per_second": 0.454, + "step": 6400 + }, + { + "epoch": 0.87, + "learning_rate": 1.1346933261280733e-06, + "loss": 2.0118, + "step": 6405 + }, + { + "epoch": 0.87, + "learning_rate": 1.134017833018103e-06, + "loss": 1.9994, + "step": 6410 + }, + { + "epoch": 0.87, + "learning_rate": 1.133342339908133e-06, + "loss": 2.0647, + "step": 6415 + }, + { + "epoch": 0.87, + "learning_rate": 1.1326668467981625e-06, + "loss": 2.0085, + "step": 6420 + }, + { + "epoch": 0.87, + "learning_rate": 1.1319913536881925e-06, + "loss": 2.0304, + "step": 6425 + }, + { + "epoch": 0.87, + "learning_rate": 1.131315860578222e-06, + "loss": 2.0116, + "step": 6430 + }, + { + "epoch": 0.87, + "learning_rate": 1.130640367468252e-06, + "loss": 1.9938, + "step": 6435 + }, + { + "epoch": 0.87, + "learning_rate": 1.1299648743582814e-06, + "loss": 1.9892, + "step": 6440 + }, + { + "epoch": 0.87, + "learning_rate": 1.1292893812483114e-06, + "loss": 1.9668, + "step": 6445 + }, + { + "epoch": 0.87, + "learning_rate": 1.1286138881383409e-06, + "loss": 1.9952, + "step": 6450 + }, + { + "epoch": 0.87, + "learning_rate": 1.1279383950283708e-06, + "loss": 2.02, + "step": 6455 + }, + { + "epoch": 0.87, + "learning_rate": 1.1272629019184003e-06, + "loss": 1.9425, + "step": 6460 + }, + { + "epoch": 0.87, + "learning_rate": 1.1265874088084303e-06, + "loss": 2.1208, + "step": 6465 + }, + { + "epoch": 0.87, + "learning_rate": 1.1259119156984598e-06, + "loss": 1.9683, + "step": 6470 + }, + { + "epoch": 0.87, + "learning_rate": 1.1252364225884895e-06, + "loss": 1.9708, + "step": 6475 + }, + { + "epoch": 0.88, + "learning_rate": 1.1245609294785192e-06, + "loss": 2.0613, + "step": 6480 + }, + { + "epoch": 0.88, + "learning_rate": 1.123885436368549e-06, + "loss": 1.9685, + "step": 6485 + }, + { + "epoch": 0.88, + "learning_rate": 1.1232099432585787e-06, + "loss": 2.028, + "step": 6490 + }, + { + "epoch": 0.88, + "learning_rate": 1.1225344501486084e-06, + "loss": 2.0237, + "step": 6495 + }, + { + "epoch": 0.88, + "learning_rate": 1.1218589570386381e-06, + "loss": 2.007, + "step": 6500 + }, + { + "epoch": 0.88, + "learning_rate": 1.1211834639286679e-06, + "loss": 2.1089, + "step": 6505 + }, + { + "epoch": 0.88, + "learning_rate": 1.1205079708186976e-06, + "loss": 1.9852, + "step": 6510 + }, + { + "epoch": 0.88, + "learning_rate": 1.1198324777087273e-06, + "loss": 2.005, + "step": 6515 + }, + { + "epoch": 0.88, + "learning_rate": 1.119156984598757e-06, + "loss": 1.8925, + "step": 6520 + }, + { + "epoch": 0.88, + "learning_rate": 1.1184814914887868e-06, + "loss": 1.9801, + "step": 6525 + }, + { + "epoch": 0.88, + "learning_rate": 1.1178059983788165e-06, + "loss": 1.9388, + "step": 6530 + }, + { + "epoch": 0.88, + "learning_rate": 1.1171305052688462e-06, + "loss": 2.0295, + "step": 6535 + }, + { + "epoch": 0.88, + "learning_rate": 1.1164550121588757e-06, + "loss": 1.9851, + "step": 6540 + }, + { + "epoch": 0.88, + "learning_rate": 1.1157795190489057e-06, + "loss": 1.9958, + "step": 6545 + }, + { + "epoch": 0.88, + "learning_rate": 1.1151040259389352e-06, + "loss": 2.0137, + "step": 6550 + }, + { + "epoch": 0.89, + "learning_rate": 1.1144285328289651e-06, + "loss": 1.9999, + "step": 6555 + }, + { + "epoch": 0.89, + "learning_rate": 1.1137530397189949e-06, + "loss": 2.0079, + "step": 6560 + }, + { + "epoch": 0.89, + "learning_rate": 1.1130775466090246e-06, + "loss": 2.0772, + "step": 6565 + }, + { + "epoch": 0.89, + "learning_rate": 1.1124020534990543e-06, + "loss": 1.9449, + "step": 6570 + }, + { + "epoch": 0.89, + "learning_rate": 1.111726560389084e-06, + "loss": 2.0685, + "step": 6575 + }, + { + "epoch": 0.89, + "learning_rate": 1.1110510672791138e-06, + "loss": 2.0436, + "step": 6580 + }, + { + "epoch": 0.89, + "learning_rate": 1.1103755741691435e-06, + "loss": 1.9999, + "step": 6585 + }, + { + "epoch": 0.89, + "learning_rate": 1.1097000810591732e-06, + "loss": 1.9621, + "step": 6590 + }, + { + "epoch": 0.89, + "learning_rate": 1.109024587949203e-06, + "loss": 2.018, + "step": 6595 + }, + { + "epoch": 0.89, + "learning_rate": 1.1083490948392327e-06, + "loss": 1.9571, + "step": 6600 + }, + { + "epoch": 0.89, + "learning_rate": 1.1076736017292622e-06, + "loss": 1.9302, + "step": 6605 + }, + { + "epoch": 0.89, + "learning_rate": 1.1069981086192921e-06, + "loss": 1.9668, + "step": 6610 + }, + { + "epoch": 0.89, + "learning_rate": 1.1063226155093216e-06, + "loss": 2.0856, + "step": 6615 + }, + { + "epoch": 0.89, + "learning_rate": 1.1056471223993516e-06, + "loss": 1.9749, + "step": 6620 + }, + { + "epoch": 0.89, + "learning_rate": 1.104971629289381e-06, + "loss": 1.9511, + "step": 6625 + }, + { + "epoch": 0.9, + "learning_rate": 1.104296136179411e-06, + "loss": 2.0511, + "step": 6630 + }, + { + "epoch": 0.9, + "learning_rate": 1.1036206430694405e-06, + "loss": 2.0733, + "step": 6635 + }, + { + "epoch": 0.9, + "learning_rate": 1.1029451499594705e-06, + "loss": 1.9882, + "step": 6640 + }, + { + "epoch": 0.9, + "learning_rate": 1.1022696568495e-06, + "loss": 2.0366, + "step": 6645 + }, + { + "epoch": 0.9, + "learning_rate": 1.10159416373953e-06, + "loss": 1.9377, + "step": 6650 + }, + { + "epoch": 0.9, + "learning_rate": 1.1009186706295594e-06, + "loss": 1.9914, + "step": 6655 + }, + { + "epoch": 0.9, + "learning_rate": 1.1002431775195894e-06, + "loss": 2.035, + "step": 6660 + }, + { + "epoch": 0.9, + "learning_rate": 1.099567684409619e-06, + "loss": 2.0213, + "step": 6665 + }, + { + "epoch": 0.9, + "learning_rate": 1.0988921912996486e-06, + "loss": 2.0916, + "step": 6670 + }, + { + "epoch": 0.9, + "learning_rate": 1.0982166981896783e-06, + "loss": 1.9638, + "step": 6675 + }, + { + "epoch": 0.9, + "learning_rate": 1.097541205079708e-06, + "loss": 2.0284, + "step": 6680 + }, + { + "epoch": 0.9, + "learning_rate": 1.0968657119697378e-06, + "loss": 1.9684, + "step": 6685 + }, + { + "epoch": 0.9, + "learning_rate": 1.0961902188597675e-06, + "loss": 2.0286, + "step": 6690 + }, + { + "epoch": 0.9, + "learning_rate": 1.0955147257497973e-06, + "loss": 1.94, + "step": 6695 + }, + { + "epoch": 0.91, + "learning_rate": 1.094839232639827e-06, + "loss": 2.0547, + "step": 6700 + }, + { + "epoch": 0.91, + "learning_rate": 1.094163739529857e-06, + "loss": 1.9955, + "step": 6705 + }, + { + "epoch": 0.91, + "learning_rate": 1.0934882464198864e-06, + "loss": 2.0647, + "step": 6710 + }, + { + "epoch": 0.91, + "learning_rate": 1.0928127533099164e-06, + "loss": 1.955, + "step": 6715 + }, + { + "epoch": 0.91, + "learning_rate": 1.0921372601999459e-06, + "loss": 2.0865, + "step": 6720 + }, + { + "epoch": 0.91, + "learning_rate": 1.0914617670899758e-06, + "loss": 1.9343, + "step": 6725 + }, + { + "epoch": 0.91, + "learning_rate": 1.0907862739800053e-06, + "loss": 1.9694, + "step": 6730 + }, + { + "epoch": 0.91, + "learning_rate": 1.090110780870035e-06, + "loss": 2.071, + "step": 6735 + }, + { + "epoch": 0.91, + "learning_rate": 1.0894352877600648e-06, + "loss": 2.0049, + "step": 6740 + }, + { + "epoch": 0.91, + "learning_rate": 1.0887597946500945e-06, + "loss": 1.9945, + "step": 6745 + }, + { + "epoch": 0.91, + "learning_rate": 1.0880843015401242e-06, + "loss": 2.0204, + "step": 6750 + }, + { + "epoch": 0.91, + "learning_rate": 1.087408808430154e-06, + "loss": 1.9284, + "step": 6755 + }, + { + "epoch": 0.91, + "learning_rate": 1.0867333153201837e-06, + "loss": 1.9173, + "step": 6760 + }, + { + "epoch": 0.91, + "learning_rate": 1.0860578222102134e-06, + "loss": 1.9321, + "step": 6765 + }, + { + "epoch": 0.91, + "learning_rate": 1.0853823291002431e-06, + "loss": 2.0363, + "step": 6770 + }, + { + "epoch": 0.92, + "learning_rate": 1.0847068359902729e-06, + "loss": 2.053, + "step": 6775 + }, + { + "epoch": 0.92, + "learning_rate": 1.0840313428803026e-06, + "loss": 1.9384, + "step": 6780 + }, + { + "epoch": 0.92, + "learning_rate": 1.0833558497703323e-06, + "loss": 1.9329, + "step": 6785 + }, + { + "epoch": 0.92, + "learning_rate": 1.082680356660362e-06, + "loss": 2.0216, + "step": 6790 + }, + { + "epoch": 0.92, + "learning_rate": 1.0820048635503918e-06, + "loss": 2.0901, + "step": 6795 + }, + { + "epoch": 0.92, + "learning_rate": 1.0813293704404213e-06, + "loss": 2.0983, + "step": 6800 + }, + { + "epoch": 0.92, + "eval_loss": 1.989758014678955, + "eval_runtime": 165.4469, + "eval_samples_per_second": 3.602, + "eval_steps_per_second": 0.453, + "step": 6800 + }, + { + "epoch": 0.92, + "learning_rate": 1.0806538773304512e-06, + "loss": 2.0018, + "step": 6805 + }, + { + "epoch": 0.92, + "learning_rate": 1.0799783842204807e-06, + "loss": 1.9857, + "step": 6810 + }, + { + "epoch": 0.92, + "learning_rate": 1.0793028911105107e-06, + "loss": 2.0529, + "step": 6815 + }, + { + "epoch": 0.92, + "learning_rate": 1.0786273980005402e-06, + "loss": 1.9866, + "step": 6820 + }, + { + "epoch": 0.92, + "learning_rate": 1.0779519048905701e-06, + "loss": 2.0546, + "step": 6825 + }, + { + "epoch": 0.92, + "learning_rate": 1.0772764117805996e-06, + "loss": 1.9727, + "step": 6830 + }, + { + "epoch": 0.92, + "learning_rate": 1.0766009186706296e-06, + "loss": 2.0949, + "step": 6835 + }, + { + "epoch": 0.92, + "learning_rate": 1.075925425560659e-06, + "loss": 2.049, + "step": 6840 + }, + { + "epoch": 0.92, + "learning_rate": 1.075249932450689e-06, + "loss": 2.0513, + "step": 6845 + }, + { + "epoch": 0.93, + "learning_rate": 1.0745744393407188e-06, + "loss": 1.9751, + "step": 6850 + }, + { + "epoch": 0.93, + "learning_rate": 1.0738989462307485e-06, + "loss": 2.0021, + "step": 6855 + }, + { + "epoch": 0.93, + "learning_rate": 1.0732234531207782e-06, + "loss": 2.0116, + "step": 6860 + }, + { + "epoch": 0.93, + "learning_rate": 1.0725479600108077e-06, + "loss": 1.9602, + "step": 6865 + }, + { + "epoch": 0.93, + "learning_rate": 1.0718724669008377e-06, + "loss": 1.9296, + "step": 6870 + }, + { + "epoch": 0.93, + "learning_rate": 1.0711969737908672e-06, + "loss": 1.9961, + "step": 6875 + }, + { + "epoch": 0.93, + "learning_rate": 1.0705214806808971e-06, + "loss": 1.9395, + "step": 6880 + }, + { + "epoch": 0.93, + "learning_rate": 1.0698459875709266e-06, + "loss": 1.9828, + "step": 6885 + }, + { + "epoch": 0.93, + "learning_rate": 1.0691704944609566e-06, + "loss": 1.9577, + "step": 6890 + }, + { + "epoch": 0.93, + "learning_rate": 1.068495001350986e-06, + "loss": 1.9432, + "step": 6895 + }, + { + "epoch": 0.93, + "learning_rate": 1.067819508241016e-06, + "loss": 1.953, + "step": 6900 + }, + { + "epoch": 0.93, + "learning_rate": 1.0671440151310455e-06, + "loss": 2.0855, + "step": 6905 + }, + { + "epoch": 0.93, + "learning_rate": 1.0664685220210755e-06, + "loss": 2.021, + "step": 6910 + }, + { + "epoch": 0.93, + "learning_rate": 1.065793028911105e-06, + "loss": 2.1253, + "step": 6915 + }, + { + "epoch": 0.93, + "learning_rate": 1.065117535801135e-06, + "loss": 1.9398, + "step": 6920 + }, + { + "epoch": 0.94, + "learning_rate": 1.0644420426911644e-06, + "loss": 1.8746, + "step": 6925 + }, + { + "epoch": 0.94, + "learning_rate": 1.0637665495811942e-06, + "loss": 1.9618, + "step": 6930 + }, + { + "epoch": 0.94, + "learning_rate": 1.063091056471224e-06, + "loss": 1.9594, + "step": 6935 + }, + { + "epoch": 0.94, + "learning_rate": 1.0624155633612536e-06, + "loss": 1.9582, + "step": 6940 + }, + { + "epoch": 0.94, + "learning_rate": 1.0617400702512834e-06, + "loss": 2.0177, + "step": 6945 + }, + { + "epoch": 0.94, + "learning_rate": 1.061064577141313e-06, + "loss": 2.0466, + "step": 6950 + }, + { + "epoch": 0.94, + "learning_rate": 1.0603890840313428e-06, + "loss": 1.9999, + "step": 6955 + }, + { + "epoch": 0.94, + "learning_rate": 1.0597135909213725e-06, + "loss": 1.9628, + "step": 6960 + }, + { + "epoch": 0.94, + "learning_rate": 1.0590380978114023e-06, + "loss": 2.0182, + "step": 6965 + }, + { + "epoch": 0.94, + "learning_rate": 1.058362604701432e-06, + "loss": 2.0239, + "step": 6970 + }, + { + "epoch": 0.94, + "learning_rate": 1.0576871115914617e-06, + "loss": 1.915, + "step": 6975 + }, + { + "epoch": 0.94, + "learning_rate": 1.0570116184814914e-06, + "loss": 2.0042, + "step": 6980 + }, + { + "epoch": 0.94, + "learning_rate": 1.056336125371521e-06, + "loss": 1.9469, + "step": 6985 + }, + { + "epoch": 0.94, + "learning_rate": 1.0556606322615509e-06, + "loss": 2.1206, + "step": 6990 + }, + { + "epoch": 0.94, + "learning_rate": 1.0549851391515806e-06, + "loss": 2.0603, + "step": 6995 + }, + { + "epoch": 0.95, + "learning_rate": 1.0543096460416103e-06, + "loss": 1.9043, + "step": 7000 + }, + { + "epoch": 0.95, + "learning_rate": 1.05363415293164e-06, + "loss": 1.9525, + "step": 7005 + }, + { + "epoch": 0.95, + "learning_rate": 1.0529586598216698e-06, + "loss": 2.0047, + "step": 7010 + }, + { + "epoch": 0.95, + "learning_rate": 1.0522831667116995e-06, + "loss": 2.0078, + "step": 7015 + }, + { + "epoch": 0.95, + "learning_rate": 1.0516076736017292e-06, + "loss": 2.0401, + "step": 7020 + }, + { + "epoch": 0.95, + "learning_rate": 1.050932180491759e-06, + "loss": 2.0235, + "step": 7025 + }, + { + "epoch": 0.95, + "learning_rate": 1.0502566873817887e-06, + "loss": 1.8742, + "step": 7030 + }, + { + "epoch": 0.95, + "learning_rate": 1.0495811942718184e-06, + "loss": 1.9213, + "step": 7035 + }, + { + "epoch": 0.95, + "learning_rate": 1.0489057011618482e-06, + "loss": 2.0103, + "step": 7040 + }, + { + "epoch": 0.95, + "learning_rate": 1.0482302080518779e-06, + "loss": 1.9676, + "step": 7045 + }, + { + "epoch": 0.95, + "learning_rate": 1.0475547149419076e-06, + "loss": 2.0613, + "step": 7050 + }, + { + "epoch": 0.95, + "learning_rate": 1.0468792218319373e-06, + "loss": 1.9115, + "step": 7055 + }, + { + "epoch": 0.95, + "learning_rate": 1.0462037287219668e-06, + "loss": 2.1336, + "step": 7060 + }, + { + "epoch": 0.95, + "learning_rate": 1.0455282356119968e-06, + "loss": 2.0362, + "step": 7065 + }, + { + "epoch": 0.96, + "learning_rate": 1.0448527425020263e-06, + "loss": 1.9231, + "step": 7070 + }, + { + "epoch": 0.96, + "learning_rate": 1.0441772493920562e-06, + "loss": 2.0279, + "step": 7075 + }, + { + "epoch": 0.96, + "learning_rate": 1.0435017562820858e-06, + "loss": 1.9745, + "step": 7080 + }, + { + "epoch": 0.96, + "learning_rate": 1.0428262631721157e-06, + "loss": 1.934, + "step": 7085 + }, + { + "epoch": 0.96, + "learning_rate": 1.0421507700621452e-06, + "loss": 1.9617, + "step": 7090 + }, + { + "epoch": 0.96, + "learning_rate": 1.0414752769521751e-06, + "loss": 1.9954, + "step": 7095 + }, + { + "epoch": 0.96, + "learning_rate": 1.0407997838422047e-06, + "loss": 1.9847, + "step": 7100 + }, + { + "epoch": 0.96, + "learning_rate": 1.0401242907322346e-06, + "loss": 2.0425, + "step": 7105 + }, + { + "epoch": 0.96, + "learning_rate": 1.0394487976222641e-06, + "loss": 1.9804, + "step": 7110 + }, + { + "epoch": 0.96, + "learning_rate": 1.038773304512294e-06, + "loss": 1.9689, + "step": 7115 + }, + { + "epoch": 0.96, + "learning_rate": 1.0380978114023236e-06, + "loss": 2.0032, + "step": 7120 + }, + { + "epoch": 0.96, + "learning_rate": 1.0374223182923533e-06, + "loss": 2.0059, + "step": 7125 + }, + { + "epoch": 0.96, + "learning_rate": 1.036746825182383e-06, + "loss": 2.0135, + "step": 7130 + }, + { + "epoch": 0.96, + "learning_rate": 1.0360713320724127e-06, + "loss": 1.94, + "step": 7135 + }, + { + "epoch": 0.96, + "learning_rate": 1.0353958389624427e-06, + "loss": 1.9191, + "step": 7140 + }, + { + "epoch": 0.97, + "learning_rate": 1.0347203458524722e-06, + "loss": 1.9087, + "step": 7145 + }, + { + "epoch": 0.97, + "learning_rate": 1.0340448527425021e-06, + "loss": 1.9729, + "step": 7150 + }, + { + "epoch": 0.97, + "learning_rate": 1.0333693596325316e-06, + "loss": 2.003, + "step": 7155 + }, + { + "epoch": 0.97, + "learning_rate": 1.0326938665225616e-06, + "loss": 2.0825, + "step": 7160 + }, + { + "epoch": 0.97, + "learning_rate": 1.032018373412591e-06, + "loss": 1.9802, + "step": 7165 + }, + { + "epoch": 0.97, + "learning_rate": 1.031342880302621e-06, + "loss": 1.9544, + "step": 7170 + }, + { + "epoch": 0.97, + "learning_rate": 1.0306673871926506e-06, + "loss": 1.9041, + "step": 7175 + }, + { + "epoch": 0.97, + "learning_rate": 1.0299918940826805e-06, + "loss": 1.8864, + "step": 7180 + }, + { + "epoch": 0.97, + "learning_rate": 1.02931640097271e-06, + "loss": 2.0309, + "step": 7185 + }, + { + "epoch": 0.97, + "learning_rate": 1.0286409078627397e-06, + "loss": 1.9711, + "step": 7190 + }, + { + "epoch": 0.97, + "learning_rate": 1.0279654147527695e-06, + "loss": 1.9598, + "step": 7195 + }, + { + "epoch": 0.97, + "learning_rate": 1.0272899216427992e-06, + "loss": 1.9517, + "step": 7200 + }, + { + "epoch": 0.97, + "eval_loss": 1.985114336013794, + "eval_runtime": 165.362, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.454, + "step": 7200 + }, + { + "epoch": 0.97, + "learning_rate": 1.026614428532829e-06, + "loss": 1.8818, + "step": 7205 + }, + { + "epoch": 0.97, + "learning_rate": 1.0259389354228586e-06, + "loss": 1.9379, + "step": 7210 + }, + { + "epoch": 0.97, + "learning_rate": 1.0252634423128884e-06, + "loss": 1.9673, + "step": 7215 + }, + { + "epoch": 0.98, + "learning_rate": 1.024587949202918e-06, + "loss": 2.0222, + "step": 7220 + }, + { + "epoch": 0.98, + "learning_rate": 1.0239124560929478e-06, + "loss": 1.9747, + "step": 7225 + }, + { + "epoch": 0.98, + "learning_rate": 1.0232369629829775e-06, + "loss": 2.088, + "step": 7230 + }, + { + "epoch": 0.98, + "learning_rate": 1.0225614698730073e-06, + "loss": 2.0251, + "step": 7235 + }, + { + "epoch": 0.98, + "learning_rate": 1.021885976763037e-06, + "loss": 2.1025, + "step": 7240 + }, + { + "epoch": 0.98, + "learning_rate": 1.0212104836530665e-06, + "loss": 1.9793, + "step": 7245 + }, + { + "epoch": 0.98, + "learning_rate": 1.0205349905430964e-06, + "loss": 2.0546, + "step": 7250 + }, + { + "epoch": 0.98, + "learning_rate": 1.019859497433126e-06, + "loss": 2.0635, + "step": 7255 + }, + { + "epoch": 0.98, + "learning_rate": 1.019184004323156e-06, + "loss": 1.9224, + "step": 7260 + }, + { + "epoch": 0.98, + "learning_rate": 1.0185085112131854e-06, + "loss": 2.0158, + "step": 7265 + }, + { + "epoch": 0.98, + "learning_rate": 1.0178330181032154e-06, + "loss": 2.0352, + "step": 7270 + }, + { + "epoch": 0.98, + "learning_rate": 1.0171575249932449e-06, + "loss": 2.0249, + "step": 7275 + }, + { + "epoch": 0.98, + "learning_rate": 1.0164820318832748e-06, + "loss": 1.9468, + "step": 7280 + }, + { + "epoch": 0.98, + "learning_rate": 1.0158065387733043e-06, + "loss": 1.9318, + "step": 7285 + }, + { + "epoch": 0.98, + "learning_rate": 1.0151310456633343e-06, + "loss": 2.0633, + "step": 7290 + }, + { + "epoch": 0.99, + "learning_rate": 1.014455552553364e-06, + "loss": 2.0459, + "step": 7295 + }, + { + "epoch": 0.99, + "learning_rate": 1.0137800594433937e-06, + "loss": 2.0734, + "step": 7300 + }, + { + "epoch": 0.99, + "learning_rate": 1.0131045663334234e-06, + "loss": 2.06, + "step": 7305 + }, + { + "epoch": 0.99, + "learning_rate": 1.0124290732234532e-06, + "loss": 1.947, + "step": 7310 + }, + { + "epoch": 0.99, + "learning_rate": 1.0117535801134829e-06, + "loss": 2.0348, + "step": 7315 + }, + { + "epoch": 0.99, + "learning_rate": 1.0110780870035124e-06, + "loss": 2.0692, + "step": 7320 + }, + { + "epoch": 0.99, + "learning_rate": 1.0104025938935423e-06, + "loss": 2.02, + "step": 7325 + }, + { + "epoch": 0.99, + "learning_rate": 1.0097271007835719e-06, + "loss": 1.9659, + "step": 7330 + }, + { + "epoch": 0.99, + "learning_rate": 1.0090516076736018e-06, + "loss": 1.9602, + "step": 7335 + }, + { + "epoch": 0.99, + "learning_rate": 1.0083761145636313e-06, + "loss": 1.9835, + "step": 7340 + }, + { + "epoch": 0.99, + "learning_rate": 1.0077006214536612e-06, + "loss": 1.9815, + "step": 7345 + }, + { + "epoch": 0.99, + "learning_rate": 1.0070251283436908e-06, + "loss": 1.9859, + "step": 7350 + }, + { + "epoch": 0.99, + "learning_rate": 1.0063496352337207e-06, + "loss": 2.1161, + "step": 7355 + }, + { + "epoch": 0.99, + "learning_rate": 1.0056741421237502e-06, + "loss": 2.0387, + "step": 7360 + }, + { + "epoch": 0.99, + "learning_rate": 1.0049986490137801e-06, + "loss": 2.0205, + "step": 7365 + }, + { + "epoch": 1.0, + "learning_rate": 1.0043231559038097e-06, + "loss": 2.0478, + "step": 7370 + }, + { + "epoch": 1.0, + "learning_rate": 1.0036476627938396e-06, + "loss": 2.023, + "step": 7375 + }, + { + "epoch": 1.0, + "learning_rate": 1.0029721696838691e-06, + "loss": 2.0349, + "step": 7380 + }, + { + "epoch": 1.0, + "learning_rate": 1.0022966765738988e-06, + "loss": 2.0054, + "step": 7385 + }, + { + "epoch": 1.0, + "learning_rate": 1.0016211834639286e-06, + "loss": 2.1175, + "step": 7390 + }, + { + "epoch": 1.0, + "learning_rate": 1.0009456903539583e-06, + "loss": 2.0055, + "step": 7395 + }, + { + "epoch": 1.0, + "learning_rate": 1.000270197243988e-06, + "loss": 2.0236, + "step": 7400 + }, + { + "epoch": 1.0, + "learning_rate": 9.995947041340177e-07, + "loss": 2.073, + "step": 7405 + }, + { + "epoch": 1.0, + "learning_rate": 9.989192110240475e-07, + "loss": 1.9406, + "step": 7410 + }, + { + "epoch": 1.0, + "learning_rate": 9.982437179140772e-07, + "loss": 2.0151, + "step": 7415 + }, + { + "epoch": 1.0, + "learning_rate": 9.97568224804107e-07, + "loss": 1.9841, + "step": 7420 + }, + { + "epoch": 1.0, + "learning_rate": 9.968927316941367e-07, + "loss": 1.8764, + "step": 7425 + }, + { + "epoch": 1.0, + "learning_rate": 9.962172385841664e-07, + "loss": 1.982, + "step": 7430 + }, + { + "epoch": 1.0, + "learning_rate": 9.95541745474196e-07, + "loss": 1.9922, + "step": 7435 + }, + { + "epoch": 1.01, + "learning_rate": 9.948662523642258e-07, + "loss": 2.0233, + "step": 7440 + }, + { + "epoch": 1.01, + "learning_rate": 9.941907592542556e-07, + "loss": 1.9742, + "step": 7445 + }, + { + "epoch": 1.01, + "learning_rate": 9.935152661442853e-07, + "loss": 2.048, + "step": 7450 + }, + { + "epoch": 1.01, + "learning_rate": 9.92839773034315e-07, + "loss": 1.9523, + "step": 7455 + }, + { + "epoch": 1.01, + "learning_rate": 9.921642799243447e-07, + "loss": 2.0822, + "step": 7460 + }, + { + "epoch": 1.01, + "learning_rate": 9.914887868143745e-07, + "loss": 2.0152, + "step": 7465 + }, + { + "epoch": 1.01, + "learning_rate": 9.908132937044042e-07, + "loss": 2.0208, + "step": 7470 + }, + { + "epoch": 1.01, + "learning_rate": 9.90137800594434e-07, + "loss": 1.9042, + "step": 7475 + }, + { + "epoch": 1.01, + "learning_rate": 9.894623074844636e-07, + "loss": 2.0153, + "step": 7480 + }, + { + "epoch": 1.01, + "learning_rate": 9.887868143744934e-07, + "loss": 1.9946, + "step": 7485 + }, + { + "epoch": 1.01, + "learning_rate": 9.88111321264523e-07, + "loss": 1.9934, + "step": 7490 + }, + { + "epoch": 1.01, + "learning_rate": 9.874358281545528e-07, + "loss": 2.0187, + "step": 7495 + }, + { + "epoch": 1.01, + "learning_rate": 9.867603350445825e-07, + "loss": 1.9518, + "step": 7500 + }, + { + "epoch": 1.01, + "learning_rate": 9.860848419346123e-07, + "loss": 2.0422, + "step": 7505 + }, + { + "epoch": 1.01, + "learning_rate": 9.85409348824642e-07, + "loss": 2.0009, + "step": 7510 + }, + { + "epoch": 1.02, + "learning_rate": 9.847338557146717e-07, + "loss": 2.0596, + "step": 7515 + }, + { + "epoch": 1.02, + "learning_rate": 9.840583626047015e-07, + "loss": 2.0612, + "step": 7520 + }, + { + "epoch": 1.02, + "learning_rate": 9.833828694947312e-07, + "loss": 1.9486, + "step": 7525 + }, + { + "epoch": 1.02, + "learning_rate": 9.82707376384761e-07, + "loss": 2.0392, + "step": 7530 + }, + { + "epoch": 1.02, + "learning_rate": 9.820318832747906e-07, + "loss": 1.9368, + "step": 7535 + }, + { + "epoch": 1.02, + "learning_rate": 9.813563901648204e-07, + "loss": 2.0314, + "step": 7540 + }, + { + "epoch": 1.02, + "learning_rate": 9.8068089705485e-07, + "loss": 1.9035, + "step": 7545 + }, + { + "epoch": 1.02, + "learning_rate": 9.800054039448798e-07, + "loss": 2.0037, + "step": 7550 + }, + { + "epoch": 1.02, + "learning_rate": 9.793299108349095e-07, + "loss": 2.0193, + "step": 7555 + }, + { + "epoch": 1.02, + "learning_rate": 9.786544177249393e-07, + "loss": 2.0409, + "step": 7560 + }, + { + "epoch": 1.02, + "learning_rate": 9.77978924614969e-07, + "loss": 2.056, + "step": 7565 + }, + { + "epoch": 1.02, + "learning_rate": 9.773034315049987e-07, + "loss": 1.9435, + "step": 7570 + }, + { + "epoch": 1.02, + "learning_rate": 9.766279383950282e-07, + "loss": 1.9287, + "step": 7575 + }, + { + "epoch": 1.02, + "learning_rate": 9.75952445285058e-07, + "loss": 1.9647, + "step": 7580 + }, + { + "epoch": 1.02, + "learning_rate": 9.752769521750877e-07, + "loss": 1.8393, + "step": 7585 + }, + { + "epoch": 1.03, + "learning_rate": 9.746014590651174e-07, + "loss": 1.9353, + "step": 7590 + }, + { + "epoch": 1.03, + "learning_rate": 9.739259659551471e-07, + "loss": 2.0181, + "step": 7595 + }, + { + "epoch": 1.03, + "learning_rate": 9.732504728451769e-07, + "loss": 1.9621, + "step": 7600 + }, + { + "epoch": 1.03, + "eval_loss": 1.9808038473129272, + "eval_runtime": 165.1869, + "eval_samples_per_second": 3.608, + "eval_steps_per_second": 0.454, + "step": 7600 + }, + { + "epoch": 1.03, + "learning_rate": 9.725749797352066e-07, + "loss": 2.0002, + "step": 7605 + }, + { + "epoch": 1.03, + "learning_rate": 9.718994866252363e-07, + "loss": 1.9619, + "step": 7610 + }, + { + "epoch": 1.03, + "learning_rate": 9.71223993515266e-07, + "loss": 2.032, + "step": 7615 + }, + { + "epoch": 1.03, + "learning_rate": 9.705485004052958e-07, + "loss": 1.8952, + "step": 7620 + }, + { + "epoch": 1.03, + "learning_rate": 9.698730072953255e-07, + "loss": 2.0198, + "step": 7625 + }, + { + "epoch": 1.03, + "learning_rate": 9.691975141853552e-07, + "loss": 1.9973, + "step": 7630 + }, + { + "epoch": 1.03, + "learning_rate": 9.68522021075385e-07, + "loss": 2.0368, + "step": 7635 + }, + { + "epoch": 1.03, + "learning_rate": 9.678465279654147e-07, + "loss": 1.913, + "step": 7640 + }, + { + "epoch": 1.03, + "learning_rate": 9.671710348554444e-07, + "loss": 1.9466, + "step": 7645 + }, + { + "epoch": 1.03, + "learning_rate": 9.664955417454741e-07, + "loss": 1.9923, + "step": 7650 + }, + { + "epoch": 1.03, + "learning_rate": 9.658200486355039e-07, + "loss": 2.055, + "step": 7655 + }, + { + "epoch": 1.03, + "learning_rate": 9.651445555255336e-07, + "loss": 1.9588, + "step": 7660 + }, + { + "epoch": 1.04, + "learning_rate": 9.644690624155633e-07, + "loss": 1.9861, + "step": 7665 + }, + { + "epoch": 1.04, + "learning_rate": 9.63793569305593e-07, + "loss": 2.0062, + "step": 7670 + }, + { + "epoch": 1.04, + "learning_rate": 9.631180761956228e-07, + "loss": 1.992, + "step": 7675 + }, + { + "epoch": 1.04, + "learning_rate": 9.624425830856525e-07, + "loss": 1.9221, + "step": 7680 + }, + { + "epoch": 1.04, + "learning_rate": 9.617670899756822e-07, + "loss": 1.9701, + "step": 7685 + }, + { + "epoch": 1.04, + "learning_rate": 9.61091596865712e-07, + "loss": 2.0102, + "step": 7690 + }, + { + "epoch": 1.04, + "learning_rate": 9.604161037557417e-07, + "loss": 1.9687, + "step": 7695 + }, + { + "epoch": 1.04, + "learning_rate": 9.597406106457714e-07, + "loss": 1.98, + "step": 7700 + }, + { + "epoch": 1.04, + "learning_rate": 9.590651175358011e-07, + "loss": 1.9699, + "step": 7705 + }, + { + "epoch": 1.04, + "learning_rate": 9.583896244258308e-07, + "loss": 1.9465, + "step": 7710 + }, + { + "epoch": 1.04, + "learning_rate": 9.577141313158606e-07, + "loss": 2.04, + "step": 7715 + }, + { + "epoch": 1.04, + "learning_rate": 9.570386382058903e-07, + "loss": 2.0439, + "step": 7720 + }, + { + "epoch": 1.04, + "learning_rate": 9.5636314509592e-07, + "loss": 1.9968, + "step": 7725 + }, + { + "epoch": 1.04, + "learning_rate": 9.556876519859497e-07, + "loss": 2.0141, + "step": 7730 + }, + { + "epoch": 1.04, + "learning_rate": 9.550121588759795e-07, + "loss": 1.9623, + "step": 7735 + }, + { + "epoch": 1.05, + "learning_rate": 9.543366657660092e-07, + "loss": 2.0191, + "step": 7740 + }, + { + "epoch": 1.05, + "learning_rate": 9.536611726560388e-07, + "loss": 1.9723, + "step": 7745 + }, + { + "epoch": 1.05, + "learning_rate": 9.529856795460685e-07, + "loss": 2.0218, + "step": 7750 + }, + { + "epoch": 1.05, + "learning_rate": 9.523101864360983e-07, + "loss": 1.8727, + "step": 7755 + }, + { + "epoch": 1.05, + "learning_rate": 9.51634693326128e-07, + "loss": 1.8031, + "step": 7760 + }, + { + "epoch": 1.05, + "learning_rate": 9.509592002161577e-07, + "loss": 2.0312, + "step": 7765 + }, + { + "epoch": 1.05, + "learning_rate": 9.502837071061874e-07, + "loss": 2.0651, + "step": 7770 + }, + { + "epoch": 1.05, + "learning_rate": 9.496082139962172e-07, + "loss": 2.0122, + "step": 7775 + }, + { + "epoch": 1.05, + "learning_rate": 9.489327208862469e-07, + "loss": 1.8839, + "step": 7780 + }, + { + "epoch": 1.05, + "learning_rate": 9.482572277762766e-07, + "loss": 2.0005, + "step": 7785 + }, + { + "epoch": 1.05, + "learning_rate": 9.475817346663064e-07, + "loss": 2.0028, + "step": 7790 + }, + { + "epoch": 1.05, + "learning_rate": 9.469062415563361e-07, + "loss": 1.9506, + "step": 7795 + }, + { + "epoch": 1.05, + "learning_rate": 9.462307484463658e-07, + "loss": 2.0455, + "step": 7800 + }, + { + "epoch": 1.05, + "learning_rate": 9.455552553363955e-07, + "loss": 1.9396, + "step": 7805 + }, + { + "epoch": 1.06, + "learning_rate": 9.448797622264253e-07, + "loss": 2.0283, + "step": 7810 + }, + { + "epoch": 1.06, + "learning_rate": 9.44204269116455e-07, + "loss": 2.0555, + "step": 7815 + }, + { + "epoch": 1.06, + "learning_rate": 9.435287760064847e-07, + "loss": 2.0782, + "step": 7820 + }, + { + "epoch": 1.06, + "learning_rate": 9.428532828965144e-07, + "loss": 2.0381, + "step": 7825 + }, + { + "epoch": 1.06, + "learning_rate": 9.421777897865442e-07, + "loss": 2.0217, + "step": 7830 + }, + { + "epoch": 1.06, + "learning_rate": 9.415022966765739e-07, + "loss": 1.9138, + "step": 7835 + }, + { + "epoch": 1.06, + "learning_rate": 9.408268035666036e-07, + "loss": 2.0361, + "step": 7840 + }, + { + "epoch": 1.06, + "learning_rate": 9.401513104566333e-07, + "loss": 1.9504, + "step": 7845 + }, + { + "epoch": 1.06, + "learning_rate": 9.394758173466631e-07, + "loss": 1.9862, + "step": 7850 + }, + { + "epoch": 1.06, + "learning_rate": 9.388003242366928e-07, + "loss": 1.923, + "step": 7855 + }, + { + "epoch": 1.06, + "learning_rate": 9.381248311267225e-07, + "loss": 1.9324, + "step": 7860 + }, + { + "epoch": 1.06, + "learning_rate": 9.374493380167521e-07, + "loss": 2.098, + "step": 7865 + }, + { + "epoch": 1.06, + "learning_rate": 9.367738449067819e-07, + "loss": 1.949, + "step": 7870 + }, + { + "epoch": 1.06, + "learning_rate": 9.360983517968116e-07, + "loss": 2.0113, + "step": 7875 + }, + { + "epoch": 1.06, + "learning_rate": 9.354228586868413e-07, + "loss": 1.9433, + "step": 7880 + }, + { + "epoch": 1.07, + "learning_rate": 9.34747365576871e-07, + "loss": 2.0162, + "step": 7885 + }, + { + "epoch": 1.07, + "learning_rate": 9.340718724669008e-07, + "loss": 1.9516, + "step": 7890 + }, + { + "epoch": 1.07, + "learning_rate": 9.333963793569305e-07, + "loss": 1.9885, + "step": 7895 + }, + { + "epoch": 1.07, + "learning_rate": 9.327208862469602e-07, + "loss": 1.9776, + "step": 7900 + }, + { + "epoch": 1.07, + "learning_rate": 9.3204539313699e-07, + "loss": 2.0417, + "step": 7905 + }, + { + "epoch": 1.07, + "learning_rate": 9.313699000270197e-07, + "loss": 2.0449, + "step": 7910 + }, + { + "epoch": 1.07, + "learning_rate": 9.306944069170494e-07, + "loss": 2.0715, + "step": 7915 + }, + { + "epoch": 1.07, + "learning_rate": 9.300189138070791e-07, + "loss": 1.9786, + "step": 7920 + }, + { + "epoch": 1.07, + "learning_rate": 9.293434206971088e-07, + "loss": 1.9668, + "step": 7925 + }, + { + "epoch": 1.07, + "learning_rate": 9.286679275871385e-07, + "loss": 2.0532, + "step": 7930 + }, + { + "epoch": 1.07, + "learning_rate": 9.279924344771682e-07, + "loss": 2.0171, + "step": 7935 + }, + { + "epoch": 1.07, + "learning_rate": 9.273169413671979e-07, + "loss": 2.0196, + "step": 7940 + }, + { + "epoch": 1.07, + "learning_rate": 9.266414482572278e-07, + "loss": 1.9405, + "step": 7945 + }, + { + "epoch": 1.07, + "learning_rate": 9.259659551472575e-07, + "loss": 1.9522, + "step": 7950 + }, + { + "epoch": 1.07, + "learning_rate": 9.252904620372872e-07, + "loss": 1.9491, + "step": 7955 + }, + { + "epoch": 1.08, + "learning_rate": 9.246149689273169e-07, + "loss": 1.9541, + "step": 7960 + }, + { + "epoch": 1.08, + "learning_rate": 9.239394758173467e-07, + "loss": 2.0164, + "step": 7965 + }, + { + "epoch": 1.08, + "learning_rate": 9.232639827073764e-07, + "loss": 1.9922, + "step": 7970 + }, + { + "epoch": 1.08, + "learning_rate": 9.225884895974061e-07, + "loss": 1.9706, + "step": 7975 + }, + { + "epoch": 1.08, + "learning_rate": 9.219129964874358e-07, + "loss": 1.9756, + "step": 7980 + }, + { + "epoch": 1.08, + "learning_rate": 9.212375033774656e-07, + "loss": 2.0676, + "step": 7985 + }, + { + "epoch": 1.08, + "learning_rate": 9.205620102674953e-07, + "loss": 2.0105, + "step": 7990 + }, + { + "epoch": 1.08, + "learning_rate": 9.198865171575249e-07, + "loss": 1.9872, + "step": 7995 + }, + { + "epoch": 1.08, + "learning_rate": 9.192110240475546e-07, + "loss": 1.9072, + "step": 8000 + }, + { + "epoch": 1.08, + "eval_loss": 1.9768513441085815, + "eval_runtime": 165.2166, + "eval_samples_per_second": 3.607, + "eval_steps_per_second": 0.454, + "step": 8000 + }, + { + "epoch": 1.08, + "learning_rate": 9.185355309375844e-07, + "loss": 2.021, + "step": 8005 + }, + { + "epoch": 1.08, + "learning_rate": 9.178600378276141e-07, + "loss": 2.0312, + "step": 8010 + }, + { + "epoch": 1.08, + "learning_rate": 9.171845447176438e-07, + "loss": 2.0465, + "step": 8015 + }, + { + "epoch": 1.08, + "learning_rate": 9.165090516076736e-07, + "loss": 2.0647, + "step": 8020 + }, + { + "epoch": 1.08, + "learning_rate": 9.158335584977033e-07, + "loss": 1.9135, + "step": 8025 + }, + { + "epoch": 1.08, + "learning_rate": 9.15158065387733e-07, + "loss": 1.9873, + "step": 8030 + }, + { + "epoch": 1.09, + "learning_rate": 9.144825722777627e-07, + "loss": 2.0321, + "step": 8035 + }, + { + "epoch": 1.09, + "learning_rate": 9.138070791677925e-07, + "loss": 1.8527, + "step": 8040 + }, + { + "epoch": 1.09, + "learning_rate": 9.131315860578222e-07, + "loss": 1.9411, + "step": 8045 + }, + { + "epoch": 1.09, + "learning_rate": 9.124560929478519e-07, + "loss": 2.0264, + "step": 8050 + }, + { + "epoch": 1.09, + "learning_rate": 9.117805998378815e-07, + "loss": 2.0049, + "step": 8055 + }, + { + "epoch": 1.09, + "learning_rate": 9.111051067279113e-07, + "loss": 1.9286, + "step": 8060 + }, + { + "epoch": 1.09, + "learning_rate": 9.10429613617941e-07, + "loss": 2.0032, + "step": 8065 + }, + { + "epoch": 1.09, + "learning_rate": 9.097541205079707e-07, + "loss": 1.9846, + "step": 8070 + }, + { + "epoch": 1.09, + "learning_rate": 9.090786273980004e-07, + "loss": 2.0288, + "step": 8075 + }, + { + "epoch": 1.09, + "learning_rate": 9.084031342880302e-07, + "loss": 2.0061, + "step": 8080 + }, + { + "epoch": 1.09, + "learning_rate": 9.077276411780599e-07, + "loss": 1.9841, + "step": 8085 + }, + { + "epoch": 1.09, + "learning_rate": 9.070521480680897e-07, + "loss": 2.0113, + "step": 8090 + }, + { + "epoch": 1.09, + "learning_rate": 9.063766549581194e-07, + "loss": 1.9356, + "step": 8095 + }, + { + "epoch": 1.09, + "learning_rate": 9.057011618481492e-07, + "loss": 1.9294, + "step": 8100 + }, + { + "epoch": 1.09, + "learning_rate": 9.050256687381789e-07, + "loss": 1.9696, + "step": 8105 + }, + { + "epoch": 1.1, + "learning_rate": 9.043501756282086e-07, + "loss": 1.9592, + "step": 8110 + }, + { + "epoch": 1.1, + "learning_rate": 9.036746825182383e-07, + "loss": 2.0304, + "step": 8115 + }, + { + "epoch": 1.1, + "learning_rate": 9.029991894082681e-07, + "loss": 1.9916, + "step": 8120 + }, + { + "epoch": 1.1, + "learning_rate": 9.023236962982977e-07, + "loss": 2.0205, + "step": 8125 + }, + { + "epoch": 1.1, + "learning_rate": 9.016482031883274e-07, + "loss": 1.9852, + "step": 8130 + }, + { + "epoch": 1.1, + "learning_rate": 9.009727100783571e-07, + "loss": 1.9217, + "step": 8135 + }, + { + "epoch": 1.1, + "learning_rate": 9.002972169683869e-07, + "loss": 1.929, + "step": 8140 + }, + { + "epoch": 1.1, + "learning_rate": 8.996217238584166e-07, + "loss": 1.9051, + "step": 8145 + }, + { + "epoch": 1.1, + "learning_rate": 8.989462307484463e-07, + "loss": 1.9446, + "step": 8150 + }, + { + "epoch": 1.1, + "learning_rate": 8.982707376384761e-07, + "loss": 1.9425, + "step": 8155 + }, + { + "epoch": 1.1, + "learning_rate": 8.975952445285058e-07, + "loss": 1.9975, + "step": 8160 + }, + { + "epoch": 1.1, + "learning_rate": 8.969197514185355e-07, + "loss": 1.988, + "step": 8165 + }, + { + "epoch": 1.1, + "learning_rate": 8.962442583085652e-07, + "loss": 2.0169, + "step": 8170 + }, + { + "epoch": 1.1, + "learning_rate": 8.95568765198595e-07, + "loss": 2.0097, + "step": 8175 + }, + { + "epoch": 1.11, + "learning_rate": 8.948932720886247e-07, + "loss": 2.0233, + "step": 8180 + }, + { + "epoch": 1.11, + "learning_rate": 8.942177789786543e-07, + "loss": 1.9843, + "step": 8185 + }, + { + "epoch": 1.11, + "learning_rate": 8.93542285868684e-07, + "loss": 1.901, + "step": 8190 + }, + { + "epoch": 1.11, + "learning_rate": 8.928667927587138e-07, + "loss": 1.9648, + "step": 8195 + }, + { + "epoch": 1.11, + "learning_rate": 8.921912996487435e-07, + "loss": 1.9876, + "step": 8200 + }, + { + "epoch": 1.11, + "learning_rate": 8.915158065387732e-07, + "loss": 1.9283, + "step": 8205 + }, + { + "epoch": 1.11, + "learning_rate": 8.908403134288029e-07, + "loss": 1.9734, + "step": 8210 + }, + { + "epoch": 1.11, + "learning_rate": 8.901648203188327e-07, + "loss": 1.9392, + "step": 8215 + }, + { + "epoch": 1.11, + "learning_rate": 8.894893272088624e-07, + "loss": 1.9981, + "step": 8220 + }, + { + "epoch": 1.11, + "learning_rate": 8.888138340988921e-07, + "loss": 1.9668, + "step": 8225 + }, + { + "epoch": 1.11, + "learning_rate": 8.881383409889218e-07, + "loss": 2.1008, + "step": 8230 + }, + { + "epoch": 1.11, + "learning_rate": 8.874628478789517e-07, + "loss": 1.8967, + "step": 8235 + }, + { + "epoch": 1.11, + "learning_rate": 8.867873547689814e-07, + "loss": 1.9444, + "step": 8240 + }, + { + "epoch": 1.11, + "learning_rate": 8.861118616590111e-07, + "loss": 1.9424, + "step": 8245 + }, + { + "epoch": 1.11, + "learning_rate": 8.854363685490409e-07, + "loss": 2.0454, + "step": 8250 + }, + { + "epoch": 1.12, + "learning_rate": 8.847608754390705e-07, + "loss": 2.0193, + "step": 8255 + }, + { + "epoch": 1.12, + "learning_rate": 8.840853823291002e-07, + "loss": 1.9897, + "step": 8260 + }, + { + "epoch": 1.12, + "learning_rate": 8.834098892191299e-07, + "loss": 1.9687, + "step": 8265 + }, + { + "epoch": 1.12, + "learning_rate": 8.827343961091597e-07, + "loss": 2.0078, + "step": 8270 + }, + { + "epoch": 1.12, + "learning_rate": 8.820589029991894e-07, + "loss": 2.022, + "step": 8275 + }, + { + "epoch": 1.12, + "learning_rate": 8.813834098892191e-07, + "loss": 1.9253, + "step": 8280 + }, + { + "epoch": 1.12, + "learning_rate": 8.807079167792488e-07, + "loss": 1.9977, + "step": 8285 + }, + { + "epoch": 1.12, + "learning_rate": 8.800324236692786e-07, + "loss": 1.9738, + "step": 8290 + }, + { + "epoch": 1.12, + "learning_rate": 8.793569305593083e-07, + "loss": 1.9692, + "step": 8295 + }, + { + "epoch": 1.12, + "learning_rate": 8.78681437449338e-07, + "loss": 1.9502, + "step": 8300 + }, + { + "epoch": 1.12, + "learning_rate": 8.780059443393677e-07, + "loss": 1.9975, + "step": 8305 + }, + { + "epoch": 1.12, + "learning_rate": 8.773304512293975e-07, + "loss": 2.0219, + "step": 8310 + }, + { + "epoch": 1.12, + "learning_rate": 8.766549581194271e-07, + "loss": 1.9972, + "step": 8315 + }, + { + "epoch": 1.12, + "learning_rate": 8.759794650094568e-07, + "loss": 2.0072, + "step": 8320 + }, + { + "epoch": 1.12, + "learning_rate": 8.753039718994865e-07, + "loss": 1.9763, + "step": 8325 + }, + { + "epoch": 1.13, + "learning_rate": 8.746284787895163e-07, + "loss": 2.0124, + "step": 8330 + }, + { + "epoch": 1.13, + "learning_rate": 8.73952985679546e-07, + "loss": 1.9558, + "step": 8335 + }, + { + "epoch": 1.13, + "learning_rate": 8.732774925695757e-07, + "loss": 1.971, + "step": 8340 + }, + { + "epoch": 1.13, + "learning_rate": 8.726019994596054e-07, + "loss": 2.0614, + "step": 8345 + }, + { + "epoch": 1.13, + "learning_rate": 8.719265063496352e-07, + "loss": 2.0482, + "step": 8350 + }, + { + "epoch": 1.13, + "learning_rate": 8.712510132396649e-07, + "loss": 1.9451, + "step": 8355 + }, + { + "epoch": 1.13, + "learning_rate": 8.705755201296946e-07, + "loss": 1.9871, + "step": 8360 + }, + { + "epoch": 1.13, + "learning_rate": 8.699000270197243e-07, + "loss": 1.9311, + "step": 8365 + }, + { + "epoch": 1.13, + "learning_rate": 8.692245339097541e-07, + "loss": 1.9456, + "step": 8370 + }, + { + "epoch": 1.13, + "learning_rate": 8.685490407997838e-07, + "loss": 2.0216, + "step": 8375 + }, + { + "epoch": 1.13, + "learning_rate": 8.678735476898136e-07, + "loss": 1.9703, + "step": 8380 + }, + { + "epoch": 1.13, + "learning_rate": 8.671980545798433e-07, + "loss": 1.9589, + "step": 8385 + }, + { + "epoch": 1.13, + "learning_rate": 8.66522561469873e-07, + "loss": 2.006, + "step": 8390 + }, + { + "epoch": 1.13, + "learning_rate": 8.658470683599027e-07, + "loss": 1.9825, + "step": 8395 + }, + { + "epoch": 1.13, + "learning_rate": 8.651715752499324e-07, + "loss": 2.0099, + "step": 8400 + }, + { + "epoch": 1.13, + "eval_loss": 1.9731146097183228, + "eval_runtime": 165.3898, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.453, + "step": 8400 + }, + { + "epoch": 1.14, + "learning_rate": 8.644960821399622e-07, + "loss": 1.9268, + "step": 8405 + }, + { + "epoch": 1.14, + "learning_rate": 8.638205890299919e-07, + "loss": 2.0344, + "step": 8410 + }, + { + "epoch": 1.14, + "learning_rate": 8.631450959200216e-07, + "loss": 1.961, + "step": 8415 + }, + { + "epoch": 1.14, + "learning_rate": 8.624696028100513e-07, + "loss": 2.0161, + "step": 8420 + }, + { + "epoch": 1.14, + "learning_rate": 8.617941097000811e-07, + "loss": 2.0132, + "step": 8425 + }, + { + "epoch": 1.14, + "learning_rate": 8.611186165901108e-07, + "loss": 1.9307, + "step": 8430 + }, + { + "epoch": 1.14, + "learning_rate": 8.604431234801405e-07, + "loss": 2.0199, + "step": 8435 + }, + { + "epoch": 1.14, + "learning_rate": 8.597676303701702e-07, + "loss": 1.9297, + "step": 8440 + }, + { + "epoch": 1.14, + "learning_rate": 8.590921372601999e-07, + "loss": 1.9283, + "step": 8445 + }, + { + "epoch": 1.14, + "learning_rate": 8.584166441502296e-07, + "loss": 2.0167, + "step": 8450 + }, + { + "epoch": 1.14, + "learning_rate": 8.577411510402593e-07, + "loss": 1.9519, + "step": 8455 + }, + { + "epoch": 1.14, + "learning_rate": 8.57065657930289e-07, + "loss": 1.9769, + "step": 8460 + }, + { + "epoch": 1.14, + "learning_rate": 8.563901648203188e-07, + "loss": 1.9857, + "step": 8465 + }, + { + "epoch": 1.14, + "learning_rate": 8.557146717103485e-07, + "loss": 1.9416, + "step": 8470 + }, + { + "epoch": 1.14, + "learning_rate": 8.550391786003782e-07, + "loss": 1.989, + "step": 8475 + }, + { + "epoch": 1.15, + "learning_rate": 8.543636854904079e-07, + "loss": 1.9692, + "step": 8480 + }, + { + "epoch": 1.15, + "learning_rate": 8.536881923804377e-07, + "loss": 1.9521, + "step": 8485 + }, + { + "epoch": 1.15, + "learning_rate": 8.530126992704674e-07, + "loss": 2.012, + "step": 8490 + }, + { + "epoch": 1.15, + "learning_rate": 8.523372061604971e-07, + "loss": 2.0318, + "step": 8495 + }, + { + "epoch": 1.15, + "learning_rate": 8.516617130505268e-07, + "loss": 2.0502, + "step": 8500 + }, + { + "epoch": 1.15, + "learning_rate": 8.509862199405566e-07, + "loss": 2.0098, + "step": 8505 + }, + { + "epoch": 1.15, + "learning_rate": 8.503107268305862e-07, + "loss": 1.9996, + "step": 8510 + }, + { + "epoch": 1.15, + "learning_rate": 8.496352337206159e-07, + "loss": 2.0079, + "step": 8515 + }, + { + "epoch": 1.15, + "learning_rate": 8.489597406106456e-07, + "loss": 1.9978, + "step": 8520 + }, + { + "epoch": 1.15, + "learning_rate": 8.482842475006755e-07, + "loss": 1.9966, + "step": 8525 + }, + { + "epoch": 1.15, + "learning_rate": 8.476087543907052e-07, + "loss": 1.9699, + "step": 8530 + }, + { + "epoch": 1.15, + "learning_rate": 8.469332612807349e-07, + "loss": 2.0915, + "step": 8535 + }, + { + "epoch": 1.15, + "learning_rate": 8.462577681707647e-07, + "loss": 1.9294, + "step": 8540 + }, + { + "epoch": 1.15, + "learning_rate": 8.455822750607944e-07, + "loss": 1.9252, + "step": 8545 + }, + { + "epoch": 1.16, + "learning_rate": 8.449067819508241e-07, + "loss": 1.9872, + "step": 8550 + }, + { + "epoch": 1.16, + "learning_rate": 8.442312888408538e-07, + "loss": 1.9805, + "step": 8555 + }, + { + "epoch": 1.16, + "learning_rate": 8.435557957308836e-07, + "loss": 2.0113, + "step": 8560 + }, + { + "epoch": 1.16, + "learning_rate": 8.428803026209133e-07, + "loss": 1.9835, + "step": 8565 + }, + { + "epoch": 1.16, + "learning_rate": 8.42204809510943e-07, + "loss": 2.0882, + "step": 8570 + }, + { + "epoch": 1.16, + "learning_rate": 8.415293164009726e-07, + "loss": 2.0025, + "step": 8575 + }, + { + "epoch": 1.16, + "learning_rate": 8.408538232910024e-07, + "loss": 1.9061, + "step": 8580 + }, + { + "epoch": 1.16, + "learning_rate": 8.401783301810321e-07, + "loss": 1.9768, + "step": 8585 + }, + { + "epoch": 1.16, + "learning_rate": 8.395028370710618e-07, + "loss": 2.023, + "step": 8590 + }, + { + "epoch": 1.16, + "learning_rate": 8.388273439610915e-07, + "loss": 2.0458, + "step": 8595 + }, + { + "epoch": 1.16, + "learning_rate": 8.381518508511213e-07, + "loss": 2.0728, + "step": 8600 + }, + { + "epoch": 1.16, + "learning_rate": 8.37476357741151e-07, + "loss": 2.0385, + "step": 8605 + }, + { + "epoch": 1.16, + "learning_rate": 8.368008646311807e-07, + "loss": 1.9309, + "step": 8610 + }, + { + "epoch": 1.16, + "learning_rate": 8.361253715212104e-07, + "loss": 1.9162, + "step": 8615 + }, + { + "epoch": 1.16, + "learning_rate": 8.354498784112402e-07, + "loss": 1.9476, + "step": 8620 + }, + { + "epoch": 1.17, + "learning_rate": 8.347743853012699e-07, + "loss": 2.0305, + "step": 8625 + }, + { + "epoch": 1.17, + "learning_rate": 8.340988921912996e-07, + "loss": 2.0251, + "step": 8630 + }, + { + "epoch": 1.17, + "learning_rate": 8.334233990813294e-07, + "loss": 1.9413, + "step": 8635 + }, + { + "epoch": 1.17, + "learning_rate": 8.32747905971359e-07, + "loss": 1.8424, + "step": 8640 + }, + { + "epoch": 1.17, + "learning_rate": 8.320724128613887e-07, + "loss": 1.9744, + "step": 8645 + }, + { + "epoch": 1.17, + "learning_rate": 8.313969197514184e-07, + "loss": 1.9232, + "step": 8650 + }, + { + "epoch": 1.17, + "learning_rate": 8.307214266414482e-07, + "loss": 2.0065, + "step": 8655 + }, + { + "epoch": 1.17, + "learning_rate": 8.300459335314779e-07, + "loss": 1.8987, + "step": 8660 + }, + { + "epoch": 1.17, + "learning_rate": 8.293704404215076e-07, + "loss": 1.9654, + "step": 8665 + }, + { + "epoch": 1.17, + "learning_rate": 8.286949473115374e-07, + "loss": 2.0159, + "step": 8670 + }, + { + "epoch": 1.17, + "learning_rate": 8.280194542015672e-07, + "loss": 1.9236, + "step": 8675 + }, + { + "epoch": 1.17, + "learning_rate": 8.273439610915969e-07, + "loss": 1.9523, + "step": 8680 + }, + { + "epoch": 1.17, + "learning_rate": 8.266684679816266e-07, + "loss": 1.9601, + "step": 8685 + }, + { + "epoch": 1.17, + "learning_rate": 8.259929748716563e-07, + "loss": 1.8293, + "step": 8690 + }, + { + "epoch": 1.17, + "learning_rate": 8.253174817616861e-07, + "loss": 1.99, + "step": 8695 + }, + { + "epoch": 1.18, + "learning_rate": 8.246419886517158e-07, + "loss": 1.9628, + "step": 8700 + }, + { + "epoch": 1.18, + "learning_rate": 8.239664955417454e-07, + "loss": 1.9766, + "step": 8705 + }, + { + "epoch": 1.18, + "learning_rate": 8.232910024317751e-07, + "loss": 1.8718, + "step": 8710 + }, + { + "epoch": 1.18, + "learning_rate": 8.226155093218049e-07, + "loss": 1.9567, + "step": 8715 + }, + { + "epoch": 1.18, + "learning_rate": 8.219400162118346e-07, + "loss": 1.8965, + "step": 8720 + }, + { + "epoch": 1.18, + "learning_rate": 8.212645231018643e-07, + "loss": 1.9987, + "step": 8725 + }, + { + "epoch": 1.18, + "learning_rate": 8.20589029991894e-07, + "loss": 1.9691, + "step": 8730 + }, + { + "epoch": 1.18, + "learning_rate": 8.199135368819238e-07, + "loss": 1.9517, + "step": 8735 + }, + { + "epoch": 1.18, + "learning_rate": 8.192380437719535e-07, + "loss": 2.0101, + "step": 8740 + }, + { + "epoch": 1.18, + "learning_rate": 8.185625506619832e-07, + "loss": 1.9377, + "step": 8745 + }, + { + "epoch": 1.18, + "learning_rate": 8.17887057552013e-07, + "loss": 2.0027, + "step": 8750 + }, + { + "epoch": 1.18, + "learning_rate": 8.172115644420427e-07, + "loss": 1.939, + "step": 8755 + }, + { + "epoch": 1.18, + "learning_rate": 8.165360713320724e-07, + "loss": 2.0467, + "step": 8760 + }, + { + "epoch": 1.18, + "learning_rate": 8.158605782221021e-07, + "loss": 1.9963, + "step": 8765 + }, + { + "epoch": 1.18, + "learning_rate": 8.151850851121318e-07, + "loss": 1.9886, + "step": 8770 + }, + { + "epoch": 1.19, + "learning_rate": 8.145095920021615e-07, + "loss": 1.944, + "step": 8775 + }, + { + "epoch": 1.19, + "learning_rate": 8.138340988921912e-07, + "loss": 1.8703, + "step": 8780 + }, + { + "epoch": 1.19, + "learning_rate": 8.131586057822209e-07, + "loss": 1.886, + "step": 8785 + }, + { + "epoch": 1.19, + "learning_rate": 8.124831126722507e-07, + "loss": 1.979, + "step": 8790 + }, + { + "epoch": 1.19, + "learning_rate": 8.118076195622804e-07, + "loss": 1.9356, + "step": 8795 + }, + { + "epoch": 1.19, + "learning_rate": 8.111321264523101e-07, + "loss": 1.9588, + "step": 8800 + }, + { + "epoch": 1.19, + "eval_loss": 1.9695961475372314, + "eval_runtime": 165.3874, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.453, + "step": 8800 + }, + { + "epoch": 1.19, + "learning_rate": 8.104566333423398e-07, + "loss": 1.9797, + "step": 8805 + }, + { + "epoch": 1.19, + "learning_rate": 8.097811402323696e-07, + "loss": 1.9845, + "step": 8810 + }, + { + "epoch": 1.19, + "learning_rate": 8.091056471223993e-07, + "loss": 1.9642, + "step": 8815 + }, + { + "epoch": 1.19, + "learning_rate": 8.084301540124291e-07, + "loss": 1.9069, + "step": 8820 + }, + { + "epoch": 1.19, + "learning_rate": 8.077546609024588e-07, + "loss": 2.1011, + "step": 8825 + }, + { + "epoch": 1.19, + "learning_rate": 8.070791677924886e-07, + "loss": 2.0182, + "step": 8830 + }, + { + "epoch": 1.19, + "learning_rate": 8.064036746825182e-07, + "loss": 1.8241, + "step": 8835 + }, + { + "epoch": 1.19, + "learning_rate": 8.057281815725479e-07, + "loss": 1.9258, + "step": 8840 + }, + { + "epoch": 1.19, + "learning_rate": 8.050526884625776e-07, + "loss": 1.9356, + "step": 8845 + }, + { + "epoch": 1.2, + "learning_rate": 8.043771953526074e-07, + "loss": 1.959, + "step": 8850 + }, + { + "epoch": 1.2, + "learning_rate": 8.037017022426371e-07, + "loss": 1.9901, + "step": 8855 + }, + { + "epoch": 1.2, + "learning_rate": 8.030262091326668e-07, + "loss": 1.9471, + "step": 8860 + }, + { + "epoch": 1.2, + "learning_rate": 8.023507160226966e-07, + "loss": 2.0966, + "step": 8865 + }, + { + "epoch": 1.2, + "learning_rate": 8.016752229127263e-07, + "loss": 1.9141, + "step": 8870 + }, + { + "epoch": 1.2, + "learning_rate": 8.00999729802756e-07, + "loss": 2.0125, + "step": 8875 + }, + { + "epoch": 1.2, + "learning_rate": 8.003242366927857e-07, + "loss": 1.9031, + "step": 8880 + }, + { + "epoch": 1.2, + "learning_rate": 7.996487435828155e-07, + "loss": 2.0249, + "step": 8885 + }, + { + "epoch": 1.2, + "learning_rate": 7.989732504728452e-07, + "loss": 1.9745, + "step": 8890 + }, + { + "epoch": 1.2, + "learning_rate": 7.982977573628749e-07, + "loss": 1.9759, + "step": 8895 + }, + { + "epoch": 1.2, + "learning_rate": 7.976222642529045e-07, + "loss": 1.9258, + "step": 8900 + }, + { + "epoch": 1.2, + "learning_rate": 7.969467711429343e-07, + "loss": 2.013, + "step": 8905 + }, + { + "epoch": 1.2, + "learning_rate": 7.96271278032964e-07, + "loss": 1.9858, + "step": 8910 + }, + { + "epoch": 1.2, + "learning_rate": 7.955957849229937e-07, + "loss": 1.9655, + "step": 8915 + }, + { + "epoch": 1.21, + "learning_rate": 7.949202918130234e-07, + "loss": 2.0186, + "step": 8920 + }, + { + "epoch": 1.21, + "learning_rate": 7.942447987030532e-07, + "loss": 1.9713, + "step": 8925 + }, + { + "epoch": 1.21, + "learning_rate": 7.935693055930829e-07, + "loss": 1.9275, + "step": 8930 + }, + { + "epoch": 1.21, + "learning_rate": 7.928938124831126e-07, + "loss": 1.8962, + "step": 8935 + }, + { + "epoch": 1.21, + "learning_rate": 7.922183193731423e-07, + "loss": 2.063, + "step": 8940 + }, + { + "epoch": 1.21, + "learning_rate": 7.915428262631721e-07, + "loss": 1.8896, + "step": 8945 + }, + { + "epoch": 1.21, + "learning_rate": 7.908673331532018e-07, + "loss": 1.966, + "step": 8950 + }, + { + "epoch": 1.21, + "learning_rate": 7.901918400432315e-07, + "loss": 1.9818, + "step": 8955 + }, + { + "epoch": 1.21, + "learning_rate": 7.895163469332611e-07, + "loss": 2.0381, + "step": 8960 + }, + { + "epoch": 1.21, + "learning_rate": 7.88840853823291e-07, + "loss": 1.9868, + "step": 8965 + }, + { + "epoch": 1.21, + "learning_rate": 7.881653607133207e-07, + "loss": 1.9937, + "step": 8970 + }, + { + "epoch": 1.21, + "learning_rate": 7.874898676033504e-07, + "loss": 1.9347, + "step": 8975 + }, + { + "epoch": 1.21, + "learning_rate": 7.868143744933801e-07, + "loss": 1.9733, + "step": 8980 + }, + { + "epoch": 1.21, + "learning_rate": 7.861388813834099e-07, + "loss": 1.9135, + "step": 8985 + }, + { + "epoch": 1.21, + "learning_rate": 7.854633882734396e-07, + "loss": 1.9111, + "step": 8990 + }, + { + "epoch": 1.22, + "learning_rate": 7.847878951634693e-07, + "loss": 2.0278, + "step": 8995 + }, + { + "epoch": 1.22, + "learning_rate": 7.841124020534991e-07, + "loss": 2.0025, + "step": 9000 + }, + { + "epoch": 1.22, + "learning_rate": 7.834369089435288e-07, + "loss": 1.9607, + "step": 9005 + }, + { + "epoch": 1.22, + "learning_rate": 7.827614158335585e-07, + "loss": 1.966, + "step": 9010 + }, + { + "epoch": 1.22, + "learning_rate": 7.820859227235882e-07, + "loss": 2.0316, + "step": 9015 + }, + { + "epoch": 1.22, + "learning_rate": 7.81410429613618e-07, + "loss": 2.0147, + "step": 9020 + }, + { + "epoch": 1.22, + "learning_rate": 7.807349365036477e-07, + "loss": 1.9391, + "step": 9025 + }, + { + "epoch": 1.22, + "learning_rate": 7.800594433936773e-07, + "loss": 2.0185, + "step": 9030 + }, + { + "epoch": 1.22, + "learning_rate": 7.79383950283707e-07, + "loss": 1.9343, + "step": 9035 + }, + { + "epoch": 1.22, + "learning_rate": 7.787084571737368e-07, + "loss": 1.9053, + "step": 9040 + }, + { + "epoch": 1.22, + "learning_rate": 7.780329640637665e-07, + "loss": 1.9645, + "step": 9045 + }, + { + "epoch": 1.22, + "learning_rate": 7.773574709537962e-07, + "loss": 1.9119, + "step": 9050 + }, + { + "epoch": 1.22, + "learning_rate": 7.766819778438259e-07, + "loss": 1.9916, + "step": 9055 + }, + { + "epoch": 1.22, + "learning_rate": 7.760064847338557e-07, + "loss": 1.978, + "step": 9060 + }, + { + "epoch": 1.22, + "learning_rate": 7.753309916238854e-07, + "loss": 1.9838, + "step": 9065 + }, + { + "epoch": 1.23, + "learning_rate": 7.746554985139151e-07, + "loss": 2.0075, + "step": 9070 + }, + { + "epoch": 1.23, + "learning_rate": 7.739800054039448e-07, + "loss": 2.0059, + "step": 9075 + }, + { + "epoch": 1.23, + "learning_rate": 7.733045122939746e-07, + "loss": 1.9458, + "step": 9080 + }, + { + "epoch": 1.23, + "learning_rate": 7.726290191840043e-07, + "loss": 2.0189, + "step": 9085 + }, + { + "epoch": 1.23, + "learning_rate": 7.719535260740339e-07, + "loss": 1.907, + "step": 9090 + }, + { + "epoch": 1.23, + "learning_rate": 7.712780329640636e-07, + "loss": 2.0197, + "step": 9095 + }, + { + "epoch": 1.23, + "learning_rate": 7.706025398540934e-07, + "loss": 1.9569, + "step": 9100 + }, + { + "epoch": 1.23, + "learning_rate": 7.699270467441231e-07, + "loss": 2.0457, + "step": 9105 + }, + { + "epoch": 1.23, + "learning_rate": 7.692515536341529e-07, + "loss": 1.9715, + "step": 9110 + }, + { + "epoch": 1.23, + "learning_rate": 7.685760605241827e-07, + "loss": 1.9375, + "step": 9115 + }, + { + "epoch": 1.23, + "learning_rate": 7.679005674142124e-07, + "loss": 1.9681, + "step": 9120 + }, + { + "epoch": 1.23, + "learning_rate": 7.672250743042421e-07, + "loss": 2.0124, + "step": 9125 + }, + { + "epoch": 1.23, + "learning_rate": 7.665495811942718e-07, + "loss": 1.9938, + "step": 9130 + }, + { + "epoch": 1.23, + "learning_rate": 7.658740880843016e-07, + "loss": 1.9986, + "step": 9135 + }, + { + "epoch": 1.23, + "learning_rate": 7.651985949743313e-07, + "loss": 2.0119, + "step": 9140 + }, + { + "epoch": 1.24, + "learning_rate": 7.64523101864361e-07, + "loss": 1.989, + "step": 9145 + }, + { + "epoch": 1.24, + "learning_rate": 7.638476087543907e-07, + "loss": 1.9794, + "step": 9150 + }, + { + "epoch": 1.24, + "learning_rate": 7.631721156444205e-07, + "loss": 1.9841, + "step": 9155 + }, + { + "epoch": 1.24, + "learning_rate": 7.624966225344501e-07, + "loss": 2.0012, + "step": 9160 + }, + { + "epoch": 1.24, + "learning_rate": 7.618211294244798e-07, + "loss": 2.0727, + "step": 9165 + }, + { + "epoch": 1.24, + "learning_rate": 7.611456363145095e-07, + "loss": 2.0328, + "step": 9170 + }, + { + "epoch": 1.24, + "learning_rate": 7.604701432045393e-07, + "loss": 1.9844, + "step": 9175 + }, + { + "epoch": 1.24, + "learning_rate": 7.59794650094569e-07, + "loss": 2.0104, + "step": 9180 + }, + { + "epoch": 1.24, + "learning_rate": 7.591191569845987e-07, + "loss": 1.9992, + "step": 9185 + }, + { + "epoch": 1.24, + "learning_rate": 7.584436638746284e-07, + "loss": 2.0314, + "step": 9190 + }, + { + "epoch": 1.24, + "learning_rate": 7.577681707646582e-07, + "loss": 1.9967, + "step": 9195 + }, + { + "epoch": 1.24, + "learning_rate": 7.570926776546879e-07, + "loss": 2.0098, + "step": 9200 + }, + { + "epoch": 1.24, + "eval_loss": 1.96656334400177, + "eval_runtime": 165.5096, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.453, + "step": 9200 + }, + { + "epoch": 1.24, + "learning_rate": 7.564171845447176e-07, + "loss": 1.9014, + "step": 9205 + }, + { + "epoch": 1.24, + "learning_rate": 7.557416914347473e-07, + "loss": 2.0112, + "step": 9210 + }, + { + "epoch": 1.24, + "learning_rate": 7.550661983247771e-07, + "loss": 1.981, + "step": 9215 + }, + { + "epoch": 1.25, + "learning_rate": 7.543907052148067e-07, + "loss": 2.0282, + "step": 9220 + }, + { + "epoch": 1.25, + "learning_rate": 7.537152121048364e-07, + "loss": 1.888, + "step": 9225 + }, + { + "epoch": 1.25, + "learning_rate": 7.530397189948661e-07, + "loss": 1.957, + "step": 9230 + }, + { + "epoch": 1.25, + "learning_rate": 7.523642258848959e-07, + "loss": 2.0638, + "step": 9235 + }, + { + "epoch": 1.25, + "learning_rate": 7.516887327749256e-07, + "loss": 2.0383, + "step": 9240 + }, + { + "epoch": 1.25, + "learning_rate": 7.510132396649553e-07, + "loss": 2.1005, + "step": 9245 + }, + { + "epoch": 1.25, + "learning_rate": 7.50337746554985e-07, + "loss": 1.9186, + "step": 9250 + }, + { + "epoch": 1.25, + "learning_rate": 7.496622534450149e-07, + "loss": 1.9245, + "step": 9255 + }, + { + "epoch": 1.25, + "learning_rate": 7.489867603350446e-07, + "loss": 1.9879, + "step": 9260 + }, + { + "epoch": 1.25, + "learning_rate": 7.483112672250743e-07, + "loss": 1.9247, + "step": 9265 + }, + { + "epoch": 1.25, + "learning_rate": 7.476357741151041e-07, + "loss": 1.9621, + "step": 9270 + }, + { + "epoch": 1.25, + "learning_rate": 7.469602810051338e-07, + "loss": 1.8959, + "step": 9275 + }, + { + "epoch": 1.25, + "learning_rate": 7.462847878951635e-07, + "loss": 2.0015, + "step": 9280 + }, + { + "epoch": 1.25, + "learning_rate": 7.456092947851932e-07, + "loss": 2.0016, + "step": 9285 + }, + { + "epoch": 1.26, + "learning_rate": 7.449338016752229e-07, + "loss": 2.0414, + "step": 9290 + }, + { + "epoch": 1.26, + "learning_rate": 7.442583085652526e-07, + "loss": 1.8762, + "step": 9295 + }, + { + "epoch": 1.26, + "learning_rate": 7.435828154552823e-07, + "loss": 1.9855, + "step": 9300 + }, + { + "epoch": 1.26, + "learning_rate": 7.42907322345312e-07, + "loss": 1.8853, + "step": 9305 + }, + { + "epoch": 1.26, + "learning_rate": 7.422318292353418e-07, + "loss": 1.9256, + "step": 9310 + }, + { + "epoch": 1.26, + "learning_rate": 7.415563361253715e-07, + "loss": 1.9988, + "step": 9315 + }, + { + "epoch": 1.26, + "learning_rate": 7.408808430154012e-07, + "loss": 1.9554, + "step": 9320 + }, + { + "epoch": 1.26, + "learning_rate": 7.402053499054309e-07, + "loss": 1.9522, + "step": 9325 + }, + { + "epoch": 1.26, + "learning_rate": 7.395298567954607e-07, + "loss": 1.9611, + "step": 9330 + }, + { + "epoch": 1.26, + "learning_rate": 7.388543636854904e-07, + "loss": 1.9107, + "step": 9335 + }, + { + "epoch": 1.26, + "learning_rate": 7.381788705755201e-07, + "loss": 1.9087, + "step": 9340 + }, + { + "epoch": 1.26, + "learning_rate": 7.375033774655498e-07, + "loss": 1.8861, + "step": 9345 + }, + { + "epoch": 1.26, + "learning_rate": 7.368278843555795e-07, + "loss": 1.9844, + "step": 9350 + }, + { + "epoch": 1.26, + "learning_rate": 7.361523912456092e-07, + "loss": 1.9853, + "step": 9355 + }, + { + "epoch": 1.26, + "learning_rate": 7.354768981356389e-07, + "loss": 1.9048, + "step": 9360 + }, + { + "epoch": 1.27, + "learning_rate": 7.348014050256686e-07, + "loss": 1.9164, + "step": 9365 + }, + { + "epoch": 1.27, + "learning_rate": 7.341259119156984e-07, + "loss": 1.9002, + "step": 9370 + }, + { + "epoch": 1.27, + "learning_rate": 7.334504188057281e-07, + "loss": 1.9329, + "step": 9375 + }, + { + "epoch": 1.27, + "learning_rate": 7.327749256957578e-07, + "loss": 1.8667, + "step": 9380 + }, + { + "epoch": 1.27, + "learning_rate": 7.320994325857876e-07, + "loss": 1.9071, + "step": 9385 + }, + { + "epoch": 1.27, + "learning_rate": 7.314239394758173e-07, + "loss": 1.9801, + "step": 9390 + }, + { + "epoch": 1.27, + "learning_rate": 7.30748446365847e-07, + "loss": 1.8903, + "step": 9395 + }, + { + "epoch": 1.27, + "learning_rate": 7.300729532558768e-07, + "loss": 2.0193, + "step": 9400 + }, + { + "epoch": 1.27, + "learning_rate": 7.293974601459066e-07, + "loss": 1.9583, + "step": 9405 + }, + { + "epoch": 1.27, + "learning_rate": 7.287219670359363e-07, + "loss": 1.9964, + "step": 9410 + }, + { + "epoch": 1.27, + "learning_rate": 7.28046473925966e-07, + "loss": 1.9577, + "step": 9415 + }, + { + "epoch": 1.27, + "learning_rate": 7.273709808159956e-07, + "loss": 1.9443, + "step": 9420 + }, + { + "epoch": 1.27, + "learning_rate": 7.266954877060254e-07, + "loss": 2.057, + "step": 9425 + }, + { + "epoch": 1.27, + "learning_rate": 7.260199945960551e-07, + "loss": 2.0193, + "step": 9430 + }, + { + "epoch": 1.27, + "learning_rate": 7.253445014860848e-07, + "loss": 2.0085, + "step": 9435 + }, + { + "epoch": 1.28, + "learning_rate": 7.246690083761145e-07, + "loss": 1.9692, + "step": 9440 + }, + { + "epoch": 1.28, + "learning_rate": 7.239935152661443e-07, + "loss": 1.9368, + "step": 9445 + }, + { + "epoch": 1.28, + "learning_rate": 7.23318022156174e-07, + "loss": 1.9555, + "step": 9450 + }, + { + "epoch": 1.28, + "learning_rate": 7.226425290462037e-07, + "loss": 1.9812, + "step": 9455 + }, + { + "epoch": 1.28, + "learning_rate": 7.219670359362334e-07, + "loss": 1.9442, + "step": 9460 + }, + { + "epoch": 1.28, + "learning_rate": 7.212915428262632e-07, + "loss": 1.9647, + "step": 9465 + }, + { + "epoch": 1.28, + "learning_rate": 7.206160497162929e-07, + "loss": 2.0252, + "step": 9470 + }, + { + "epoch": 1.28, + "learning_rate": 7.199405566063226e-07, + "loss": 1.9675, + "step": 9475 + }, + { + "epoch": 1.28, + "learning_rate": 7.192650634963522e-07, + "loss": 1.9984, + "step": 9480 + }, + { + "epoch": 1.28, + "learning_rate": 7.18589570386382e-07, + "loss": 2.0282, + "step": 9485 + }, + { + "epoch": 1.28, + "learning_rate": 7.179140772764117e-07, + "loss": 1.9355, + "step": 9490 + }, + { + "epoch": 1.28, + "learning_rate": 7.172385841664414e-07, + "loss": 2.0768, + "step": 9495 + }, + { + "epoch": 1.28, + "learning_rate": 7.165630910564712e-07, + "loss": 2.0453, + "step": 9500 + }, + { + "epoch": 1.28, + "learning_rate": 7.158875979465009e-07, + "loss": 1.9795, + "step": 9505 + }, + { + "epoch": 1.28, + "learning_rate": 7.152121048365306e-07, + "loss": 1.9689, + "step": 9510 + }, + { + "epoch": 1.29, + "learning_rate": 7.145366117265603e-07, + "loss": 2.0186, + "step": 9515 + }, + { + "epoch": 1.29, + "learning_rate": 7.138611186165901e-07, + "loss": 2.0045, + "step": 9520 + }, + { + "epoch": 1.29, + "learning_rate": 7.131856255066198e-07, + "loss": 2.0813, + "step": 9525 + }, + { + "epoch": 1.29, + "learning_rate": 7.125101323966495e-07, + "loss": 1.9417, + "step": 9530 + }, + { + "epoch": 1.29, + "learning_rate": 7.118346392866792e-07, + "loss": 1.8859, + "step": 9535 + }, + { + "epoch": 1.29, + "learning_rate": 7.11159146176709e-07, + "loss": 1.9949, + "step": 9540 + }, + { + "epoch": 1.29, + "learning_rate": 7.104836530667388e-07, + "loss": 2.0046, + "step": 9545 + }, + { + "epoch": 1.29, + "learning_rate": 7.098081599567684e-07, + "loss": 1.9243, + "step": 9550 + }, + { + "epoch": 1.29, + "learning_rate": 7.091326668467981e-07, + "loss": 1.9855, + "step": 9555 + }, + { + "epoch": 1.29, + "learning_rate": 7.084571737368279e-07, + "loss": 1.9326, + "step": 9560 + }, + { + "epoch": 1.29, + "learning_rate": 7.077816806268576e-07, + "loss": 2.0766, + "step": 9565 + }, + { + "epoch": 1.29, + "learning_rate": 7.071061875168873e-07, + "loss": 2.016, + "step": 9570 + }, + { + "epoch": 1.29, + "learning_rate": 7.06430694406917e-07, + "loss": 2.012, + "step": 9575 + }, + { + "epoch": 1.29, + "learning_rate": 7.057552012969468e-07, + "loss": 1.9261, + "step": 9580 + }, + { + "epoch": 1.29, + "learning_rate": 7.050797081869765e-07, + "loss": 2.047, + "step": 9585 + }, + { + "epoch": 1.3, + "learning_rate": 7.044042150770062e-07, + "loss": 1.9532, + "step": 9590 + }, + { + "epoch": 1.3, + "learning_rate": 7.03728721967036e-07, + "loss": 2.0149, + "step": 9595 + }, + { + "epoch": 1.3, + "learning_rate": 7.030532288570657e-07, + "loss": 1.9688, + "step": 9600 + }, + { + "epoch": 1.3, + "eval_loss": 1.963936448097229, + "eval_runtime": 165.4614, + "eval_samples_per_second": 3.602, + "eval_steps_per_second": 0.453, + "step": 9600 + }, + { + "epoch": 1.3, + "learning_rate": 7.023777357470954e-07, + "loss": 1.9251, + "step": 9605 + }, + { + "epoch": 1.3, + "learning_rate": 7.01702242637125e-07, + "loss": 2.0004, + "step": 9610 + }, + { + "epoch": 1.3, + "learning_rate": 7.010267495271548e-07, + "loss": 1.903, + "step": 9615 + }, + { + "epoch": 1.3, + "learning_rate": 7.003512564171845e-07, + "loss": 2.0285, + "step": 9620 + }, + { + "epoch": 1.3, + "learning_rate": 6.996757633072142e-07, + "loss": 1.9815, + "step": 9625 + }, + { + "epoch": 1.3, + "learning_rate": 6.990002701972439e-07, + "loss": 1.8968, + "step": 9630 + }, + { + "epoch": 1.3, + "learning_rate": 6.983247770872737e-07, + "loss": 1.9809, + "step": 9635 + }, + { + "epoch": 1.3, + "learning_rate": 6.976492839773034e-07, + "loss": 1.9713, + "step": 9640 + }, + { + "epoch": 1.3, + "learning_rate": 6.969737908673331e-07, + "loss": 1.9472, + "step": 9645 + }, + { + "epoch": 1.3, + "learning_rate": 6.962982977573628e-07, + "loss": 1.9328, + "step": 9650 + }, + { + "epoch": 1.3, + "learning_rate": 6.956228046473926e-07, + "loss": 1.9631, + "step": 9655 + }, + { + "epoch": 1.31, + "learning_rate": 6.949473115374223e-07, + "loss": 2.0051, + "step": 9660 + }, + { + "epoch": 1.31, + "learning_rate": 6.94271818427452e-07, + "loss": 1.9866, + "step": 9665 + }, + { + "epoch": 1.31, + "learning_rate": 6.935963253174817e-07, + "loss": 1.9494, + "step": 9670 + }, + { + "epoch": 1.31, + "learning_rate": 6.929208322075114e-07, + "loss": 1.9731, + "step": 9675 + }, + { + "epoch": 1.31, + "learning_rate": 6.922453390975411e-07, + "loss": 1.9718, + "step": 9680 + }, + { + "epoch": 1.31, + "learning_rate": 6.915698459875708e-07, + "loss": 1.9197, + "step": 9685 + }, + { + "epoch": 1.31, + "learning_rate": 6.908943528776006e-07, + "loss": 2.0505, + "step": 9690 + }, + { + "epoch": 1.31, + "learning_rate": 6.902188597676304e-07, + "loss": 2.0586, + "step": 9695 + }, + { + "epoch": 1.31, + "learning_rate": 6.895433666576601e-07, + "loss": 1.9232, + "step": 9700 + }, + { + "epoch": 1.31, + "learning_rate": 6.888678735476898e-07, + "loss": 1.9559, + "step": 9705 + }, + { + "epoch": 1.31, + "learning_rate": 6.881923804377195e-07, + "loss": 2.0318, + "step": 9710 + }, + { + "epoch": 1.31, + "learning_rate": 6.875168873277493e-07, + "loss": 1.8752, + "step": 9715 + }, + { + "epoch": 1.31, + "learning_rate": 6.86841394217779e-07, + "loss": 1.9969, + "step": 9720 + }, + { + "epoch": 1.31, + "learning_rate": 6.861659011078087e-07, + "loss": 1.9531, + "step": 9725 + }, + { + "epoch": 1.31, + "learning_rate": 6.854904079978385e-07, + "loss": 2.0319, + "step": 9730 + }, + { + "epoch": 1.32, + "learning_rate": 6.848149148878682e-07, + "loss": 1.8927, + "step": 9735 + }, + { + "epoch": 1.32, + "learning_rate": 6.841394217778978e-07, + "loss": 1.8673, + "step": 9740 + }, + { + "epoch": 1.32, + "learning_rate": 6.834639286679275e-07, + "loss": 2.0187, + "step": 9745 + }, + { + "epoch": 1.32, + "learning_rate": 6.827884355579573e-07, + "loss": 2.0092, + "step": 9750 + }, + { + "epoch": 1.32, + "learning_rate": 6.82112942447987e-07, + "loss": 2.0315, + "step": 9755 + }, + { + "epoch": 1.32, + "learning_rate": 6.814374493380167e-07, + "loss": 1.9539, + "step": 9760 + }, + { + "epoch": 1.32, + "learning_rate": 6.807619562280464e-07, + "loss": 1.8357, + "step": 9765 + }, + { + "epoch": 1.32, + "learning_rate": 6.800864631180762e-07, + "loss": 2.0681, + "step": 9770 + }, + { + "epoch": 1.32, + "learning_rate": 6.794109700081059e-07, + "loss": 1.9327, + "step": 9775 + }, + { + "epoch": 1.32, + "learning_rate": 6.787354768981356e-07, + "loss": 1.916, + "step": 9780 + }, + { + "epoch": 1.32, + "learning_rate": 6.780599837881653e-07, + "loss": 1.9925, + "step": 9785 + }, + { + "epoch": 1.32, + "learning_rate": 6.773844906781951e-07, + "loss": 1.9407, + "step": 9790 + }, + { + "epoch": 1.32, + "learning_rate": 6.767089975682248e-07, + "loss": 1.9569, + "step": 9795 + }, + { + "epoch": 1.32, + "learning_rate": 6.760335044582545e-07, + "loss": 1.9141, + "step": 9800 + }, + { + "epoch": 1.32, + "learning_rate": 6.753580113482841e-07, + "loss": 1.9845, + "step": 9805 + }, + { + "epoch": 1.33, + "learning_rate": 6.746825182383139e-07, + "loss": 2.0271, + "step": 9810 + }, + { + "epoch": 1.33, + "learning_rate": 6.740070251283436e-07, + "loss": 1.9355, + "step": 9815 + }, + { + "epoch": 1.33, + "learning_rate": 6.733315320183733e-07, + "loss": 1.9558, + "step": 9820 + }, + { + "epoch": 1.33, + "learning_rate": 6.72656038908403e-07, + "loss": 1.9754, + "step": 9825 + }, + { + "epoch": 1.33, + "learning_rate": 6.719805457984328e-07, + "loss": 2.0408, + "step": 9830 + }, + { + "epoch": 1.33, + "learning_rate": 6.713050526884625e-07, + "loss": 1.9422, + "step": 9835 + }, + { + "epoch": 1.33, + "learning_rate": 6.706295595784923e-07, + "loss": 2.0819, + "step": 9840 + }, + { + "epoch": 1.33, + "learning_rate": 6.699540664685221e-07, + "loss": 1.9963, + "step": 9845 + }, + { + "epoch": 1.33, + "learning_rate": 6.692785733585518e-07, + "loss": 1.9835, + "step": 9850 + }, + { + "epoch": 1.33, + "learning_rate": 6.686030802485815e-07, + "loss": 1.9566, + "step": 9855 + }, + { + "epoch": 1.33, + "learning_rate": 6.679275871386112e-07, + "loss": 2.0353, + "step": 9860 + }, + { + "epoch": 1.33, + "learning_rate": 6.67252094028641e-07, + "loss": 2.0683, + "step": 9865 + }, + { + "epoch": 1.33, + "learning_rate": 6.665766009186706e-07, + "loss": 1.9563, + "step": 9870 + }, + { + "epoch": 1.33, + "learning_rate": 6.659011078087003e-07, + "loss": 1.8663, + "step": 9875 + }, + { + "epoch": 1.33, + "learning_rate": 6.6522561469873e-07, + "loss": 1.9794, + "step": 9880 + }, + { + "epoch": 1.34, + "learning_rate": 6.645501215887598e-07, + "loss": 1.9824, + "step": 9885 + }, + { + "epoch": 1.34, + "learning_rate": 6.638746284787895e-07, + "loss": 1.9124, + "step": 9890 + }, + { + "epoch": 1.34, + "learning_rate": 6.631991353688192e-07, + "loss": 1.9687, + "step": 9895 + }, + { + "epoch": 1.34, + "learning_rate": 6.625236422588489e-07, + "loss": 2.0313, + "step": 9900 + }, + { + "epoch": 1.34, + "learning_rate": 6.618481491488787e-07, + "loss": 1.943, + "step": 9905 + }, + { + "epoch": 1.34, + "learning_rate": 6.611726560389084e-07, + "loss": 1.9226, + "step": 9910 + }, + { + "epoch": 1.34, + "learning_rate": 6.604971629289381e-07, + "loss": 2.0105, + "step": 9915 + }, + { + "epoch": 1.34, + "learning_rate": 6.598216698189678e-07, + "loss": 2.0327, + "step": 9920 + }, + { + "epoch": 1.34, + "learning_rate": 6.591461767089976e-07, + "loss": 1.9641, + "step": 9925 + }, + { + "epoch": 1.34, + "learning_rate": 6.584706835990273e-07, + "loss": 1.9631, + "step": 9930 + }, + { + "epoch": 1.34, + "learning_rate": 6.577951904890569e-07, + "loss": 1.9812, + "step": 9935 + }, + { + "epoch": 1.34, + "learning_rate": 6.571196973790866e-07, + "loss": 2.0004, + "step": 9940 + }, + { + "epoch": 1.34, + "learning_rate": 6.564442042691164e-07, + "loss": 1.8798, + "step": 9945 + }, + { + "epoch": 1.34, + "learning_rate": 6.557687111591461e-07, + "loss": 2.0338, + "step": 9950 + }, + { + "epoch": 1.34, + "learning_rate": 6.550932180491758e-07, + "loss": 1.9378, + "step": 9955 + }, + { + "epoch": 1.35, + "learning_rate": 6.544177249392055e-07, + "loss": 1.8363, + "step": 9960 + }, + { + "epoch": 1.35, + "learning_rate": 6.537422318292353e-07, + "loss": 1.9079, + "step": 9965 + }, + { + "epoch": 1.35, + "learning_rate": 6.53066738719265e-07, + "loss": 1.8645, + "step": 9970 + }, + { + "epoch": 1.35, + "learning_rate": 6.523912456092947e-07, + "loss": 1.9543, + "step": 9975 + }, + { + "epoch": 1.35, + "learning_rate": 6.517157524993245e-07, + "loss": 1.942, + "step": 9980 + }, + { + "epoch": 1.35, + "learning_rate": 6.510402593893543e-07, + "loss": 1.9462, + "step": 9985 + }, + { + "epoch": 1.35, + "learning_rate": 6.50364766279384e-07, + "loss": 2.1435, + "step": 9990 + }, + { + "epoch": 1.35, + "learning_rate": 6.496892731694137e-07, + "loss": 2.0368, + "step": 9995 + }, + { + "epoch": 1.35, + "learning_rate": 6.490137800594434e-07, + "loss": 1.9291, + "step": 10000 + }, + { + "epoch": 1.35, + "eval_loss": 1.961105465888977, + "eval_runtime": 165.405, + "eval_samples_per_second": 3.603, + "eval_steps_per_second": 0.453, + "step": 10000 + }, + { + "epoch": 1.35, + "learning_rate": 6.483382869494731e-07, + "loss": 1.9377, + "step": 10005 + }, + { + "epoch": 1.35, + "learning_rate": 6.476627938395028e-07, + "loss": 1.9865, + "step": 10010 + }, + { + "epoch": 1.35, + "learning_rate": 6.469873007295325e-07, + "loss": 1.9385, + "step": 10015 + }, + { + "epoch": 1.35, + "learning_rate": 6.463118076195623e-07, + "loss": 1.9832, + "step": 10020 + }, + { + "epoch": 1.35, + "learning_rate": 6.45636314509592e-07, + "loss": 2.0501, + "step": 10025 + }, + { + "epoch": 1.35, + "learning_rate": 6.449608213996217e-07, + "loss": 1.931, + "step": 10030 + }, + { + "epoch": 1.36, + "learning_rate": 6.442853282896514e-07, + "loss": 1.9716, + "step": 10035 + }, + { + "epoch": 1.36, + "learning_rate": 6.436098351796812e-07, + "loss": 2.1156, + "step": 10040 + }, + { + "epoch": 1.36, + "learning_rate": 6.429343420697109e-07, + "loss": 1.9412, + "step": 10045 + }, + { + "epoch": 1.36, + "learning_rate": 6.422588489597406e-07, + "loss": 1.9237, + "step": 10050 + }, + { + "epoch": 1.36, + "learning_rate": 6.415833558497703e-07, + "loss": 1.9539, + "step": 10055 + }, + { + "epoch": 1.36, + "learning_rate": 6.409078627398001e-07, + "loss": 1.9547, + "step": 10060 + }, + { + "epoch": 1.36, + "learning_rate": 6.402323696298297e-07, + "loss": 1.9443, + "step": 10065 + }, + { + "epoch": 1.36, + "learning_rate": 6.395568765198594e-07, + "loss": 1.8834, + "step": 10070 + }, + { + "epoch": 1.36, + "learning_rate": 6.388813834098891e-07, + "loss": 1.996, + "step": 10075 + }, + { + "epoch": 1.36, + "learning_rate": 6.382058902999189e-07, + "loss": 1.9791, + "step": 10080 + }, + { + "epoch": 1.36, + "learning_rate": 6.375303971899486e-07, + "loss": 2.0237, + "step": 10085 + }, + { + "epoch": 1.36, + "learning_rate": 6.368549040799783e-07, + "loss": 1.955, + "step": 10090 + }, + { + "epoch": 1.36, + "learning_rate": 6.36179410970008e-07, + "loss": 2.0109, + "step": 10095 + }, + { + "epoch": 1.36, + "learning_rate": 6.355039178600378e-07, + "loss": 1.9346, + "step": 10100 + }, + { + "epoch": 1.37, + "learning_rate": 6.348284247500675e-07, + "loss": 2.0298, + "step": 10105 + }, + { + "epoch": 1.37, + "learning_rate": 6.341529316400972e-07, + "loss": 2.0031, + "step": 10110 + }, + { + "epoch": 1.37, + "learning_rate": 6.33477438530127e-07, + "loss": 1.887, + "step": 10115 + }, + { + "epoch": 1.37, + "learning_rate": 6.328019454201567e-07, + "loss": 1.955, + "step": 10120 + }, + { + "epoch": 1.37, + "learning_rate": 6.321264523101863e-07, + "loss": 1.9275, + "step": 10125 + }, + { + "epoch": 1.37, + "learning_rate": 6.314509592002161e-07, + "loss": 1.8876, + "step": 10130 + }, + { + "epoch": 1.37, + "learning_rate": 6.307754660902459e-07, + "loss": 1.9722, + "step": 10135 + }, + { + "epoch": 1.37, + "learning_rate": 6.300999729802756e-07, + "loss": 1.9802, + "step": 10140 + }, + { + "epoch": 1.37, + "learning_rate": 6.294244798703053e-07, + "loss": 2.0026, + "step": 10145 + }, + { + "epoch": 1.37, + "learning_rate": 6.28748986760335e-07, + "loss": 2.0283, + "step": 10150 + }, + { + "epoch": 1.37, + "learning_rate": 6.280734936503648e-07, + "loss": 1.9919, + "step": 10155 + }, + { + "epoch": 1.37, + "learning_rate": 6.273980005403945e-07, + "loss": 1.915, + "step": 10160 + }, + { + "epoch": 1.37, + "learning_rate": 6.267225074304242e-07, + "loss": 1.9717, + "step": 10165 + }, + { + "epoch": 1.37, + "learning_rate": 6.260470143204539e-07, + "loss": 1.9684, + "step": 10170 + }, + { + "epoch": 1.37, + "learning_rate": 6.253715212104837e-07, + "loss": 2.023, + "step": 10175 + }, + { + "epoch": 1.38, + "learning_rate": 6.246960281005134e-07, + "loss": 1.8704, + "step": 10180 + }, + { + "epoch": 1.38, + "learning_rate": 6.240205349905431e-07, + "loss": 1.9476, + "step": 10185 + }, + { + "epoch": 1.38, + "learning_rate": 6.233450418805728e-07, + "loss": 1.975, + "step": 10190 + }, + { + "epoch": 1.38, + "learning_rate": 6.226695487706025e-07, + "loss": 1.8549, + "step": 10195 + }, + { + "epoch": 1.38, + "learning_rate": 6.219940556606322e-07, + "loss": 2.0041, + "step": 10200 + }, + { + "epoch": 1.38, + "learning_rate": 6.213185625506619e-07, + "loss": 1.9219, + "step": 10205 + }, + { + "epoch": 1.38, + "learning_rate": 6.206430694406916e-07, + "loss": 2.0199, + "step": 10210 + }, + { + "epoch": 1.38, + "learning_rate": 6.199675763307214e-07, + "loss": 2.0977, + "step": 10215 + }, + { + "epoch": 1.38, + "learning_rate": 6.192920832207511e-07, + "loss": 1.9502, + "step": 10220 + }, + { + "epoch": 1.38, + "learning_rate": 6.186165901107808e-07, + "loss": 2.0295, + "step": 10225 + }, + { + "epoch": 1.38, + "learning_rate": 6.179410970008106e-07, + "loss": 1.8866, + "step": 10230 + }, + { + "epoch": 1.38, + "learning_rate": 6.172656038908403e-07, + "loss": 1.9987, + "step": 10235 + }, + { + "epoch": 1.38, + "learning_rate": 6.1659011078087e-07, + "loss": 2.0693, + "step": 10240 + }, + { + "epoch": 1.38, + "learning_rate": 6.159146176708997e-07, + "loss": 1.9641, + "step": 10245 + }, + { + "epoch": 1.38, + "learning_rate": 6.152391245609295e-07, + "loss": 1.8745, + "step": 10250 + }, + { + "epoch": 1.39, + "learning_rate": 6.145636314509591e-07, + "loss": 1.9384, + "step": 10255 + }, + { + "epoch": 1.39, + "learning_rate": 6.138881383409888e-07, + "loss": 1.9278, + "step": 10260 + }, + { + "epoch": 1.39, + "learning_rate": 6.132126452310185e-07, + "loss": 2.0378, + "step": 10265 + }, + { + "epoch": 1.39, + "learning_rate": 6.125371521210483e-07, + "loss": 2.0384, + "step": 10270 + }, + { + "epoch": 1.39, + "learning_rate": 6.118616590110781e-07, + "loss": 1.9244, + "step": 10275 + }, + { + "epoch": 1.39, + "learning_rate": 6.111861659011078e-07, + "loss": 1.9505, + "step": 10280 + }, + { + "epoch": 1.39, + "learning_rate": 6.105106727911375e-07, + "loss": 1.9794, + "step": 10285 + }, + { + "epoch": 1.39, + "learning_rate": 6.098351796811673e-07, + "loss": 1.9922, + "step": 10290 + }, + { + "epoch": 1.39, + "learning_rate": 6.09159686571197e-07, + "loss": 1.9708, + "step": 10295 + }, + { + "epoch": 1.39, + "learning_rate": 6.084841934612267e-07, + "loss": 2.0161, + "step": 10300 + }, + { + "epoch": 1.39, + "learning_rate": 6.078087003512564e-07, + "loss": 1.9192, + "step": 10305 + }, + { + "epoch": 1.39, + "learning_rate": 6.071332072412862e-07, + "loss": 1.91, + "step": 10310 + }, + { + "epoch": 1.39, + "learning_rate": 6.064577141313159e-07, + "loss": 2.0866, + "step": 10315 + }, + { + "epoch": 1.39, + "learning_rate": 6.057822210213456e-07, + "loss": 2.0044, + "step": 10320 + }, + { + "epoch": 1.39, + "learning_rate": 6.051067279113752e-07, + "loss": 2.1199, + "step": 10325 + }, + { + "epoch": 1.4, + "learning_rate": 6.04431234801405e-07, + "loss": 1.996, + "step": 10330 + }, + { + "epoch": 1.4, + "learning_rate": 6.037557416914347e-07, + "loss": 1.9303, + "step": 10335 + }, + { + "epoch": 1.4, + "learning_rate": 6.030802485814644e-07, + "loss": 1.9297, + "step": 10340 + }, + { + "epoch": 1.4, + "learning_rate": 6.024047554714942e-07, + "loss": 1.9069, + "step": 10345 + }, + { + "epoch": 1.4, + "learning_rate": 6.017292623615239e-07, + "loss": 2.0062, + "step": 10350 + }, + { + "epoch": 1.4, + "learning_rate": 6.010537692515536e-07, + "loss": 2.0051, + "step": 10355 + }, + { + "epoch": 1.4, + "learning_rate": 6.003782761415833e-07, + "loss": 1.9996, + "step": 10360 + }, + { + "epoch": 1.4, + "learning_rate": 5.997027830316131e-07, + "loss": 1.9947, + "step": 10365 + }, + { + "epoch": 1.4, + "learning_rate": 5.990272899216428e-07, + "loss": 1.984, + "step": 10370 + }, + { + "epoch": 1.4, + "learning_rate": 5.983517968116725e-07, + "loss": 1.9509, + "step": 10375 + }, + { + "epoch": 1.4, + "learning_rate": 5.976763037017022e-07, + "loss": 1.9455, + "step": 10380 + }, + { + "epoch": 1.4, + "learning_rate": 5.970008105917319e-07, + "loss": 1.9383, + "step": 10385 + }, + { + "epoch": 1.4, + "learning_rate": 5.963253174817616e-07, + "loss": 1.8293, + "step": 10390 + }, + { + "epoch": 1.4, + "learning_rate": 5.956498243717913e-07, + "loss": 1.9228, + "step": 10395 + }, + { + "epoch": 1.4, + "learning_rate": 5.94974331261821e-07, + "loss": 2.0348, + "step": 10400 + }, + { + "epoch": 1.4, + "eval_loss": 1.9586541652679443, + "eval_runtime": 165.5067, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.453, + "step": 10400 + }, + { + "epoch": 1.41, + "learning_rate": 5.942988381518508e-07, + "loss": 1.9632, + "step": 10405 + }, + { + "epoch": 1.41, + "learning_rate": 5.936233450418805e-07, + "loss": 1.8818, + "step": 10410 + }, + { + "epoch": 1.41, + "learning_rate": 5.929478519319102e-07, + "loss": 1.9375, + "step": 10415 + }, + { + "epoch": 1.41, + "learning_rate": 5.9227235882194e-07, + "loss": 1.9466, + "step": 10420 + }, + { + "epoch": 1.41, + "learning_rate": 5.915968657119698e-07, + "loss": 2.0166, + "step": 10425 + }, + { + "epoch": 1.41, + "learning_rate": 5.909213726019995e-07, + "loss": 1.9846, + "step": 10430 + }, + { + "epoch": 1.41, + "learning_rate": 5.902458794920292e-07, + "loss": 1.9175, + "step": 10435 + }, + { + "epoch": 1.41, + "learning_rate": 5.89570386382059e-07, + "loss": 1.8928, + "step": 10440 + }, + { + "epoch": 1.41, + "learning_rate": 5.888948932720887e-07, + "loss": 1.9582, + "step": 10445 + }, + { + "epoch": 1.41, + "learning_rate": 5.882194001621184e-07, + "loss": 1.9586, + "step": 10450 + }, + { + "epoch": 1.41, + "learning_rate": 5.87543907052148e-07, + "loss": 1.9719, + "step": 10455 + }, + { + "epoch": 1.41, + "learning_rate": 5.868684139421778e-07, + "loss": 1.9996, + "step": 10460 + }, + { + "epoch": 1.41, + "learning_rate": 5.861929208322075e-07, + "loss": 1.935, + "step": 10465 + }, + { + "epoch": 1.41, + "learning_rate": 5.855174277222372e-07, + "loss": 1.9258, + "step": 10470 + }, + { + "epoch": 1.42, + "learning_rate": 5.848419346122669e-07, + "loss": 2.0089, + "step": 10475 + }, + { + "epoch": 1.42, + "learning_rate": 5.841664415022967e-07, + "loss": 1.9161, + "step": 10480 + }, + { + "epoch": 1.42, + "learning_rate": 5.834909483923264e-07, + "loss": 1.9469, + "step": 10485 + }, + { + "epoch": 1.42, + "learning_rate": 5.828154552823561e-07, + "loss": 1.9688, + "step": 10490 + }, + { + "epoch": 1.42, + "learning_rate": 5.821399621723858e-07, + "loss": 1.9424, + "step": 10495 + }, + { + "epoch": 1.42, + "learning_rate": 5.814644690624156e-07, + "loss": 1.9893, + "step": 10500 + }, + { + "epoch": 1.42, + "learning_rate": 5.807889759524453e-07, + "loss": 1.9072, + "step": 10505 + }, + { + "epoch": 1.42, + "learning_rate": 5.80113482842475e-07, + "loss": 2.0403, + "step": 10510 + }, + { + "epoch": 1.42, + "learning_rate": 5.794379897325046e-07, + "loss": 1.9779, + "step": 10515 + }, + { + "epoch": 1.42, + "learning_rate": 5.787624966225344e-07, + "loss": 1.9289, + "step": 10520 + }, + { + "epoch": 1.42, + "learning_rate": 5.780870035125641e-07, + "loss": 2.047, + "step": 10525 + }, + { + "epoch": 1.42, + "learning_rate": 5.774115104025938e-07, + "loss": 2.0729, + "step": 10530 + }, + { + "epoch": 1.42, + "learning_rate": 5.767360172926235e-07, + "loss": 1.9825, + "step": 10535 + }, + { + "epoch": 1.42, + "learning_rate": 5.760605241826533e-07, + "loss": 1.9138, + "step": 10540 + }, + { + "epoch": 1.42, + "learning_rate": 5.75385031072683e-07, + "loss": 1.9638, + "step": 10545 + }, + { + "epoch": 1.43, + "learning_rate": 5.747095379627127e-07, + "loss": 2.0517, + "step": 10550 + }, + { + "epoch": 1.43, + "learning_rate": 5.740340448527424e-07, + "loss": 2.0771, + "step": 10555 + }, + { + "epoch": 1.43, + "learning_rate": 5.733585517427722e-07, + "loss": 1.9698, + "step": 10560 + }, + { + "epoch": 1.43, + "learning_rate": 5.72683058632802e-07, + "loss": 1.9299, + "step": 10565 + }, + { + "epoch": 1.43, + "learning_rate": 5.720075655228317e-07, + "loss": 2.0264, + "step": 10570 + }, + { + "epoch": 1.43, + "learning_rate": 5.713320724128615e-07, + "loss": 2.0212, + "step": 10575 + }, + { + "epoch": 1.43, + "learning_rate": 5.706565793028912e-07, + "loss": 1.9875, + "step": 10580 + }, + { + "epoch": 1.43, + "learning_rate": 5.699810861929208e-07, + "loss": 1.9172, + "step": 10585 + }, + { + "epoch": 1.43, + "learning_rate": 5.693055930829505e-07, + "loss": 2.0435, + "step": 10590 + }, + { + "epoch": 1.43, + "learning_rate": 5.686300999729803e-07, + "loss": 2.0022, + "step": 10595 + }, + { + "epoch": 1.43, + "learning_rate": 5.6795460686301e-07, + "loss": 2.0239, + "step": 10600 + }, + { + "epoch": 1.43, + "learning_rate": 5.672791137530397e-07, + "loss": 1.9575, + "step": 10605 + }, + { + "epoch": 1.43, + "learning_rate": 5.666036206430694e-07, + "loss": 2.0316, + "step": 10610 + }, + { + "epoch": 1.43, + "learning_rate": 5.659281275330992e-07, + "loss": 1.9751, + "step": 10615 + }, + { + "epoch": 1.43, + "learning_rate": 5.652526344231289e-07, + "loss": 1.9895, + "step": 10620 + }, + { + "epoch": 1.44, + "learning_rate": 5.645771413131586e-07, + "loss": 1.9672, + "step": 10625 + }, + { + "epoch": 1.44, + "learning_rate": 5.639016482031883e-07, + "loss": 1.9807, + "step": 10630 + }, + { + "epoch": 1.44, + "learning_rate": 5.632261550932181e-07, + "loss": 1.9333, + "step": 10635 + }, + { + "epoch": 1.44, + "learning_rate": 5.625506619832478e-07, + "loss": 1.835, + "step": 10640 + }, + { + "epoch": 1.44, + "learning_rate": 5.618751688732774e-07, + "loss": 1.9753, + "step": 10645 + }, + { + "epoch": 1.44, + "learning_rate": 5.611996757633071e-07, + "loss": 1.9376, + "step": 10650 + }, + { + "epoch": 1.44, + "learning_rate": 5.605241826533369e-07, + "loss": 1.9732, + "step": 10655 + }, + { + "epoch": 1.44, + "learning_rate": 5.598486895433666e-07, + "loss": 2.0749, + "step": 10660 + }, + { + "epoch": 1.44, + "learning_rate": 5.591731964333963e-07, + "loss": 1.934, + "step": 10665 + }, + { + "epoch": 1.44, + "learning_rate": 5.58497703323426e-07, + "loss": 1.9208, + "step": 10670 + }, + { + "epoch": 1.44, + "learning_rate": 5.578222102134558e-07, + "loss": 1.9562, + "step": 10675 + }, + { + "epoch": 1.44, + "learning_rate": 5.571467171034855e-07, + "loss": 2.082, + "step": 10680 + }, + { + "epoch": 1.44, + "learning_rate": 5.564712239935152e-07, + "loss": 2.0432, + "step": 10685 + }, + { + "epoch": 1.44, + "learning_rate": 5.557957308835449e-07, + "loss": 1.9236, + "step": 10690 + }, + { + "epoch": 1.44, + "learning_rate": 5.551202377735747e-07, + "loss": 2.0632, + "step": 10695 + }, + { + "epoch": 1.45, + "learning_rate": 5.544447446636044e-07, + "loss": 1.9083, + "step": 10700 + }, + { + "epoch": 1.45, + "learning_rate": 5.537692515536341e-07, + "loss": 2.0383, + "step": 10705 + }, + { + "epoch": 1.45, + "learning_rate": 5.530937584436639e-07, + "loss": 1.9063, + "step": 10710 + }, + { + "epoch": 1.45, + "learning_rate": 5.524182653336936e-07, + "loss": 2.0845, + "step": 10715 + }, + { + "epoch": 1.45, + "learning_rate": 5.517427722237233e-07, + "loss": 1.9743, + "step": 10720 + }, + { + "epoch": 1.45, + "learning_rate": 5.51067279113753e-07, + "loss": 2.0553, + "step": 10725 + }, + { + "epoch": 1.45, + "learning_rate": 5.503917860037828e-07, + "loss": 2.0907, + "step": 10730 + }, + { + "epoch": 1.45, + "learning_rate": 5.497162928938125e-07, + "loss": 1.9165, + "step": 10735 + }, + { + "epoch": 1.45, + "learning_rate": 5.490407997838422e-07, + "loss": 1.9457, + "step": 10740 + }, + { + "epoch": 1.45, + "learning_rate": 5.483653066738719e-07, + "loss": 1.9602, + "step": 10745 + }, + { + "epoch": 1.45, + "learning_rate": 5.476898135639017e-07, + "loss": 1.9842, + "step": 10750 + }, + { + "epoch": 1.45, + "learning_rate": 5.470143204539314e-07, + "loss": 1.9074, + "step": 10755 + }, + { + "epoch": 1.45, + "learning_rate": 5.463388273439611e-07, + "loss": 1.9034, + "step": 10760 + }, + { + "epoch": 1.45, + "learning_rate": 5.456633342339908e-07, + "loss": 1.966, + "step": 10765 + }, + { + "epoch": 1.45, + "learning_rate": 5.449878411240206e-07, + "loss": 2.0049, + "step": 10770 + }, + { + "epoch": 1.46, + "learning_rate": 5.443123480140502e-07, + "loss": 1.9493, + "step": 10775 + }, + { + "epoch": 1.46, + "learning_rate": 5.436368549040799e-07, + "loss": 1.9264, + "step": 10780 + }, + { + "epoch": 1.46, + "learning_rate": 5.429613617941096e-07, + "loss": 1.9516, + "step": 10785 + }, + { + "epoch": 1.46, + "learning_rate": 5.422858686841394e-07, + "loss": 2.0036, + "step": 10790 + }, + { + "epoch": 1.46, + "learning_rate": 5.416103755741691e-07, + "loss": 1.9817, + "step": 10795 + }, + { + "epoch": 1.46, + "learning_rate": 5.409348824641988e-07, + "loss": 1.9724, + "step": 10800 + }, + { + "epoch": 1.46, + "eval_loss": 1.9568144083023071, + "eval_runtime": 165.314, + "eval_samples_per_second": 3.605, + "eval_steps_per_second": 0.454, + "step": 10800 + }, + { + "epoch": 1.46, + "learning_rate": 5.402593893542285e-07, + "loss": 2.0044, + "step": 10805 + }, + { + "epoch": 1.46, + "learning_rate": 5.395838962442583e-07, + "loss": 1.9504, + "step": 10810 + }, + { + "epoch": 1.46, + "learning_rate": 5.38908403134288e-07, + "loss": 1.9536, + "step": 10815 + }, + { + "epoch": 1.46, + "learning_rate": 5.382329100243177e-07, + "loss": 1.9608, + "step": 10820 + }, + { + "epoch": 1.46, + "learning_rate": 5.375574169143475e-07, + "loss": 2.0382, + "step": 10825 + }, + { + "epoch": 1.46, + "learning_rate": 5.368819238043772e-07, + "loss": 1.9799, + "step": 10830 + }, + { + "epoch": 1.46, + "learning_rate": 5.362064306944069e-07, + "loss": 1.9551, + "step": 10835 + }, + { + "epoch": 1.46, + "learning_rate": 5.355309375844365e-07, + "loss": 1.9896, + "step": 10840 + }, + { + "epoch": 1.47, + "learning_rate": 5.348554444744662e-07, + "loss": 2.039, + "step": 10845 + }, + { + "epoch": 1.47, + "learning_rate": 5.34179951364496e-07, + "loss": 1.9283, + "step": 10850 + }, + { + "epoch": 1.47, + "learning_rate": 5.335044582545257e-07, + "loss": 1.924, + "step": 10855 + }, + { + "epoch": 1.47, + "learning_rate": 5.328289651445555e-07, + "loss": 2.0038, + "step": 10860 + }, + { + "epoch": 1.47, + "learning_rate": 5.321534720345853e-07, + "loss": 1.9953, + "step": 10865 + }, + { + "epoch": 1.47, + "learning_rate": 5.31477978924615e-07, + "loss": 2.0623, + "step": 10870 + }, + { + "epoch": 1.47, + "learning_rate": 5.308024858146447e-07, + "loss": 2.0246, + "step": 10875 + }, + { + "epoch": 1.47, + "learning_rate": 5.301269927046744e-07, + "loss": 1.9648, + "step": 10880 + }, + { + "epoch": 1.47, + "learning_rate": 5.294514995947042e-07, + "loss": 1.9202, + "step": 10885 + }, + { + "epoch": 1.47, + "learning_rate": 5.287760064847339e-07, + "loss": 2.0132, + "step": 10890 + }, + { + "epoch": 1.47, + "learning_rate": 5.281005133747636e-07, + "loss": 2.0106, + "step": 10895 + }, + { + "epoch": 1.47, + "learning_rate": 5.274250202647933e-07, + "loss": 2.006, + "step": 10900 + }, + { + "epoch": 1.47, + "learning_rate": 5.26749527154823e-07, + "loss": 1.9925, + "step": 10905 + }, + { + "epoch": 1.47, + "learning_rate": 5.260740340448527e-07, + "loss": 2.018, + "step": 10910 + }, + { + "epoch": 1.47, + "learning_rate": 5.253985409348824e-07, + "loss": 1.9322, + "step": 10915 + }, + { + "epoch": 1.48, + "learning_rate": 5.247230478249121e-07, + "loss": 1.9453, + "step": 10920 + }, + { + "epoch": 1.48, + "learning_rate": 5.240475547149419e-07, + "loss": 1.965, + "step": 10925 + }, + { + "epoch": 1.48, + "learning_rate": 5.233720616049716e-07, + "loss": 1.9446, + "step": 10930 + }, + { + "epoch": 1.48, + "learning_rate": 5.226965684950013e-07, + "loss": 1.9942, + "step": 10935 + }, + { + "epoch": 1.48, + "learning_rate": 5.22021075385031e-07, + "loss": 1.9138, + "step": 10940 + }, + { + "epoch": 1.48, + "learning_rate": 5.213455822750608e-07, + "loss": 1.9764, + "step": 10945 + }, + { + "epoch": 1.48, + "learning_rate": 5.206700891650905e-07, + "loss": 1.9884, + "step": 10950 + }, + { + "epoch": 1.48, + "learning_rate": 5.199945960551202e-07, + "loss": 1.9135, + "step": 10955 + }, + { + "epoch": 1.48, + "learning_rate": 5.1931910294515e-07, + "loss": 2.0193, + "step": 10960 + }, + { + "epoch": 1.48, + "learning_rate": 5.186436098351797e-07, + "loss": 1.9446, + "step": 10965 + }, + { + "epoch": 1.48, + "learning_rate": 5.179681167252093e-07, + "loss": 2.0596, + "step": 10970 + }, + { + "epoch": 1.48, + "learning_rate": 5.17292623615239e-07, + "loss": 1.9912, + "step": 10975 + }, + { + "epoch": 1.48, + "learning_rate": 5.166171305052688e-07, + "loss": 1.9624, + "step": 10980 + }, + { + "epoch": 1.48, + "learning_rate": 5.159416373952985e-07, + "loss": 1.9297, + "step": 10985 + }, + { + "epoch": 1.48, + "learning_rate": 5.152661442853282e-07, + "loss": 1.9808, + "step": 10990 + }, + { + "epoch": 1.49, + "learning_rate": 5.145906511753579e-07, + "loss": 1.9933, + "step": 10995 + }, + { + "epoch": 1.49, + "learning_rate": 5.139151580653877e-07, + "loss": 1.9767, + "step": 11000 + }, + { + "epoch": 1.49, + "learning_rate": 5.132396649554175e-07, + "loss": 1.9229, + "step": 11005 + }, + { + "epoch": 1.49, + "learning_rate": 5.125641718454472e-07, + "loss": 1.9596, + "step": 11010 + }, + { + "epoch": 1.49, + "learning_rate": 5.118886787354769e-07, + "loss": 1.9631, + "step": 11015 + }, + { + "epoch": 1.49, + "learning_rate": 5.112131856255067e-07, + "loss": 1.9485, + "step": 11020 + }, + { + "epoch": 1.49, + "learning_rate": 5.105376925155364e-07, + "loss": 1.9808, + "step": 11025 + }, + { + "epoch": 1.49, + "learning_rate": 5.098621994055661e-07, + "loss": 1.925, + "step": 11030 + }, + { + "epoch": 1.49, + "learning_rate": 5.091867062955957e-07, + "loss": 1.9282, + "step": 11035 + }, + { + "epoch": 1.49, + "learning_rate": 5.085112131856255e-07, + "loss": 1.9395, + "step": 11040 + }, + { + "epoch": 1.49, + "learning_rate": 5.078357200756552e-07, + "loss": 1.9219, + "step": 11045 + }, + { + "epoch": 1.49, + "learning_rate": 5.071602269656849e-07, + "loss": 1.8854, + "step": 11050 + }, + { + "epoch": 1.49, + "learning_rate": 5.064847338557146e-07, + "loss": 1.9981, + "step": 11055 + }, + { + "epoch": 1.49, + "learning_rate": 5.058092407457444e-07, + "loss": 1.9279, + "step": 11060 + }, + { + "epoch": 1.49, + "learning_rate": 5.051337476357741e-07, + "loss": 2.0484, + "step": 11065 + }, + { + "epoch": 1.5, + "learning_rate": 5.044582545258038e-07, + "loss": 1.971, + "step": 11070 + }, + { + "epoch": 1.5, + "learning_rate": 5.037827614158336e-07, + "loss": 1.914, + "step": 11075 + }, + { + "epoch": 1.5, + "learning_rate": 5.031072683058633e-07, + "loss": 1.9193, + "step": 11080 + }, + { + "epoch": 1.5, + "learning_rate": 5.02431775195893e-07, + "loss": 1.9318, + "step": 11085 + }, + { + "epoch": 1.5, + "learning_rate": 5.017562820859227e-07, + "loss": 1.9414, + "step": 11090 + }, + { + "epoch": 1.5, + "learning_rate": 5.010807889759525e-07, + "loss": 1.9664, + "step": 11095 + }, + { + "epoch": 1.5, + "learning_rate": 5.004052958659821e-07, + "loss": 2.0118, + "step": 11100 + }, + { + "epoch": 1.5, + "learning_rate": 4.997298027560118e-07, + "loss": 1.9932, + "step": 11105 + }, + { + "epoch": 1.5, + "learning_rate": 4.990543096460415e-07, + "loss": 1.9015, + "step": 11110 + }, + { + "epoch": 1.5, + "learning_rate": 4.983788165360714e-07, + "loss": 1.9539, + "step": 11115 + }, + { + "epoch": 1.5, + "learning_rate": 4.977033234261011e-07, + "loss": 2.1365, + "step": 11120 + }, + { + "epoch": 1.5, + "learning_rate": 4.970278303161308e-07, + "loss": 1.966, + "step": 11125 + }, + { + "epoch": 1.5, + "learning_rate": 4.963523372061604e-07, + "loss": 2.0074, + "step": 11130 + }, + { + "epoch": 1.5, + "learning_rate": 4.956768440961902e-07, + "loss": 2.0762, + "step": 11135 + }, + { + "epoch": 1.5, + "learning_rate": 4.950013509862199e-07, + "loss": 1.8631, + "step": 11140 + }, + { + "epoch": 1.51, + "learning_rate": 4.943258578762496e-07, + "loss": 1.9708, + "step": 11145 + }, + { + "epoch": 1.51, + "learning_rate": 4.936503647662793e-07, + "loss": 1.9905, + "step": 11150 + }, + { + "epoch": 1.51, + "learning_rate": 4.929748716563091e-07, + "loss": 1.9543, + "step": 11155 + }, + { + "epoch": 1.51, + "learning_rate": 4.922993785463388e-07, + "loss": 1.9418, + "step": 11160 + }, + { + "epoch": 1.51, + "learning_rate": 4.916238854363685e-07, + "loss": 1.9517, + "step": 11165 + }, + { + "epoch": 1.51, + "learning_rate": 4.909483923263982e-07, + "loss": 1.8429, + "step": 11170 + }, + { + "epoch": 1.51, + "learning_rate": 4.90272899216428e-07, + "loss": 1.9754, + "step": 11175 + }, + { + "epoch": 1.51, + "learning_rate": 4.895974061064577e-07, + "loss": 1.8974, + "step": 11180 + }, + { + "epoch": 1.51, + "learning_rate": 4.889219129964874e-07, + "loss": 1.9996, + "step": 11185 + }, + { + "epoch": 1.51, + "learning_rate": 4.882464198865172e-07, + "loss": 2.0173, + "step": 11190 + }, + { + "epoch": 1.51, + "learning_rate": 4.875709267765469e-07, + "loss": 1.9683, + "step": 11195 + }, + { + "epoch": 1.51, + "learning_rate": 4.868954336665766e-07, + "loss": 1.9497, + "step": 11200 + }, + { + "epoch": 1.51, + "eval_loss": 1.955120325088501, + "eval_runtime": 165.2289, + "eval_samples_per_second": 3.607, + "eval_steps_per_second": 0.454, + "step": 11200 + }, + { + "epoch": 1.51, + "learning_rate": 4.862199405566063e-07, + "loss": 1.9811, + "step": 11205 + }, + { + "epoch": 1.51, + "learning_rate": 4.855444474466361e-07, + "loss": 1.9879, + "step": 11210 + }, + { + "epoch": 1.52, + "learning_rate": 4.848689543366658e-07, + "loss": 1.9957, + "step": 11215 + }, + { + "epoch": 1.52, + "learning_rate": 4.841934612266955e-07, + "loss": 1.8997, + "step": 11220 + }, + { + "epoch": 1.52, + "learning_rate": 4.835179681167252e-07, + "loss": 1.9366, + "step": 11225 + }, + { + "epoch": 1.52, + "learning_rate": 4.828424750067549e-07, + "loss": 1.9237, + "step": 11230 + }, + { + "epoch": 1.52, + "learning_rate": 4.821669818967846e-07, + "loss": 1.9348, + "step": 11235 + }, + { + "epoch": 1.52, + "learning_rate": 4.814914887868143e-07, + "loss": 1.8665, + "step": 11240 + }, + { + "epoch": 1.52, + "learning_rate": 4.80815995676844e-07, + "loss": 1.9615, + "step": 11245 + }, + { + "epoch": 1.52, + "learning_rate": 4.801405025668738e-07, + "loss": 2.0397, + "step": 11250 + }, + { + "epoch": 1.52, + "learning_rate": 4.794650094569035e-07, + "loss": 2.0333, + "step": 11255 + }, + { + "epoch": 1.52, + "learning_rate": 4.787895163469332e-07, + "loss": 1.996, + "step": 11260 + }, + { + "epoch": 1.52, + "learning_rate": 4.781140232369629e-07, + "loss": 1.971, + "step": 11265 + }, + { + "epoch": 1.52, + "learning_rate": 4.774385301269927e-07, + "loss": 2.0498, + "step": 11270 + }, + { + "epoch": 1.52, + "learning_rate": 4.767630370170224e-07, + "loss": 2.0136, + "step": 11275 + }, + { + "epoch": 1.52, + "learning_rate": 4.760875439070521e-07, + "loss": 1.9929, + "step": 11280 + }, + { + "epoch": 1.52, + "learning_rate": 4.7541205079708184e-07, + "loss": 1.9976, + "step": 11285 + }, + { + "epoch": 1.53, + "learning_rate": 4.7473655768711157e-07, + "loss": 1.8972, + "step": 11290 + }, + { + "epoch": 1.53, + "learning_rate": 4.740610645771413e-07, + "loss": 2.0062, + "step": 11295 + }, + { + "epoch": 1.53, + "learning_rate": 4.73385571467171e-07, + "loss": 2.0605, + "step": 11300 + }, + { + "epoch": 1.53, + "learning_rate": 4.7271007835720075e-07, + "loss": 1.9654, + "step": 11305 + }, + { + "epoch": 1.53, + "learning_rate": 4.720345852472304e-07, + "loss": 2.0147, + "step": 11310 + }, + { + "epoch": 1.53, + "learning_rate": 4.7135909213726015e-07, + "loss": 1.9944, + "step": 11315 + }, + { + "epoch": 1.53, + "learning_rate": 4.706835990272899e-07, + "loss": 1.9651, + "step": 11320 + }, + { + "epoch": 1.53, + "learning_rate": 4.700081059173196e-07, + "loss": 1.9287, + "step": 11325 + }, + { + "epoch": 1.53, + "learning_rate": 4.693326128073494e-07, + "loss": 2.0328, + "step": 11330 + }, + { + "epoch": 1.53, + "learning_rate": 4.686571196973791e-07, + "loss": 2.0085, + "step": 11335 + }, + { + "epoch": 1.53, + "learning_rate": 4.679816265874088e-07, + "loss": 1.969, + "step": 11340 + }, + { + "epoch": 1.53, + "learning_rate": 4.673061334774385e-07, + "loss": 1.8905, + "step": 11345 + }, + { + "epoch": 1.53, + "learning_rate": 4.6663064036746823e-07, + "loss": 1.9451, + "step": 11350 + }, + { + "epoch": 1.53, + "learning_rate": 4.6595514725749796e-07, + "loss": 1.9741, + "step": 11355 + }, + { + "epoch": 1.53, + "learning_rate": 4.652796541475277e-07, + "loss": 1.9929, + "step": 11360 + }, + { + "epoch": 1.54, + "learning_rate": 4.646041610375574e-07, + "loss": 1.9585, + "step": 11365 + }, + { + "epoch": 1.54, + "learning_rate": 4.6392866792758714e-07, + "loss": 1.9383, + "step": 11370 + }, + { + "epoch": 1.54, + "learning_rate": 4.632531748176168e-07, + "loss": 1.9451, + "step": 11375 + }, + { + "epoch": 1.54, + "learning_rate": 4.6257768170764654e-07, + "loss": 1.9353, + "step": 11380 + }, + { + "epoch": 1.54, + "learning_rate": 4.6190218859767626e-07, + "loss": 1.9131, + "step": 11385 + }, + { + "epoch": 1.54, + "learning_rate": 4.61226695487706e-07, + "loss": 2.0578, + "step": 11390 + }, + { + "epoch": 1.54, + "learning_rate": 4.605512023777357e-07, + "loss": 1.9363, + "step": 11395 + }, + { + "epoch": 1.54, + "learning_rate": 4.5987570926776544e-07, + "loss": 1.9979, + "step": 11400 + }, + { + "epoch": 1.54, + "learning_rate": 4.5920021615779517e-07, + "loss": 1.9738, + "step": 11405 + }, + { + "epoch": 1.54, + "learning_rate": 4.585247230478249e-07, + "loss": 1.9685, + "step": 11410 + }, + { + "epoch": 1.54, + "learning_rate": 4.578492299378546e-07, + "loss": 1.9402, + "step": 11415 + }, + { + "epoch": 1.54, + "learning_rate": 4.5717373682788435e-07, + "loss": 1.9906, + "step": 11420 + }, + { + "epoch": 1.54, + "learning_rate": 4.564982437179141e-07, + "loss": 1.8393, + "step": 11425 + }, + { + "epoch": 1.54, + "learning_rate": 4.558227506079438e-07, + "loss": 1.988, + "step": 11430 + }, + { + "epoch": 1.54, + "learning_rate": 4.5514725749797353e-07, + "loss": 1.9981, + "step": 11435 + }, + { + "epoch": 1.55, + "learning_rate": 4.544717643880032e-07, + "loss": 1.9823, + "step": 11440 + }, + { + "epoch": 1.55, + "learning_rate": 4.537962712780329e-07, + "loss": 1.9773, + "step": 11445 + }, + { + "epoch": 1.55, + "learning_rate": 4.5312077816806265e-07, + "loss": 1.9343, + "step": 11450 + }, + { + "epoch": 1.55, + "learning_rate": 4.524452850580924e-07, + "loss": 1.949, + "step": 11455 + }, + { + "epoch": 1.55, + "learning_rate": 4.517697919481221e-07, + "loss": 1.9503, + "step": 11460 + }, + { + "epoch": 1.55, + "learning_rate": 4.5109429883815183e-07, + "loss": 1.9878, + "step": 11465 + }, + { + "epoch": 1.55, + "learning_rate": 4.504188057281815e-07, + "loss": 2.0098, + "step": 11470 + }, + { + "epoch": 1.55, + "learning_rate": 4.497433126182113e-07, + "loss": 1.9646, + "step": 11475 + }, + { + "epoch": 1.55, + "learning_rate": 4.49067819508241e-07, + "loss": 2.0194, + "step": 11480 + }, + { + "epoch": 1.55, + "learning_rate": 4.4839232639827074e-07, + "loss": 1.9737, + "step": 11485 + }, + { + "epoch": 1.55, + "learning_rate": 4.4771683328830046e-07, + "loss": 1.9999, + "step": 11490 + }, + { + "epoch": 1.55, + "learning_rate": 4.470413401783302e-07, + "loss": 1.9922, + "step": 11495 + }, + { + "epoch": 1.55, + "learning_rate": 4.463658470683599e-07, + "loss": 1.9929, + "step": 11500 + }, + { + "epoch": 1.55, + "learning_rate": 4.456903539583896e-07, + "loss": 2.0408, + "step": 11505 + }, + { + "epoch": 1.55, + "learning_rate": 4.450148608484193e-07, + "loss": 1.9746, + "step": 11510 + }, + { + "epoch": 1.56, + "learning_rate": 4.4433936773844904e-07, + "loss": 2.0071, + "step": 11515 + }, + { + "epoch": 1.56, + "learning_rate": 4.4366387462847877e-07, + "loss": 1.9578, + "step": 11520 + }, + { + "epoch": 1.56, + "learning_rate": 4.429883815185085e-07, + "loss": 1.9269, + "step": 11525 + }, + { + "epoch": 1.56, + "learning_rate": 4.423128884085382e-07, + "loss": 1.9608, + "step": 11530 + }, + { + "epoch": 1.56, + "learning_rate": 4.416373952985679e-07, + "loss": 1.9224, + "step": 11535 + }, + { + "epoch": 1.56, + "learning_rate": 4.409619021885976e-07, + "loss": 1.9858, + "step": 11540 + }, + { + "epoch": 1.56, + "learning_rate": 4.4028640907862735e-07, + "loss": 1.9847, + "step": 11545 + }, + { + "epoch": 1.56, + "learning_rate": 4.396109159686571e-07, + "loss": 2.0205, + "step": 11550 + }, + { + "epoch": 1.56, + "learning_rate": 4.3893542285868685e-07, + "loss": 1.9723, + "step": 11555 + }, + { + "epoch": 1.56, + "learning_rate": 4.382599297487166e-07, + "loss": 2.043, + "step": 11560 + }, + { + "epoch": 1.56, + "learning_rate": 4.375844366387463e-07, + "loss": 1.9467, + "step": 11565 + }, + { + "epoch": 1.56, + "learning_rate": 4.36908943528776e-07, + "loss": 1.9304, + "step": 11570 + }, + { + "epoch": 1.56, + "learning_rate": 4.362334504188057e-07, + "loss": 2.0435, + "step": 11575 + }, + { + "epoch": 1.56, + "learning_rate": 4.3555795730883543e-07, + "loss": 1.9946, + "step": 11580 + }, + { + "epoch": 1.57, + "learning_rate": 4.3488246419886516e-07, + "loss": 1.9891, + "step": 11585 + }, + { + "epoch": 1.57, + "learning_rate": 4.342069710888949e-07, + "loss": 1.9875, + "step": 11590 + }, + { + "epoch": 1.57, + "learning_rate": 4.335314779789246e-07, + "loss": 2.0281, + "step": 11595 + }, + { + "epoch": 1.57, + "learning_rate": 4.328559848689543e-07, + "loss": 1.8857, + "step": 11600 + }, + { + "epoch": 1.57, + "eval_loss": 1.9535282850265503, + "eval_runtime": 165.4539, + "eval_samples_per_second": 3.602, + "eval_steps_per_second": 0.453, + "step": 11600 + }, + { + "epoch": 1.57, + "learning_rate": 4.32180491758984e-07, + "loss": 1.9551, + "step": 11605 + }, + { + "epoch": 1.57, + "learning_rate": 4.3150499864901373e-07, + "loss": 1.9194, + "step": 11610 + }, + { + "epoch": 1.57, + "learning_rate": 4.3082950553904346e-07, + "loss": 1.9471, + "step": 11615 + }, + { + "epoch": 1.57, + "learning_rate": 4.301540124290732e-07, + "loss": 1.9033, + "step": 11620 + }, + { + "epoch": 1.57, + "learning_rate": 4.2947851931910297e-07, + "loss": 1.9445, + "step": 11625 + }, + { + "epoch": 1.57, + "learning_rate": 4.288030262091327e-07, + "loss": 1.9713, + "step": 11630 + }, + { + "epoch": 1.57, + "learning_rate": 4.2812753309916237e-07, + "loss": 1.9252, + "step": 11635 + }, + { + "epoch": 1.57, + "learning_rate": 4.274520399891921e-07, + "loss": 1.88, + "step": 11640 + }, + { + "epoch": 1.57, + "learning_rate": 4.267765468792218e-07, + "loss": 1.9462, + "step": 11645 + }, + { + "epoch": 1.57, + "learning_rate": 4.2610105376925154e-07, + "loss": 2.0004, + "step": 11650 + }, + { + "epoch": 1.57, + "learning_rate": 4.2542556065928127e-07, + "loss": 1.9346, + "step": 11655 + }, + { + "epoch": 1.58, + "learning_rate": 4.24750067549311e-07, + "loss": 2.0424, + "step": 11660 + }, + { + "epoch": 1.58, + "learning_rate": 4.2407457443934067e-07, + "loss": 1.966, + "step": 11665 + }, + { + "epoch": 1.58, + "learning_rate": 4.233990813293704e-07, + "loss": 2.016, + "step": 11670 + }, + { + "epoch": 1.58, + "learning_rate": 4.227235882194001e-07, + "loss": 1.9669, + "step": 11675 + }, + { + "epoch": 1.58, + "learning_rate": 4.2204809510942985e-07, + "loss": 1.9423, + "step": 11680 + }, + { + "epoch": 1.58, + "learning_rate": 4.213726019994596e-07, + "loss": 1.9125, + "step": 11685 + }, + { + "epoch": 1.58, + "learning_rate": 4.206971088894893e-07, + "loss": 1.8883, + "step": 11690 + }, + { + "epoch": 1.58, + "learning_rate": 4.200216157795191e-07, + "loss": 1.9714, + "step": 11695 + }, + { + "epoch": 1.58, + "learning_rate": 4.1934612266954875e-07, + "loss": 1.9444, + "step": 11700 + }, + { + "epoch": 1.58, + "learning_rate": 4.186706295595785e-07, + "loss": 1.9369, + "step": 11705 + }, + { + "epoch": 1.58, + "learning_rate": 4.179951364496082e-07, + "loss": 1.9787, + "step": 11710 + }, + { + "epoch": 1.58, + "learning_rate": 4.1731964333963793e-07, + "loss": 1.8535, + "step": 11715 + }, + { + "epoch": 1.58, + "learning_rate": 4.1664415022966766e-07, + "loss": 1.9828, + "step": 11720 + }, + { + "epoch": 1.58, + "learning_rate": 4.159686571196974e-07, + "loss": 1.9836, + "step": 11725 + }, + { + "epoch": 1.58, + "learning_rate": 4.1529316400972706e-07, + "loss": 1.9408, + "step": 11730 + }, + { + "epoch": 1.59, + "learning_rate": 4.146176708997568e-07, + "loss": 1.9519, + "step": 11735 + }, + { + "epoch": 1.59, + "learning_rate": 4.139421777897865e-07, + "loss": 2.0592, + "step": 11740 + }, + { + "epoch": 1.59, + "learning_rate": 4.1326668467981624e-07, + "loss": 1.9832, + "step": 11745 + }, + { + "epoch": 1.59, + "learning_rate": 4.1259119156984596e-07, + "loss": 1.9843, + "step": 11750 + }, + { + "epoch": 1.59, + "learning_rate": 4.119156984598757e-07, + "loss": 1.947, + "step": 11755 + }, + { + "epoch": 1.59, + "learning_rate": 4.1124020534990536e-07, + "loss": 1.9285, + "step": 11760 + }, + { + "epoch": 1.59, + "learning_rate": 4.105647122399351e-07, + "loss": 1.8873, + "step": 11765 + }, + { + "epoch": 1.59, + "learning_rate": 4.0988921912996487e-07, + "loss": 1.9133, + "step": 11770 + }, + { + "epoch": 1.59, + "learning_rate": 4.092137260199946e-07, + "loss": 2.0237, + "step": 11775 + }, + { + "epoch": 1.59, + "learning_rate": 4.085382329100243e-07, + "loss": 1.9161, + "step": 11780 + }, + { + "epoch": 1.59, + "learning_rate": 4.0786273980005405e-07, + "loss": 1.8488, + "step": 11785 + }, + { + "epoch": 1.59, + "learning_rate": 4.071872466900838e-07, + "loss": 2.0393, + "step": 11790 + }, + { + "epoch": 1.59, + "learning_rate": 4.0651175358011345e-07, + "loss": 1.9536, + "step": 11795 + }, + { + "epoch": 1.59, + "learning_rate": 4.058362604701432e-07, + "loss": 1.97, + "step": 11800 + }, + { + "epoch": 1.59, + "learning_rate": 4.051607673601729e-07, + "loss": 2.003, + "step": 11805 + }, + { + "epoch": 1.6, + "learning_rate": 4.0448527425020263e-07, + "loss": 1.9425, + "step": 11810 + }, + { + "epoch": 1.6, + "learning_rate": 4.0380978114023235e-07, + "loss": 1.9305, + "step": 11815 + }, + { + "epoch": 1.6, + "learning_rate": 4.031342880302621e-07, + "loss": 1.8838, + "step": 11820 + }, + { + "epoch": 1.6, + "learning_rate": 4.0245879492029175e-07, + "loss": 2.0721, + "step": 11825 + }, + { + "epoch": 1.6, + "learning_rate": 4.017833018103215e-07, + "loss": 1.8947, + "step": 11830 + }, + { + "epoch": 1.6, + "learning_rate": 4.011078087003512e-07, + "loss": 1.9371, + "step": 11835 + }, + { + "epoch": 1.6, + "learning_rate": 4.00432315590381e-07, + "loss": 1.9521, + "step": 11840 + }, + { + "epoch": 1.6, + "learning_rate": 3.997568224804107e-07, + "loss": 1.9498, + "step": 11845 + }, + { + "epoch": 1.6, + "learning_rate": 3.9908132937044044e-07, + "loss": 1.9536, + "step": 11850 + }, + { + "epoch": 1.6, + "learning_rate": 3.9840583626047016e-07, + "loss": 1.9916, + "step": 11855 + }, + { + "epoch": 1.6, + "learning_rate": 3.9773034315049984e-07, + "loss": 1.9506, + "step": 11860 + }, + { + "epoch": 1.6, + "learning_rate": 3.9705485004052956e-07, + "loss": 1.9623, + "step": 11865 + }, + { + "epoch": 1.6, + "learning_rate": 3.963793569305593e-07, + "loss": 1.9731, + "step": 11870 + }, + { + "epoch": 1.6, + "learning_rate": 3.95703863820589e-07, + "loss": 1.9713, + "step": 11875 + }, + { + "epoch": 1.6, + "learning_rate": 3.9502837071061874e-07, + "loss": 2.0692, + "step": 11880 + }, + { + "epoch": 1.61, + "learning_rate": 3.9435287760064847e-07, + "loss": 1.9771, + "step": 11885 + }, + { + "epoch": 1.61, + "learning_rate": 3.9367738449067814e-07, + "loss": 2.0068, + "step": 11890 + }, + { + "epoch": 1.61, + "learning_rate": 3.9300189138070787e-07, + "loss": 1.8429, + "step": 11895 + }, + { + "epoch": 1.61, + "learning_rate": 3.923263982707376e-07, + "loss": 2.0443, + "step": 11900 + }, + { + "epoch": 1.61, + "learning_rate": 3.916509051607673e-07, + "loss": 1.9551, + "step": 11905 + }, + { + "epoch": 1.61, + "learning_rate": 3.9097541205079705e-07, + "loss": 1.8964, + "step": 11910 + }, + { + "epoch": 1.61, + "learning_rate": 3.902999189408268e-07, + "loss": 1.9391, + "step": 11915 + }, + { + "epoch": 1.61, + "learning_rate": 3.8962442583085655e-07, + "loss": 2.0496, + "step": 11920 + }, + { + "epoch": 1.61, + "learning_rate": 3.889489327208862e-07, + "loss": 2.0249, + "step": 11925 + }, + { + "epoch": 1.61, + "learning_rate": 3.8827343961091595e-07, + "loss": 2.0152, + "step": 11930 + }, + { + "epoch": 1.61, + "learning_rate": 3.875979465009457e-07, + "loss": 1.9238, + "step": 11935 + }, + { + "epoch": 1.61, + "learning_rate": 3.869224533909754e-07, + "loss": 2.0196, + "step": 11940 + }, + { + "epoch": 1.61, + "learning_rate": 3.8624696028100513e-07, + "loss": 1.9307, + "step": 11945 + }, + { + "epoch": 1.61, + "learning_rate": 3.8557146717103486e-07, + "loss": 1.9859, + "step": 11950 + }, + { + "epoch": 1.62, + "learning_rate": 3.8489597406106453e-07, + "loss": 1.9542, + "step": 11955 + }, + { + "epoch": 1.62, + "learning_rate": 3.8422048095109426e-07, + "loss": 1.8884, + "step": 11960 + }, + { + "epoch": 1.62, + "learning_rate": 3.83544987841124e-07, + "loss": 2.0328, + "step": 11965 + }, + { + "epoch": 1.62, + "learning_rate": 3.828694947311537e-07, + "loss": 1.9707, + "step": 11970 + }, + { + "epoch": 1.62, + "learning_rate": 3.8219400162118344e-07, + "loss": 1.9789, + "step": 11975 + }, + { + "epoch": 1.62, + "learning_rate": 3.8151850851121316e-07, + "loss": 1.964, + "step": 11980 + }, + { + "epoch": 1.62, + "learning_rate": 3.8084301540124294e-07, + "loss": 1.9321, + "step": 11985 + }, + { + "epoch": 1.62, + "learning_rate": 3.801675222912726e-07, + "loss": 1.916, + "step": 11990 + }, + { + "epoch": 1.62, + "learning_rate": 3.7949202918130234e-07, + "loss": 1.8828, + "step": 11995 + }, + { + "epoch": 1.62, + "learning_rate": 3.7881653607133207e-07, + "loss": 1.9949, + "step": 12000 + }, + { + "epoch": 1.62, + "eval_loss": 1.952235221862793, + "eval_runtime": 165.3673, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.454, + "step": 12000 + }, + { + "epoch": 1.62, + "learning_rate": 3.781410429613618e-07, + "loss": 1.8852, + "step": 12005 + }, + { + "epoch": 1.62, + "learning_rate": 3.774655498513915e-07, + "loss": 1.9413, + "step": 12010 + }, + { + "epoch": 1.62, + "learning_rate": 3.7679005674142125e-07, + "loss": 2.0267, + "step": 12015 + }, + { + "epoch": 1.62, + "learning_rate": 3.761145636314509e-07, + "loss": 1.8799, + "step": 12020 + }, + { + "epoch": 1.62, + "learning_rate": 3.7543907052148065e-07, + "loss": 1.9891, + "step": 12025 + }, + { + "epoch": 1.63, + "learning_rate": 3.7476357741151037e-07, + "loss": 1.9889, + "step": 12030 + }, + { + "epoch": 1.63, + "learning_rate": 3.740880843015401e-07, + "loss": 1.984, + "step": 12035 + }, + { + "epoch": 1.63, + "learning_rate": 3.734125911915698e-07, + "loss": 1.8569, + "step": 12040 + }, + { + "epoch": 1.63, + "learning_rate": 3.7273709808159955e-07, + "loss": 1.9308, + "step": 12045 + }, + { + "epoch": 1.63, + "learning_rate": 3.720616049716293e-07, + "loss": 1.9315, + "step": 12050 + }, + { + "epoch": 1.63, + "learning_rate": 3.7138611186165895e-07, + "loss": 1.9194, + "step": 12055 + }, + { + "epoch": 1.63, + "learning_rate": 3.7071061875168873e-07, + "loss": 1.9769, + "step": 12060 + }, + { + "epoch": 1.63, + "learning_rate": 3.7003512564171846e-07, + "loss": 1.9433, + "step": 12065 + }, + { + "epoch": 1.63, + "learning_rate": 3.693596325317482e-07, + "loss": 1.8922, + "step": 12070 + }, + { + "epoch": 1.63, + "learning_rate": 3.686841394217779e-07, + "loss": 2.0268, + "step": 12075 + }, + { + "epoch": 1.63, + "learning_rate": 3.6800864631180763e-07, + "loss": 1.9862, + "step": 12080 + }, + { + "epoch": 1.63, + "learning_rate": 3.673331532018373e-07, + "loss": 2.0022, + "step": 12085 + }, + { + "epoch": 1.63, + "learning_rate": 3.6665766009186703e-07, + "loss": 1.9151, + "step": 12090 + }, + { + "epoch": 1.63, + "learning_rate": 3.6598216698189676e-07, + "loss": 2.0128, + "step": 12095 + }, + { + "epoch": 1.63, + "learning_rate": 3.653066738719265e-07, + "loss": 1.9876, + "step": 12100 + }, + { + "epoch": 1.64, + "learning_rate": 3.646311807619562e-07, + "loss": 2.0628, + "step": 12105 + }, + { + "epoch": 1.64, + "learning_rate": 3.6395568765198594e-07, + "loss": 1.9863, + "step": 12110 + }, + { + "epoch": 1.64, + "learning_rate": 3.6328019454201567e-07, + "loss": 1.9916, + "step": 12115 + }, + { + "epoch": 1.64, + "learning_rate": 3.6260470143204534e-07, + "loss": 2.0298, + "step": 12120 + }, + { + "epoch": 1.64, + "learning_rate": 3.6192920832207506e-07, + "loss": 2.0356, + "step": 12125 + }, + { + "epoch": 1.64, + "learning_rate": 3.612537152121048e-07, + "loss": 1.927, + "step": 12130 + }, + { + "epoch": 1.64, + "learning_rate": 3.6057822210213457e-07, + "loss": 1.9696, + "step": 12135 + }, + { + "epoch": 1.64, + "learning_rate": 3.599027289921643e-07, + "loss": 2.0689, + "step": 12140 + }, + { + "epoch": 1.64, + "learning_rate": 3.59227235882194e-07, + "loss": 1.9127, + "step": 12145 + }, + { + "epoch": 1.64, + "learning_rate": 3.585517427722237e-07, + "loss": 1.9824, + "step": 12150 + }, + { + "epoch": 1.64, + "learning_rate": 3.578762496622534e-07, + "loss": 2.0156, + "step": 12155 + }, + { + "epoch": 1.64, + "learning_rate": 3.5720075655228315e-07, + "loss": 1.9376, + "step": 12160 + }, + { + "epoch": 1.64, + "learning_rate": 3.565252634423129e-07, + "loss": 1.8839, + "step": 12165 + }, + { + "epoch": 1.64, + "learning_rate": 3.558497703323426e-07, + "loss": 1.986, + "step": 12170 + }, + { + "epoch": 1.64, + "learning_rate": 3.5517427722237233e-07, + "loss": 1.9391, + "step": 12175 + }, + { + "epoch": 1.65, + "learning_rate": 3.5449878411240205e-07, + "loss": 1.9136, + "step": 12180 + }, + { + "epoch": 1.65, + "learning_rate": 3.5382329100243173e-07, + "loss": 1.9457, + "step": 12185 + }, + { + "epoch": 1.65, + "learning_rate": 3.5314779789246145e-07, + "loss": 2.0062, + "step": 12190 + }, + { + "epoch": 1.65, + "learning_rate": 3.524723047824912e-07, + "loss": 1.9796, + "step": 12195 + }, + { + "epoch": 1.65, + "learning_rate": 3.517968116725209e-07, + "loss": 2.0649, + "step": 12200 + }, + { + "epoch": 1.65, + "learning_rate": 3.511213185625507e-07, + "loss": 2.0202, + "step": 12205 + }, + { + "epoch": 1.65, + "learning_rate": 3.504458254525804e-07, + "loss": 1.9865, + "step": 12210 + }, + { + "epoch": 1.65, + "learning_rate": 3.497703323426101e-07, + "loss": 1.9317, + "step": 12215 + }, + { + "epoch": 1.65, + "learning_rate": 3.490948392326398e-07, + "loss": 1.9266, + "step": 12220 + }, + { + "epoch": 1.65, + "learning_rate": 3.4841934612266954e-07, + "loss": 1.9678, + "step": 12225 + }, + { + "epoch": 1.65, + "learning_rate": 3.4774385301269926e-07, + "loss": 2.0067, + "step": 12230 + }, + { + "epoch": 1.65, + "learning_rate": 3.47068359902729e-07, + "loss": 1.9964, + "step": 12235 + }, + { + "epoch": 1.65, + "learning_rate": 3.463928667927587e-07, + "loss": 2.0902, + "step": 12240 + }, + { + "epoch": 1.65, + "learning_rate": 3.457173736827884e-07, + "loss": 1.9893, + "step": 12245 + }, + { + "epoch": 1.65, + "learning_rate": 3.450418805728181e-07, + "loss": 1.9439, + "step": 12250 + }, + { + "epoch": 1.66, + "learning_rate": 3.4436638746284784e-07, + "loss": 1.9843, + "step": 12255 + }, + { + "epoch": 1.66, + "learning_rate": 3.4369089435287757e-07, + "loss": 2.0811, + "step": 12260 + }, + { + "epoch": 1.66, + "learning_rate": 3.430154012429073e-07, + "loss": 1.8165, + "step": 12265 + }, + { + "epoch": 1.66, + "learning_rate": 3.42339908132937e-07, + "loss": 2.0074, + "step": 12270 + }, + { + "epoch": 1.66, + "learning_rate": 3.4166441502296675e-07, + "loss": 1.8722, + "step": 12275 + }, + { + "epoch": 1.66, + "learning_rate": 3.4098892191299647e-07, + "loss": 1.9534, + "step": 12280 + }, + { + "epoch": 1.66, + "learning_rate": 3.403134288030262e-07, + "loss": 2.0328, + "step": 12285 + }, + { + "epoch": 1.66, + "learning_rate": 3.396379356930559e-07, + "loss": 2.0205, + "step": 12290 + }, + { + "epoch": 1.66, + "learning_rate": 3.3896244258308565e-07, + "loss": 1.9499, + "step": 12295 + }, + { + "epoch": 1.66, + "learning_rate": 3.382869494731154e-07, + "loss": 1.9113, + "step": 12300 + }, + { + "epoch": 1.66, + "learning_rate": 3.376114563631451e-07, + "loss": 1.9177, + "step": 12305 + }, + { + "epoch": 1.66, + "learning_rate": 3.369359632531748e-07, + "loss": 1.8988, + "step": 12310 + }, + { + "epoch": 1.66, + "learning_rate": 3.362604701432045e-07, + "loss": 1.9675, + "step": 12315 + }, + { + "epoch": 1.66, + "learning_rate": 3.3558497703323423e-07, + "loss": 1.9732, + "step": 12320 + }, + { + "epoch": 1.67, + "learning_rate": 3.3490948392326396e-07, + "loss": 1.987, + "step": 12325 + }, + { + "epoch": 1.67, + "learning_rate": 3.342339908132937e-07, + "loss": 1.9473, + "step": 12330 + }, + { + "epoch": 1.67, + "learning_rate": 3.335584977033234e-07, + "loss": 1.9229, + "step": 12335 + }, + { + "epoch": 1.67, + "learning_rate": 3.3288300459335314e-07, + "loss": 1.9963, + "step": 12340 + }, + { + "epoch": 1.67, + "learning_rate": 3.322075114833828e-07, + "loss": 1.8348, + "step": 12345 + }, + { + "epoch": 1.67, + "learning_rate": 3.315320183734126e-07, + "loss": 1.9457, + "step": 12350 + }, + { + "epoch": 1.67, + "learning_rate": 3.308565252634423e-07, + "loss": 1.9232, + "step": 12355 + }, + { + "epoch": 1.67, + "learning_rate": 3.3018103215347204e-07, + "loss": 1.9743, + "step": 12360 + }, + { + "epoch": 1.67, + "learning_rate": 3.2950553904350177e-07, + "loss": 1.9761, + "step": 12365 + }, + { + "epoch": 1.67, + "learning_rate": 3.288300459335315e-07, + "loss": 1.9756, + "step": 12370 + }, + { + "epoch": 1.67, + "learning_rate": 3.2815455282356117e-07, + "loss": 1.8987, + "step": 12375 + }, + { + "epoch": 1.67, + "learning_rate": 3.274790597135909e-07, + "loss": 2.0811, + "step": 12380 + }, + { + "epoch": 1.67, + "learning_rate": 3.268035666036206e-07, + "loss": 1.9758, + "step": 12385 + }, + { + "epoch": 1.67, + "learning_rate": 3.2612807349365035e-07, + "loss": 1.9949, + "step": 12390 + }, + { + "epoch": 1.67, + "learning_rate": 3.2545258038368007e-07, + "loss": 1.8838, + "step": 12395 + }, + { + "epoch": 1.68, + "learning_rate": 3.247770872737098e-07, + "loss": 1.9292, + "step": 12400 + }, + { + "epoch": 1.68, + "eval_loss": 1.9511464834213257, + "eval_runtime": 165.4983, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.453, + "step": 12400 + }, + { + "epoch": 1.68, + "learning_rate": 3.241015941637395e-07, + "loss": 1.9073, + "step": 12405 + }, + { + "epoch": 1.68, + "learning_rate": 3.234261010537692e-07, + "loss": 1.9725, + "step": 12410 + }, + { + "epoch": 1.68, + "learning_rate": 3.227506079437989e-07, + "loss": 1.8673, + "step": 12415 + }, + { + "epoch": 1.68, + "learning_rate": 3.2207511483382865e-07, + "loss": 2.0299, + "step": 12420 + }, + { + "epoch": 1.68, + "learning_rate": 3.2139962172385843e-07, + "loss": 1.9586, + "step": 12425 + }, + { + "epoch": 1.68, + "learning_rate": 3.2072412861388816e-07, + "loss": 1.9822, + "step": 12430 + }, + { + "epoch": 1.68, + "learning_rate": 3.200486355039179e-07, + "loss": 2.0641, + "step": 12435 + }, + { + "epoch": 1.68, + "learning_rate": 3.1937314239394756e-07, + "loss": 2.0372, + "step": 12440 + }, + { + "epoch": 1.68, + "learning_rate": 3.186976492839773e-07, + "loss": 1.9135, + "step": 12445 + }, + { + "epoch": 1.68, + "learning_rate": 3.18022156174007e-07, + "loss": 1.9977, + "step": 12450 + }, + { + "epoch": 1.68, + "learning_rate": 3.1734666306403673e-07, + "loss": 1.9981, + "step": 12455 + }, + { + "epoch": 1.68, + "learning_rate": 3.1667116995406646e-07, + "loss": 2.0354, + "step": 12460 + }, + { + "epoch": 1.68, + "learning_rate": 3.159956768440962e-07, + "loss": 1.9714, + "step": 12465 + }, + { + "epoch": 1.68, + "learning_rate": 3.153201837341259e-07, + "loss": 2.0264, + "step": 12470 + }, + { + "epoch": 1.69, + "learning_rate": 3.146446906241556e-07, + "loss": 1.9805, + "step": 12475 + }, + { + "epoch": 1.69, + "learning_rate": 3.139691975141853e-07, + "loss": 2.0613, + "step": 12480 + }, + { + "epoch": 1.69, + "learning_rate": 3.1329370440421504e-07, + "loss": 1.8951, + "step": 12485 + }, + { + "epoch": 1.69, + "learning_rate": 3.1261821129424477e-07, + "loss": 1.9624, + "step": 12490 + }, + { + "epoch": 1.69, + "learning_rate": 3.1194271818427454e-07, + "loss": 1.9934, + "step": 12495 + }, + { + "epoch": 1.69, + "learning_rate": 3.1126722507430427e-07, + "loss": 1.9149, + "step": 12500 + }, + { + "epoch": 1.69, + "learning_rate": 3.1059173196433394e-07, + "loss": 1.9528, + "step": 12505 + }, + { + "epoch": 1.69, + "learning_rate": 3.0991623885436367e-07, + "loss": 1.9816, + "step": 12510 + }, + { + "epoch": 1.69, + "learning_rate": 3.092407457443934e-07, + "loss": 1.9338, + "step": 12515 + }, + { + "epoch": 1.69, + "learning_rate": 3.085652526344231e-07, + "loss": 1.9294, + "step": 12520 + }, + { + "epoch": 1.69, + "learning_rate": 3.0788975952445285e-07, + "loss": 1.9416, + "step": 12525 + }, + { + "epoch": 1.69, + "learning_rate": 3.072142664144826e-07, + "loss": 2.0216, + "step": 12530 + }, + { + "epoch": 1.69, + "learning_rate": 3.065387733045123e-07, + "loss": 1.9174, + "step": 12535 + }, + { + "epoch": 1.69, + "learning_rate": 3.05863280194542e-07, + "loss": 1.8646, + "step": 12540 + }, + { + "epoch": 1.69, + "learning_rate": 3.051877870845717e-07, + "loss": 2.0483, + "step": 12545 + }, + { + "epoch": 1.7, + "learning_rate": 3.0451229397460143e-07, + "loss": 1.9749, + "step": 12550 + }, + { + "epoch": 1.7, + "learning_rate": 3.0383680086463115e-07, + "loss": 2.0014, + "step": 12555 + }, + { + "epoch": 1.7, + "learning_rate": 3.031613077546609e-07, + "loss": 1.9313, + "step": 12560 + }, + { + "epoch": 1.7, + "learning_rate": 3.024858146446906e-07, + "loss": 1.8786, + "step": 12565 + }, + { + "epoch": 1.7, + "learning_rate": 3.0181032153472033e-07, + "loss": 1.8782, + "step": 12570 + }, + { + "epoch": 1.7, + "learning_rate": 3.0113482842475006e-07, + "loss": 1.9038, + "step": 12575 + }, + { + "epoch": 1.7, + "learning_rate": 3.004593353147798e-07, + "loss": 1.9865, + "step": 12580 + }, + { + "epoch": 1.7, + "learning_rate": 2.997838422048095e-07, + "loss": 1.9386, + "step": 12585 + }, + { + "epoch": 1.7, + "learning_rate": 2.9910834909483924e-07, + "loss": 1.884, + "step": 12590 + }, + { + "epoch": 1.7, + "learning_rate": 2.9843285598486896e-07, + "loss": 1.9116, + "step": 12595 + }, + { + "epoch": 1.7, + "learning_rate": 2.977573628748987e-07, + "loss": 1.9599, + "step": 12600 + }, + { + "epoch": 1.7, + "learning_rate": 2.9708186976492836e-07, + "loss": 1.966, + "step": 12605 + }, + { + "epoch": 1.7, + "learning_rate": 2.964063766549581e-07, + "loss": 1.9358, + "step": 12610 + }, + { + "epoch": 1.7, + "learning_rate": 2.957308835449878e-07, + "loss": 2.0271, + "step": 12615 + }, + { + "epoch": 1.7, + "learning_rate": 2.9505539043501754e-07, + "loss": 1.9708, + "step": 12620 + }, + { + "epoch": 1.71, + "learning_rate": 2.9437989732504727e-07, + "loss": 1.8144, + "step": 12625 + }, + { + "epoch": 1.71, + "learning_rate": 2.93704404215077e-07, + "loss": 2.0278, + "step": 12630 + }, + { + "epoch": 1.71, + "learning_rate": 2.9302891110510667e-07, + "loss": 1.9605, + "step": 12635 + }, + { + "epoch": 1.71, + "learning_rate": 2.923534179951364e-07, + "loss": 1.9958, + "step": 12640 + }, + { + "epoch": 1.71, + "learning_rate": 2.916779248851662e-07, + "loss": 1.9269, + "step": 12645 + }, + { + "epoch": 1.71, + "learning_rate": 2.910024317751959e-07, + "loss": 1.9597, + "step": 12650 + }, + { + "epoch": 1.71, + "learning_rate": 2.903269386652256e-07, + "loss": 1.9553, + "step": 12655 + }, + { + "epoch": 1.71, + "learning_rate": 2.8965144555525535e-07, + "loss": 1.9797, + "step": 12660 + }, + { + "epoch": 1.71, + "learning_rate": 2.889759524452851e-07, + "loss": 1.9698, + "step": 12665 + }, + { + "epoch": 1.71, + "learning_rate": 2.8830045933531475e-07, + "loss": 2.0515, + "step": 12670 + }, + { + "epoch": 1.71, + "learning_rate": 2.876249662253445e-07, + "loss": 1.9132, + "step": 12675 + }, + { + "epoch": 1.71, + "learning_rate": 2.869494731153742e-07, + "loss": 1.9046, + "step": 12680 + }, + { + "epoch": 1.71, + "learning_rate": 2.8627398000540393e-07, + "loss": 1.9502, + "step": 12685 + }, + { + "epoch": 1.71, + "learning_rate": 2.8559848689543366e-07, + "loss": 2.0462, + "step": 12690 + }, + { + "epoch": 1.72, + "learning_rate": 2.849229937854634e-07, + "loss": 2.0225, + "step": 12695 + }, + { + "epoch": 1.72, + "learning_rate": 2.8424750067549306e-07, + "loss": 1.9613, + "step": 12700 + }, + { + "epoch": 1.72, + "learning_rate": 2.835720075655228e-07, + "loss": 1.9458, + "step": 12705 + }, + { + "epoch": 1.72, + "learning_rate": 2.828965144555525e-07, + "loss": 1.9629, + "step": 12710 + }, + { + "epoch": 1.72, + "learning_rate": 2.822210213455823e-07, + "loss": 1.9982, + "step": 12715 + }, + { + "epoch": 1.72, + "learning_rate": 2.81545528235612e-07, + "loss": 1.9519, + "step": 12720 + }, + { + "epoch": 1.72, + "learning_rate": 2.8087003512564174e-07, + "loss": 2.0149, + "step": 12725 + }, + { + "epoch": 1.72, + "learning_rate": 2.8019454201567147e-07, + "loss": 1.9178, + "step": 12730 + }, + { + "epoch": 1.72, + "learning_rate": 2.7951904890570114e-07, + "loss": 1.9672, + "step": 12735 + }, + { + "epoch": 1.72, + "learning_rate": 2.7884355579573087e-07, + "loss": 1.9545, + "step": 12740 + }, + { + "epoch": 1.72, + "learning_rate": 2.781680626857606e-07, + "loss": 1.9199, + "step": 12745 + }, + { + "epoch": 1.72, + "learning_rate": 2.774925695757903e-07, + "loss": 1.962, + "step": 12750 + }, + { + "epoch": 1.72, + "learning_rate": 2.7681707646582005e-07, + "loss": 1.9126, + "step": 12755 + }, + { + "epoch": 1.72, + "learning_rate": 2.7614158335584977e-07, + "loss": 1.9452, + "step": 12760 + }, + { + "epoch": 1.72, + "learning_rate": 2.7546609024587945e-07, + "loss": 2.0155, + "step": 12765 + }, + { + "epoch": 1.73, + "learning_rate": 2.7479059713590917e-07, + "loss": 1.9335, + "step": 12770 + }, + { + "epoch": 1.73, + "learning_rate": 2.741151040259389e-07, + "loss": 1.8992, + "step": 12775 + }, + { + "epoch": 1.73, + "learning_rate": 2.734396109159686e-07, + "loss": 1.9687, + "step": 12780 + }, + { + "epoch": 1.73, + "learning_rate": 2.7276411780599835e-07, + "loss": 1.9148, + "step": 12785 + }, + { + "epoch": 1.73, + "learning_rate": 2.7208862469602813e-07, + "loss": 2.0001, + "step": 12790 + }, + { + "epoch": 1.73, + "learning_rate": 2.7141313158605786e-07, + "loss": 1.9786, + "step": 12795 + }, + { + "epoch": 1.73, + "learning_rate": 2.7073763847608753e-07, + "loss": 1.937, + "step": 12800 + }, + { + "epoch": 1.73, + "eval_loss": 1.95003080368042, + "eval_runtime": 165.3799, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.454, + "step": 12800 + }, + { + "epoch": 1.73, + "learning_rate": 2.7006214536611726e-07, + "loss": 1.9034, + "step": 12805 + }, + { + "epoch": 1.73, + "learning_rate": 2.69386652256147e-07, + "loss": 2.009, + "step": 12810 + }, + { + "epoch": 1.73, + "learning_rate": 2.687111591461767e-07, + "loss": 1.957, + "step": 12815 + }, + { + "epoch": 1.73, + "learning_rate": 2.6803566603620643e-07, + "loss": 2.0366, + "step": 12820 + }, + { + "epoch": 1.73, + "learning_rate": 2.6736017292623616e-07, + "loss": 1.9522, + "step": 12825 + }, + { + "epoch": 1.73, + "learning_rate": 2.6668467981626583e-07, + "loss": 1.9014, + "step": 12830 + }, + { + "epoch": 1.73, + "learning_rate": 2.6600918670629556e-07, + "loss": 1.8543, + "step": 12835 + }, + { + "epoch": 1.73, + "learning_rate": 2.653336935963253e-07, + "loss": 1.9508, + "step": 12840 + }, + { + "epoch": 1.74, + "learning_rate": 2.64658200486355e-07, + "loss": 1.9476, + "step": 12845 + }, + { + "epoch": 1.74, + "learning_rate": 2.6398270737638474e-07, + "loss": 1.9962, + "step": 12850 + }, + { + "epoch": 1.74, + "learning_rate": 2.6330721426641447e-07, + "loss": 1.8131, + "step": 12855 + }, + { + "epoch": 1.74, + "learning_rate": 2.6263172115644424e-07, + "loss": 1.8921, + "step": 12860 + }, + { + "epoch": 1.74, + "learning_rate": 2.619562280464739e-07, + "loss": 1.9959, + "step": 12865 + }, + { + "epoch": 1.74, + "learning_rate": 2.6128073493650364e-07, + "loss": 1.9768, + "step": 12870 + }, + { + "epoch": 1.74, + "learning_rate": 2.6060524182653337e-07, + "loss": 1.9248, + "step": 12875 + }, + { + "epoch": 1.74, + "learning_rate": 2.599297487165631e-07, + "loss": 1.9049, + "step": 12880 + }, + { + "epoch": 1.74, + "learning_rate": 2.592542556065928e-07, + "loss": 1.9095, + "step": 12885 + }, + { + "epoch": 1.74, + "learning_rate": 2.5857876249662255e-07, + "loss": 1.8965, + "step": 12890 + }, + { + "epoch": 1.74, + "learning_rate": 2.579032693866522e-07, + "loss": 1.9665, + "step": 12895 + }, + { + "epoch": 1.74, + "learning_rate": 2.5722777627668195e-07, + "loss": 1.9266, + "step": 12900 + }, + { + "epoch": 1.74, + "learning_rate": 2.565522831667117e-07, + "loss": 2.0833, + "step": 12905 + }, + { + "epoch": 1.74, + "learning_rate": 2.558767900567414e-07, + "loss": 1.9587, + "step": 12910 + }, + { + "epoch": 1.74, + "learning_rate": 2.5520129694677113e-07, + "loss": 2.1045, + "step": 12915 + }, + { + "epoch": 1.75, + "learning_rate": 2.5452580383680085e-07, + "loss": 2.0159, + "step": 12920 + }, + { + "epoch": 1.75, + "learning_rate": 2.5385031072683053e-07, + "loss": 1.9911, + "step": 12925 + }, + { + "epoch": 1.75, + "learning_rate": 2.5317481761686025e-07, + "loss": 1.9373, + "step": 12930 + }, + { + "epoch": 1.75, + "learning_rate": 2.5249932450689003e-07, + "loss": 2.012, + "step": 12935 + }, + { + "epoch": 1.75, + "learning_rate": 2.5182383139691976e-07, + "loss": 1.9334, + "step": 12940 + }, + { + "epoch": 1.75, + "learning_rate": 2.511483382869495e-07, + "loss": 2.0098, + "step": 12945 + }, + { + "epoch": 1.75, + "learning_rate": 2.504728451769792e-07, + "loss": 1.8779, + "step": 12950 + }, + { + "epoch": 1.75, + "learning_rate": 2.4979735206700894e-07, + "loss": 1.8818, + "step": 12955 + }, + { + "epoch": 1.75, + "learning_rate": 2.491218589570386e-07, + "loss": 1.9791, + "step": 12960 + }, + { + "epoch": 1.75, + "learning_rate": 2.4844636584706834e-07, + "loss": 1.9753, + "step": 12965 + }, + { + "epoch": 1.75, + "learning_rate": 2.4777087273709806e-07, + "loss": 1.9361, + "step": 12970 + }, + { + "epoch": 1.75, + "learning_rate": 2.470953796271278e-07, + "loss": 1.8509, + "step": 12975 + }, + { + "epoch": 1.75, + "learning_rate": 2.464198865171575e-07, + "loss": 2.0183, + "step": 12980 + }, + { + "epoch": 1.75, + "learning_rate": 2.4574439340718724e-07, + "loss": 1.9777, + "step": 12985 + }, + { + "epoch": 1.75, + "learning_rate": 2.4506890029721697e-07, + "loss": 1.9786, + "step": 12990 + }, + { + "epoch": 1.76, + "learning_rate": 2.443934071872467e-07, + "loss": 1.9021, + "step": 12995 + }, + { + "epoch": 1.76, + "learning_rate": 2.437179140772764e-07, + "loss": 2.0193, + "step": 13000 + }, + { + "epoch": 1.76, + "learning_rate": 2.4304242096730615e-07, + "loss": 1.8966, + "step": 13005 + }, + { + "epoch": 1.76, + "learning_rate": 2.423669278573358e-07, + "loss": 2.0073, + "step": 13010 + }, + { + "epoch": 1.76, + "learning_rate": 2.4169143474736555e-07, + "loss": 1.9552, + "step": 13015 + }, + { + "epoch": 1.76, + "learning_rate": 2.410159416373953e-07, + "loss": 1.9251, + "step": 13020 + }, + { + "epoch": 1.76, + "learning_rate": 2.40340448527425e-07, + "loss": 2.0429, + "step": 13025 + }, + { + "epoch": 1.76, + "learning_rate": 2.3966495541745473e-07, + "loss": 2.0433, + "step": 13030 + }, + { + "epoch": 1.76, + "learning_rate": 2.3898946230748445e-07, + "loss": 1.9169, + "step": 13035 + }, + { + "epoch": 1.76, + "learning_rate": 2.3831396919751418e-07, + "loss": 1.9662, + "step": 13040 + }, + { + "epoch": 1.76, + "learning_rate": 2.376384760875439e-07, + "loss": 1.9404, + "step": 13045 + }, + { + "epoch": 1.76, + "learning_rate": 2.369629829775736e-07, + "loss": 1.9799, + "step": 13050 + }, + { + "epoch": 1.76, + "learning_rate": 2.3628748986760333e-07, + "loss": 1.8214, + "step": 13055 + }, + { + "epoch": 1.76, + "learning_rate": 2.3561199675763308e-07, + "loss": 1.9932, + "step": 13060 + }, + { + "epoch": 1.77, + "learning_rate": 2.3493650364766278e-07, + "loss": 1.9733, + "step": 13065 + }, + { + "epoch": 1.77, + "learning_rate": 2.342610105376925e-07, + "loss": 1.9566, + "step": 13070 + }, + { + "epoch": 1.77, + "learning_rate": 2.3358551742772224e-07, + "loss": 1.897, + "step": 13075 + }, + { + "epoch": 1.77, + "learning_rate": 2.3291002431775194e-07, + "loss": 1.9646, + "step": 13080 + }, + { + "epoch": 1.77, + "learning_rate": 2.3223453120778166e-07, + "loss": 2.0013, + "step": 13085 + }, + { + "epoch": 1.77, + "learning_rate": 2.315590380978114e-07, + "loss": 1.8838, + "step": 13090 + }, + { + "epoch": 1.77, + "learning_rate": 2.308835449878411e-07, + "loss": 2.0112, + "step": 13095 + }, + { + "epoch": 1.77, + "learning_rate": 2.3020805187787084e-07, + "loss": 1.9287, + "step": 13100 + }, + { + "epoch": 1.77, + "learning_rate": 2.2953255876790057e-07, + "loss": 2.0699, + "step": 13105 + }, + { + "epoch": 1.77, + "learning_rate": 2.288570656579303e-07, + "loss": 1.8843, + "step": 13110 + }, + { + "epoch": 1.77, + "learning_rate": 2.2818157254796e-07, + "loss": 1.9018, + "step": 13115 + }, + { + "epoch": 1.77, + "learning_rate": 2.2750607943798972e-07, + "loss": 1.9611, + "step": 13120 + }, + { + "epoch": 1.77, + "learning_rate": 2.2683058632801945e-07, + "loss": 1.9831, + "step": 13125 + }, + { + "epoch": 1.77, + "learning_rate": 2.2615509321804915e-07, + "loss": 1.9069, + "step": 13130 + }, + { + "epoch": 1.77, + "learning_rate": 2.254796001080789e-07, + "loss": 1.9747, + "step": 13135 + }, + { + "epoch": 1.78, + "learning_rate": 2.2480410699810863e-07, + "loss": 1.9792, + "step": 13140 + }, + { + "epoch": 1.78, + "learning_rate": 2.2412861388813833e-07, + "loss": 2.0183, + "step": 13145 + }, + { + "epoch": 1.78, + "learning_rate": 2.2345312077816805e-07, + "loss": 1.9591, + "step": 13150 + }, + { + "epoch": 1.78, + "learning_rate": 2.2277762766819778e-07, + "loss": 1.9085, + "step": 13155 + }, + { + "epoch": 1.78, + "learning_rate": 2.2210213455822748e-07, + "loss": 2.0675, + "step": 13160 + }, + { + "epoch": 1.78, + "learning_rate": 2.214266414482572e-07, + "loss": 1.9204, + "step": 13165 + }, + { + "epoch": 1.78, + "learning_rate": 2.2075114833828696e-07, + "loss": 2.035, + "step": 13170 + }, + { + "epoch": 1.78, + "learning_rate": 2.2007565522831668e-07, + "loss": 2.0147, + "step": 13175 + }, + { + "epoch": 1.78, + "learning_rate": 2.1940016211834638e-07, + "loss": 1.917, + "step": 13180 + }, + { + "epoch": 1.78, + "learning_rate": 2.187246690083761e-07, + "loss": 1.9055, + "step": 13185 + }, + { + "epoch": 1.78, + "learning_rate": 2.1804917589840584e-07, + "loss": 1.9916, + "step": 13190 + }, + { + "epoch": 1.78, + "learning_rate": 2.1737368278843553e-07, + "loss": 1.9845, + "step": 13195 + }, + { + "epoch": 1.78, + "learning_rate": 2.1669818967846526e-07, + "loss": 2.0573, + "step": 13200 + }, + { + "epoch": 1.78, + "eval_loss": 1.9494786262512207, + "eval_runtime": 165.1665, + "eval_samples_per_second": 3.608, + "eval_steps_per_second": 0.454, + "step": 13200 + }, + { + "epoch": 1.78, + "learning_rate": 2.1602269656849501e-07, + "loss": 2.0237, + "step": 13205 + }, + { + "epoch": 1.78, + "learning_rate": 2.1534720345852471e-07, + "loss": 1.9262, + "step": 13210 + }, + { + "epoch": 1.79, + "learning_rate": 2.1467171034855444e-07, + "loss": 1.9819, + "step": 13215 + }, + { + "epoch": 1.79, + "learning_rate": 2.1399621723858417e-07, + "loss": 1.9537, + "step": 13220 + }, + { + "epoch": 1.79, + "learning_rate": 2.1332072412861387e-07, + "loss": 1.8926, + "step": 13225 + }, + { + "epoch": 1.79, + "learning_rate": 2.126452310186436e-07, + "loss": 1.9364, + "step": 13230 + }, + { + "epoch": 1.79, + "learning_rate": 2.1196973790867332e-07, + "loss": 1.9544, + "step": 13235 + }, + { + "epoch": 1.79, + "learning_rate": 2.1129424479870302e-07, + "loss": 1.9288, + "step": 13240 + }, + { + "epoch": 1.79, + "learning_rate": 2.1061875168873277e-07, + "loss": 1.9206, + "step": 13245 + }, + { + "epoch": 1.79, + "learning_rate": 2.099432585787625e-07, + "loss": 1.9185, + "step": 13250 + }, + { + "epoch": 1.79, + "learning_rate": 2.0926776546879222e-07, + "loss": 1.9306, + "step": 13255 + }, + { + "epoch": 1.79, + "learning_rate": 2.0859227235882192e-07, + "loss": 1.9818, + "step": 13260 + }, + { + "epoch": 1.79, + "learning_rate": 2.0791677924885165e-07, + "loss": 1.9057, + "step": 13265 + }, + { + "epoch": 1.79, + "learning_rate": 2.0724128613888138e-07, + "loss": 1.9167, + "step": 13270 + }, + { + "epoch": 1.79, + "learning_rate": 2.0656579302891108e-07, + "loss": 1.9288, + "step": 13275 + }, + { + "epoch": 1.79, + "learning_rate": 2.0589029991894083e-07, + "loss": 2.0249, + "step": 13280 + }, + { + "epoch": 1.79, + "learning_rate": 2.0521480680897055e-07, + "loss": 2.0134, + "step": 13285 + }, + { + "epoch": 1.8, + "learning_rate": 2.0453931369900025e-07, + "loss": 2.0235, + "step": 13290 + }, + { + "epoch": 1.8, + "learning_rate": 2.0386382058902998e-07, + "loss": 1.8421, + "step": 13295 + }, + { + "epoch": 1.8, + "learning_rate": 2.031883274790597e-07, + "loss": 1.9947, + "step": 13300 + }, + { + "epoch": 1.8, + "learning_rate": 2.025128343690894e-07, + "loss": 2.0185, + "step": 13305 + }, + { + "epoch": 1.8, + "learning_rate": 2.0183734125911913e-07, + "loss": 1.955, + "step": 13310 + }, + { + "epoch": 1.8, + "learning_rate": 2.0116184814914889e-07, + "loss": 2.025, + "step": 13315 + }, + { + "epoch": 1.8, + "learning_rate": 2.004863550391786e-07, + "loss": 2.0244, + "step": 13320 + }, + { + "epoch": 1.8, + "learning_rate": 1.998108619292083e-07, + "loss": 1.9509, + "step": 13325 + }, + { + "epoch": 1.8, + "learning_rate": 1.9913536881923804e-07, + "loss": 2.064, + "step": 13330 + }, + { + "epoch": 1.8, + "learning_rate": 1.9845987570926776e-07, + "loss": 1.8872, + "step": 13335 + }, + { + "epoch": 1.8, + "learning_rate": 1.9778438259929746e-07, + "loss": 1.9271, + "step": 13340 + }, + { + "epoch": 1.8, + "learning_rate": 1.971088894893272e-07, + "loss": 1.9738, + "step": 13345 + }, + { + "epoch": 1.8, + "learning_rate": 1.9643339637935692e-07, + "loss": 1.9089, + "step": 13350 + }, + { + "epoch": 1.8, + "learning_rate": 1.9575790326938664e-07, + "loss": 1.9611, + "step": 13355 + }, + { + "epoch": 1.8, + "learning_rate": 1.9508241015941637e-07, + "loss": 1.8635, + "step": 13360 + }, + { + "epoch": 1.81, + "learning_rate": 1.944069170494461e-07, + "loss": 2.0379, + "step": 13365 + }, + { + "epoch": 1.81, + "learning_rate": 1.937314239394758e-07, + "loss": 1.9776, + "step": 13370 + }, + { + "epoch": 1.81, + "learning_rate": 1.9305593082950552e-07, + "loss": 1.8788, + "step": 13375 + }, + { + "epoch": 1.81, + "learning_rate": 1.9238043771953525e-07, + "loss": 1.9518, + "step": 13380 + }, + { + "epoch": 1.81, + "learning_rate": 1.9170494460956497e-07, + "loss": 2.0224, + "step": 13385 + }, + { + "epoch": 1.81, + "learning_rate": 1.910294514995947e-07, + "loss": 1.8859, + "step": 13390 + }, + { + "epoch": 1.81, + "learning_rate": 1.9035395838962443e-07, + "loss": 1.9991, + "step": 13395 + }, + { + "epoch": 1.81, + "learning_rate": 1.8967846527965415e-07, + "loss": 1.9029, + "step": 13400 + }, + { + "epoch": 1.81, + "learning_rate": 1.8900297216968385e-07, + "loss": 1.9768, + "step": 13405 + }, + { + "epoch": 1.81, + "learning_rate": 1.8832747905971358e-07, + "loss": 1.8818, + "step": 13410 + }, + { + "epoch": 1.81, + "learning_rate": 1.876519859497433e-07, + "loss": 1.8601, + "step": 13415 + }, + { + "epoch": 1.81, + "learning_rate": 1.86976492839773e-07, + "loss": 1.9527, + "step": 13420 + }, + { + "epoch": 1.81, + "learning_rate": 1.8630099972980276e-07, + "loss": 1.9236, + "step": 13425 + }, + { + "epoch": 1.81, + "learning_rate": 1.8562550661983248e-07, + "loss": 1.8954, + "step": 13430 + }, + { + "epoch": 1.81, + "learning_rate": 1.8495001350986218e-07, + "loss": 2.0441, + "step": 13435 + }, + { + "epoch": 1.82, + "learning_rate": 1.842745203998919e-07, + "loss": 1.9821, + "step": 13440 + }, + { + "epoch": 1.82, + "learning_rate": 1.8359902728992164e-07, + "loss": 1.958, + "step": 13445 + }, + { + "epoch": 1.82, + "learning_rate": 1.8292353417995134e-07, + "loss": 1.8381, + "step": 13450 + }, + { + "epoch": 1.82, + "learning_rate": 1.8224804106998106e-07, + "loss": 1.9425, + "step": 13455 + }, + { + "epoch": 1.82, + "learning_rate": 1.8157254796001082e-07, + "loss": 1.9096, + "step": 13460 + }, + { + "epoch": 1.82, + "learning_rate": 1.8089705485004054e-07, + "loss": 1.9968, + "step": 13465 + }, + { + "epoch": 1.82, + "learning_rate": 1.8022156174007024e-07, + "loss": 1.8728, + "step": 13470 + }, + { + "epoch": 1.82, + "learning_rate": 1.7954606863009997e-07, + "loss": 1.9997, + "step": 13475 + }, + { + "epoch": 1.82, + "learning_rate": 1.788705755201297e-07, + "loss": 1.931, + "step": 13480 + }, + { + "epoch": 1.82, + "learning_rate": 1.781950824101594e-07, + "loss": 1.9951, + "step": 13485 + }, + { + "epoch": 1.82, + "learning_rate": 1.7751958930018912e-07, + "loss": 1.9485, + "step": 13490 + }, + { + "epoch": 1.82, + "learning_rate": 1.7684409619021885e-07, + "loss": 2.0155, + "step": 13495 + }, + { + "epoch": 1.82, + "learning_rate": 1.7616860308024857e-07, + "loss": 1.9448, + "step": 13500 + }, + { + "epoch": 1.82, + "learning_rate": 1.754931099702783e-07, + "loss": 2.0389, + "step": 13505 + }, + { + "epoch": 1.83, + "learning_rate": 1.7481761686030803e-07, + "loss": 1.9617, + "step": 13510 + }, + { + "epoch": 1.83, + "learning_rate": 1.7414212375033773e-07, + "loss": 2.0045, + "step": 13515 + }, + { + "epoch": 1.83, + "learning_rate": 1.7346663064036745e-07, + "loss": 1.8886, + "step": 13520 + }, + { + "epoch": 1.83, + "learning_rate": 1.7279113753039718e-07, + "loss": 2.0266, + "step": 13525 + }, + { + "epoch": 1.83, + "learning_rate": 1.721156444204269e-07, + "loss": 2.0236, + "step": 13530 + }, + { + "epoch": 1.83, + "learning_rate": 1.7144015131045663e-07, + "loss": 1.9988, + "step": 13535 + }, + { + "epoch": 1.83, + "learning_rate": 1.7076465820048636e-07, + "loss": 1.8733, + "step": 13540 + }, + { + "epoch": 1.83, + "learning_rate": 1.7008916509051608e-07, + "loss": 1.886, + "step": 13545 + }, + { + "epoch": 1.83, + "learning_rate": 1.6941367198054578e-07, + "loss": 2.0076, + "step": 13550 + }, + { + "epoch": 1.83, + "learning_rate": 1.687381788705755e-07, + "loss": 1.9322, + "step": 13555 + }, + { + "epoch": 1.83, + "learning_rate": 1.6806268576060524e-07, + "loss": 1.955, + "step": 13560 + }, + { + "epoch": 1.83, + "learning_rate": 1.6738719265063494e-07, + "loss": 1.9884, + "step": 13565 + }, + { + "epoch": 1.83, + "learning_rate": 1.667116995406647e-07, + "loss": 1.9705, + "step": 13570 + }, + { + "epoch": 1.83, + "learning_rate": 1.6603620643069441e-07, + "loss": 2.0089, + "step": 13575 + }, + { + "epoch": 1.83, + "learning_rate": 1.6536071332072411e-07, + "loss": 2.0441, + "step": 13580 + }, + { + "epoch": 1.84, + "learning_rate": 1.6468522021075384e-07, + "loss": 1.9654, + "step": 13585 + }, + { + "epoch": 1.84, + "learning_rate": 1.6400972710078357e-07, + "loss": 1.919, + "step": 13590 + }, + { + "epoch": 1.84, + "learning_rate": 1.633342339908133e-07, + "loss": 2.0188, + "step": 13595 + }, + { + "epoch": 1.84, + "learning_rate": 1.62658740880843e-07, + "loss": 2.0181, + "step": 13600 + }, + { + "epoch": 1.84, + "eval_loss": 1.9488131999969482, + "eval_runtime": 165.4599, + "eval_samples_per_second": 3.602, + "eval_steps_per_second": 0.453, + "step": 13600 + }, + { + "epoch": 1.84, + "learning_rate": 1.6198324777087272e-07, + "loss": 1.8996, + "step": 13605 + }, + { + "epoch": 1.84, + "learning_rate": 1.6130775466090247e-07, + "loss": 1.9036, + "step": 13610 + }, + { + "epoch": 1.84, + "learning_rate": 1.6063226155093217e-07, + "loss": 1.9854, + "step": 13615 + }, + { + "epoch": 1.84, + "learning_rate": 1.599567684409619e-07, + "loss": 2.0518, + "step": 13620 + }, + { + "epoch": 1.84, + "learning_rate": 1.5928127533099162e-07, + "loss": 1.9395, + "step": 13625 + }, + { + "epoch": 1.84, + "learning_rate": 1.5860578222102132e-07, + "loss": 1.941, + "step": 13630 + }, + { + "epoch": 1.84, + "learning_rate": 1.5793028911105105e-07, + "loss": 1.8729, + "step": 13635 + }, + { + "epoch": 1.84, + "learning_rate": 1.5725479600108078e-07, + "loss": 2.0032, + "step": 13640 + }, + { + "epoch": 1.84, + "learning_rate": 1.565793028911105e-07, + "loss": 1.8852, + "step": 13645 + }, + { + "epoch": 1.84, + "learning_rate": 1.5590380978114023e-07, + "loss": 2.0203, + "step": 13650 + }, + { + "epoch": 1.84, + "learning_rate": 1.5522831667116996e-07, + "loss": 2.0542, + "step": 13655 + }, + { + "epoch": 1.85, + "learning_rate": 1.5455282356119968e-07, + "loss": 1.979, + "step": 13660 + }, + { + "epoch": 1.85, + "learning_rate": 1.5387733045122938e-07, + "loss": 1.9459, + "step": 13665 + }, + { + "epoch": 1.85, + "learning_rate": 1.532018373412591e-07, + "loss": 1.9584, + "step": 13670 + }, + { + "epoch": 1.85, + "learning_rate": 1.5252634423128883e-07, + "loss": 2.0211, + "step": 13675 + }, + { + "epoch": 1.85, + "learning_rate": 1.5185085112131856e-07, + "loss": 1.9053, + "step": 13680 + }, + { + "epoch": 1.85, + "learning_rate": 1.5117535801134829e-07, + "loss": 1.9061, + "step": 13685 + }, + { + "epoch": 1.85, + "learning_rate": 1.50499864901378e-07, + "loss": 1.963, + "step": 13690 + }, + { + "epoch": 1.85, + "learning_rate": 1.498243717914077e-07, + "loss": 1.9455, + "step": 13695 + }, + { + "epoch": 1.85, + "learning_rate": 1.4914887868143744e-07, + "loss": 1.9384, + "step": 13700 + }, + { + "epoch": 1.85, + "learning_rate": 1.4847338557146717e-07, + "loss": 1.974, + "step": 13705 + }, + { + "epoch": 1.85, + "learning_rate": 1.4779789246149686e-07, + "loss": 2.0507, + "step": 13710 + }, + { + "epoch": 1.85, + "learning_rate": 1.4712239935152662e-07, + "loss": 1.8949, + "step": 13715 + }, + { + "epoch": 1.85, + "learning_rate": 1.4644690624155634e-07, + "loss": 1.9455, + "step": 13720 + }, + { + "epoch": 1.85, + "learning_rate": 1.4577141313158607e-07, + "loss": 1.8905, + "step": 13725 + }, + { + "epoch": 1.85, + "learning_rate": 1.4509592002161577e-07, + "loss": 1.9834, + "step": 13730 + }, + { + "epoch": 1.86, + "learning_rate": 1.444204269116455e-07, + "loss": 1.9444, + "step": 13735 + }, + { + "epoch": 1.86, + "learning_rate": 1.4374493380167522e-07, + "loss": 1.8684, + "step": 13740 + }, + { + "epoch": 1.86, + "learning_rate": 1.4306944069170492e-07, + "loss": 1.9758, + "step": 13745 + }, + { + "epoch": 1.86, + "learning_rate": 1.4239394758173465e-07, + "loss": 1.9717, + "step": 13750 + }, + { + "epoch": 1.86, + "learning_rate": 1.417184544717644e-07, + "loss": 2.0405, + "step": 13755 + }, + { + "epoch": 1.86, + "learning_rate": 1.410429613617941e-07, + "loss": 1.8387, + "step": 13760 + }, + { + "epoch": 1.86, + "learning_rate": 1.4036746825182383e-07, + "loss": 1.8584, + "step": 13765 + }, + { + "epoch": 1.86, + "learning_rate": 1.3969197514185355e-07, + "loss": 2.015, + "step": 13770 + }, + { + "epoch": 1.86, + "learning_rate": 1.3901648203188325e-07, + "loss": 1.9601, + "step": 13775 + }, + { + "epoch": 1.86, + "learning_rate": 1.3834098892191298e-07, + "loss": 2.0155, + "step": 13780 + }, + { + "epoch": 1.86, + "learning_rate": 1.376654958119427e-07, + "loss": 1.9741, + "step": 13785 + }, + { + "epoch": 1.86, + "learning_rate": 1.3699000270197246e-07, + "loss": 1.9371, + "step": 13790 + }, + { + "epoch": 1.86, + "learning_rate": 1.3631450959200216e-07, + "loss": 2.0218, + "step": 13795 + }, + { + "epoch": 1.86, + "learning_rate": 1.3563901648203188e-07, + "loss": 2.0401, + "step": 13800 + }, + { + "epoch": 1.86, + "learning_rate": 1.349635233720616e-07, + "loss": 2.0355, + "step": 13805 + }, + { + "epoch": 1.87, + "learning_rate": 1.342880302620913e-07, + "loss": 2.0323, + "step": 13810 + }, + { + "epoch": 1.87, + "learning_rate": 1.3361253715212104e-07, + "loss": 1.9805, + "step": 13815 + }, + { + "epoch": 1.87, + "learning_rate": 1.3293704404215076e-07, + "loss": 1.9251, + "step": 13820 + }, + { + "epoch": 1.87, + "learning_rate": 1.322615509321805e-07, + "loss": 1.9105, + "step": 13825 + }, + { + "epoch": 1.87, + "learning_rate": 1.3158605782221022e-07, + "loss": 1.9058, + "step": 13830 + }, + { + "epoch": 1.87, + "learning_rate": 1.3091056471223994e-07, + "loss": 1.99, + "step": 13835 + }, + { + "epoch": 1.87, + "learning_rate": 1.3023507160226964e-07, + "loss": 2.0117, + "step": 13840 + }, + { + "epoch": 1.87, + "learning_rate": 1.2955957849229937e-07, + "loss": 1.9481, + "step": 13845 + }, + { + "epoch": 1.87, + "learning_rate": 1.288840853823291e-07, + "loss": 1.9639, + "step": 13850 + }, + { + "epoch": 1.87, + "learning_rate": 1.282085922723588e-07, + "loss": 1.8786, + "step": 13855 + }, + { + "epoch": 1.87, + "learning_rate": 1.2753309916238852e-07, + "loss": 1.9065, + "step": 13860 + }, + { + "epoch": 1.87, + "learning_rate": 1.2685760605241827e-07, + "loss": 2.0212, + "step": 13865 + }, + { + "epoch": 1.87, + "learning_rate": 1.26182112942448e-07, + "loss": 2.0138, + "step": 13870 + }, + { + "epoch": 1.87, + "learning_rate": 1.255066198324777e-07, + "loss": 1.8938, + "step": 13875 + }, + { + "epoch": 1.88, + "learning_rate": 1.2483112672250743e-07, + "loss": 2.0057, + "step": 13880 + }, + { + "epoch": 1.88, + "learning_rate": 1.2415563361253715e-07, + "loss": 1.9609, + "step": 13885 + }, + { + "epoch": 1.88, + "learning_rate": 1.2348014050256688e-07, + "loss": 1.9497, + "step": 13890 + }, + { + "epoch": 1.88, + "learning_rate": 1.2280464739259658e-07, + "loss": 1.9263, + "step": 13895 + }, + { + "epoch": 1.88, + "learning_rate": 1.221291542826263e-07, + "loss": 1.9301, + "step": 13900 + }, + { + "epoch": 1.88, + "learning_rate": 1.2145366117265603e-07, + "loss": 1.9621, + "step": 13905 + }, + { + "epoch": 1.88, + "learning_rate": 1.2077816806268576e-07, + "loss": 1.9314, + "step": 13910 + }, + { + "epoch": 1.88, + "learning_rate": 1.2010267495271548e-07, + "loss": 1.9993, + "step": 13915 + }, + { + "epoch": 1.88, + "learning_rate": 1.1942718184274518e-07, + "loss": 1.9101, + "step": 13920 + }, + { + "epoch": 1.88, + "learning_rate": 1.1875168873277492e-07, + "loss": 1.9138, + "step": 13925 + }, + { + "epoch": 1.88, + "learning_rate": 1.1807619562280465e-07, + "loss": 2.0387, + "step": 13930 + }, + { + "epoch": 1.88, + "learning_rate": 1.1740070251283436e-07, + "loss": 1.9822, + "step": 13935 + }, + { + "epoch": 1.88, + "learning_rate": 1.1672520940286408e-07, + "loss": 1.8946, + "step": 13940 + }, + { + "epoch": 1.88, + "learning_rate": 1.1604971629289381e-07, + "loss": 1.9925, + "step": 13945 + }, + { + "epoch": 1.88, + "learning_rate": 1.1537422318292353e-07, + "loss": 1.9311, + "step": 13950 + }, + { + "epoch": 1.89, + "learning_rate": 1.1469873007295325e-07, + "loss": 1.9261, + "step": 13955 + }, + { + "epoch": 1.89, + "learning_rate": 1.1402323696298297e-07, + "loss": 2.0045, + "step": 13960 + }, + { + "epoch": 1.89, + "learning_rate": 1.1334774385301269e-07, + "loss": 1.932, + "step": 13965 + }, + { + "epoch": 1.89, + "learning_rate": 1.1267225074304242e-07, + "loss": 1.9065, + "step": 13970 + }, + { + "epoch": 1.89, + "learning_rate": 1.1199675763307213e-07, + "loss": 1.8788, + "step": 13975 + }, + { + "epoch": 1.89, + "learning_rate": 1.1132126452310187e-07, + "loss": 1.9045, + "step": 13980 + }, + { + "epoch": 1.89, + "learning_rate": 1.1064577141313159e-07, + "loss": 1.9846, + "step": 13985 + }, + { + "epoch": 1.89, + "learning_rate": 1.099702783031613e-07, + "loss": 1.9391, + "step": 13990 + }, + { + "epoch": 1.89, + "learning_rate": 1.0929478519319102e-07, + "loss": 2.0232, + "step": 13995 + }, + { + "epoch": 1.89, + "learning_rate": 1.0861929208322075e-07, + "loss": 1.9595, + "step": 14000 + }, + { + "epoch": 1.89, + "eval_loss": 1.9482014179229736, + "eval_runtime": 165.3869, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.453, + "step": 14000 + }, + { + "epoch": 1.89, + "learning_rate": 1.0794379897325046e-07, + "loss": 1.995, + "step": 14005 + }, + { + "epoch": 1.89, + "learning_rate": 1.0726830586328019e-07, + "loss": 1.9835, + "step": 14010 + }, + { + "epoch": 1.89, + "learning_rate": 1.065928127533099e-07, + "loss": 1.9466, + "step": 14015 + }, + { + "epoch": 1.89, + "learning_rate": 1.0591731964333964e-07, + "loss": 2.0073, + "step": 14020 + }, + { + "epoch": 1.89, + "learning_rate": 1.0524182653336936e-07, + "loss": 2.0322, + "step": 14025 + }, + { + "epoch": 1.9, + "learning_rate": 1.0456633342339907e-07, + "loss": 1.9171, + "step": 14030 + }, + { + "epoch": 1.9, + "learning_rate": 1.0389084031342881e-07, + "loss": 1.9881, + "step": 14035 + }, + { + "epoch": 1.9, + "learning_rate": 1.0321534720345852e-07, + "loss": 2.0138, + "step": 14040 + }, + { + "epoch": 1.9, + "learning_rate": 1.0253985409348823e-07, + "loss": 2.0584, + "step": 14045 + }, + { + "epoch": 1.9, + "learning_rate": 1.0186436098351796e-07, + "loss": 1.926, + "step": 14050 + }, + { + "epoch": 1.9, + "learning_rate": 1.0118886787354769e-07, + "loss": 1.9551, + "step": 14055 + }, + { + "epoch": 1.9, + "learning_rate": 1.0051337476357741e-07, + "loss": 1.9745, + "step": 14060 + }, + { + "epoch": 1.9, + "learning_rate": 9.983788165360713e-08, + "loss": 2.0285, + "step": 14065 + }, + { + "epoch": 1.9, + "learning_rate": 9.916238854363685e-08, + "loss": 2.0206, + "step": 14070 + }, + { + "epoch": 1.9, + "learning_rate": 9.848689543366658e-08, + "loss": 1.9034, + "step": 14075 + }, + { + "epoch": 1.9, + "learning_rate": 9.781140232369629e-08, + "loss": 1.9332, + "step": 14080 + }, + { + "epoch": 1.9, + "learning_rate": 9.7135909213726e-08, + "loss": 1.8562, + "step": 14085 + }, + { + "epoch": 1.9, + "learning_rate": 9.646041610375574e-08, + "loss": 1.9742, + "step": 14090 + }, + { + "epoch": 1.9, + "learning_rate": 9.578492299378546e-08, + "loss": 1.9395, + "step": 14095 + }, + { + "epoch": 1.9, + "learning_rate": 9.510942988381518e-08, + "loss": 2.0896, + "step": 14100 + }, + { + "epoch": 1.91, + "learning_rate": 9.44339367738449e-08, + "loss": 1.9833, + "step": 14105 + }, + { + "epoch": 1.91, + "learning_rate": 9.375844366387462e-08, + "loss": 1.909, + "step": 14110 + }, + { + "epoch": 1.91, + "learning_rate": 9.308295055390435e-08, + "loss": 1.8967, + "step": 14115 + }, + { + "epoch": 1.91, + "learning_rate": 9.240745744393406e-08, + "loss": 1.9244, + "step": 14120 + }, + { + "epoch": 1.91, + "learning_rate": 9.17319643339638e-08, + "loss": 1.8847, + "step": 14125 + }, + { + "epoch": 1.91, + "learning_rate": 9.105647122399351e-08, + "loss": 1.9759, + "step": 14130 + }, + { + "epoch": 1.91, + "learning_rate": 9.038097811402323e-08, + "loss": 2.0605, + "step": 14135 + }, + { + "epoch": 1.91, + "learning_rate": 8.970548500405295e-08, + "loss": 2.0337, + "step": 14140 + }, + { + "epoch": 1.91, + "learning_rate": 8.902999189408268e-08, + "loss": 2.0754, + "step": 14145 + }, + { + "epoch": 1.91, + "learning_rate": 8.835449878411239e-08, + "loss": 2.1058, + "step": 14150 + }, + { + "epoch": 1.91, + "learning_rate": 8.767900567414212e-08, + "loss": 1.9074, + "step": 14155 + }, + { + "epoch": 1.91, + "learning_rate": 8.700351256417183e-08, + "loss": 1.9853, + "step": 14160 + }, + { + "epoch": 1.91, + "learning_rate": 8.632801945420157e-08, + "loss": 1.9416, + "step": 14165 + }, + { + "epoch": 1.91, + "learning_rate": 8.565252634423129e-08, + "loss": 1.9635, + "step": 14170 + }, + { + "epoch": 1.91, + "learning_rate": 8.4977033234261e-08, + "loss": 1.9167, + "step": 14175 + }, + { + "epoch": 1.92, + "learning_rate": 8.430154012429074e-08, + "loss": 2.0947, + "step": 14180 + }, + { + "epoch": 1.92, + "learning_rate": 8.362604701432045e-08, + "loss": 1.9955, + "step": 14185 + }, + { + "epoch": 1.92, + "learning_rate": 8.295055390435018e-08, + "loss": 1.9231, + "step": 14190 + }, + { + "epoch": 1.92, + "learning_rate": 8.227506079437989e-08, + "loss": 1.9107, + "step": 14195 + }, + { + "epoch": 1.92, + "learning_rate": 8.159956768440962e-08, + "loss": 1.9193, + "step": 14200 + }, + { + "epoch": 1.92, + "learning_rate": 8.092407457443934e-08, + "loss": 1.955, + "step": 14205 + }, + { + "epoch": 1.92, + "learning_rate": 8.024858146446906e-08, + "loss": 2.0003, + "step": 14210 + }, + { + "epoch": 1.92, + "learning_rate": 7.957308835449877e-08, + "loss": 1.993, + "step": 14215 + }, + { + "epoch": 1.92, + "learning_rate": 7.889759524452851e-08, + "loss": 1.9449, + "step": 14220 + }, + { + "epoch": 1.92, + "learning_rate": 7.822210213455822e-08, + "loss": 2.0275, + "step": 14225 + }, + { + "epoch": 1.92, + "learning_rate": 7.754660902458795e-08, + "loss": 1.955, + "step": 14230 + }, + { + "epoch": 1.92, + "learning_rate": 7.687111591461767e-08, + "loss": 1.9618, + "step": 14235 + }, + { + "epoch": 1.92, + "learning_rate": 7.619562280464739e-08, + "loss": 1.9257, + "step": 14240 + }, + { + "epoch": 1.92, + "learning_rate": 7.552012969467711e-08, + "loss": 1.9296, + "step": 14245 + }, + { + "epoch": 1.93, + "learning_rate": 7.484463658470683e-08, + "loss": 2.0142, + "step": 14250 + }, + { + "epoch": 1.93, + "learning_rate": 7.416914347473655e-08, + "loss": 1.9446, + "step": 14255 + }, + { + "epoch": 1.93, + "learning_rate": 7.349365036476628e-08, + "loss": 1.9638, + "step": 14260 + }, + { + "epoch": 1.93, + "learning_rate": 7.281815725479599e-08, + "loss": 1.9541, + "step": 14265 + }, + { + "epoch": 1.93, + "learning_rate": 7.214266414482572e-08, + "loss": 1.9324, + "step": 14270 + }, + { + "epoch": 1.93, + "learning_rate": 7.146717103485544e-08, + "loss": 1.9464, + "step": 14275 + }, + { + "epoch": 1.93, + "learning_rate": 7.079167792488516e-08, + "loss": 1.8618, + "step": 14280 + }, + { + "epoch": 1.93, + "learning_rate": 7.011618481491488e-08, + "loss": 1.8554, + "step": 14285 + }, + { + "epoch": 1.93, + "learning_rate": 6.944069170494461e-08, + "loss": 1.8643, + "step": 14290 + }, + { + "epoch": 1.93, + "learning_rate": 6.876519859497434e-08, + "loss": 1.8951, + "step": 14295 + }, + { + "epoch": 1.93, + "learning_rate": 6.808970548500405e-08, + "loss": 2.0413, + "step": 14300 + }, + { + "epoch": 1.93, + "learning_rate": 6.741421237503376e-08, + "loss": 2.0529, + "step": 14305 + }, + { + "epoch": 1.93, + "learning_rate": 6.67387192650635e-08, + "loss": 1.9858, + "step": 14310 + }, + { + "epoch": 1.93, + "learning_rate": 6.606322615509321e-08, + "loss": 1.9525, + "step": 14315 + }, + { + "epoch": 1.93, + "learning_rate": 6.538773304512293e-08, + "loss": 1.9431, + "step": 14320 + }, + { + "epoch": 1.94, + "learning_rate": 6.471223993515267e-08, + "loss": 2.0377, + "step": 14325 + }, + { + "epoch": 1.94, + "learning_rate": 6.403674682518238e-08, + "loss": 1.804, + "step": 14330 + }, + { + "epoch": 1.94, + "learning_rate": 6.336125371521211e-08, + "loss": 2.0369, + "step": 14335 + }, + { + "epoch": 1.94, + "learning_rate": 6.268576060524182e-08, + "loss": 1.9369, + "step": 14340 + }, + { + "epoch": 1.94, + "learning_rate": 6.201026749527155e-08, + "loss": 1.9237, + "step": 14345 + }, + { + "epoch": 1.94, + "learning_rate": 6.133477438530127e-08, + "loss": 1.9972, + "step": 14350 + }, + { + "epoch": 1.94, + "learning_rate": 6.065928127533099e-08, + "loss": 1.9778, + "step": 14355 + }, + { + "epoch": 1.94, + "learning_rate": 5.998378816536071e-08, + "loss": 1.9137, + "step": 14360 + }, + { + "epoch": 1.94, + "learning_rate": 5.930829505539043e-08, + "loss": 1.9997, + "step": 14365 + }, + { + "epoch": 1.94, + "learning_rate": 5.863280194542016e-08, + "loss": 1.9587, + "step": 14370 + }, + { + "epoch": 1.94, + "learning_rate": 5.795730883544987e-08, + "loss": 2.0108, + "step": 14375 + }, + { + "epoch": 1.94, + "learning_rate": 5.72818157254796e-08, + "loss": 1.8876, + "step": 14380 + }, + { + "epoch": 1.94, + "learning_rate": 5.660632261550932e-08, + "loss": 2.042, + "step": 14385 + }, + { + "epoch": 1.94, + "learning_rate": 5.593082950553904e-08, + "loss": 1.9656, + "step": 14390 + }, + { + "epoch": 1.94, + "learning_rate": 5.525533639556876e-08, + "loss": 1.9729, + "step": 14395 + }, + { + "epoch": 1.95, + "learning_rate": 5.457984328559848e-08, + "loss": 2.0976, + "step": 14400 + }, + { + "epoch": 1.95, + "eval_loss": 1.9481086730957031, + "eval_runtime": 165.5103, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.453, + "step": 14400 + }, + { + "epoch": 1.95, + "learning_rate": 5.390435017562821e-08, + "loss": 1.8936, + "step": 14405 + }, + { + "epoch": 1.95, + "learning_rate": 5.322885706565793e-08, + "loss": 1.9298, + "step": 14410 + }, + { + "epoch": 1.95, + "learning_rate": 5.255336395568765e-08, + "loss": 1.9587, + "step": 14415 + }, + { + "epoch": 1.95, + "learning_rate": 5.187787084571737e-08, + "loss": 1.8686, + "step": 14420 + }, + { + "epoch": 1.95, + "learning_rate": 5.1202377735747094e-08, + "loss": 1.92, + "step": 14425 + }, + { + "epoch": 1.95, + "learning_rate": 5.052688462577682e-08, + "loss": 1.9898, + "step": 14430 + }, + { + "epoch": 1.95, + "learning_rate": 4.985139151580653e-08, + "loss": 1.9962, + "step": 14435 + }, + { + "epoch": 1.95, + "learning_rate": 4.917589840583626e-08, + "loss": 1.9581, + "step": 14440 + }, + { + "epoch": 1.95, + "learning_rate": 4.850040529586598e-08, + "loss": 2.0457, + "step": 14445 + }, + { + "epoch": 1.95, + "learning_rate": 4.7824912185895705e-08, + "loss": 2.0811, + "step": 14450 + }, + { + "epoch": 1.95, + "learning_rate": 4.714941907592542e-08, + "loss": 1.9705, + "step": 14455 + }, + { + "epoch": 1.95, + "learning_rate": 4.6473925965955145e-08, + "loss": 1.8892, + "step": 14460 + }, + { + "epoch": 1.95, + "learning_rate": 4.5798432855984864e-08, + "loss": 1.9186, + "step": 14465 + }, + { + "epoch": 1.95, + "learning_rate": 4.512293974601459e-08, + "loss": 2.0552, + "step": 14470 + }, + { + "epoch": 1.96, + "learning_rate": 4.444744663604431e-08, + "loss": 1.9579, + "step": 14475 + }, + { + "epoch": 1.96, + "learning_rate": 4.377195352607403e-08, + "loss": 1.9762, + "step": 14480 + }, + { + "epoch": 1.96, + "learning_rate": 4.3096460416103756e-08, + "loss": 2.0308, + "step": 14485 + }, + { + "epoch": 1.96, + "learning_rate": 4.2420967306133476e-08, + "loss": 1.9251, + "step": 14490 + }, + { + "epoch": 1.96, + "learning_rate": 4.17454741961632e-08, + "loss": 1.9119, + "step": 14495 + }, + { + "epoch": 1.96, + "learning_rate": 4.1069981086192915e-08, + "loss": 1.9872, + "step": 14500 + }, + { + "epoch": 1.96, + "learning_rate": 4.039448797622264e-08, + "loss": 1.9612, + "step": 14505 + }, + { + "epoch": 1.96, + "learning_rate": 3.971899486625236e-08, + "loss": 1.8368, + "step": 14510 + }, + { + "epoch": 1.96, + "learning_rate": 3.904350175628209e-08, + "loss": 2.0556, + "step": 14515 + }, + { + "epoch": 1.96, + "learning_rate": 3.83680086463118e-08, + "loss": 1.9518, + "step": 14520 + }, + { + "epoch": 1.96, + "learning_rate": 3.7692515536341527e-08, + "loss": 1.9561, + "step": 14525 + }, + { + "epoch": 1.96, + "learning_rate": 3.701702242637125e-08, + "loss": 2.0511, + "step": 14530 + }, + { + "epoch": 1.96, + "learning_rate": 3.634152931640097e-08, + "loss": 2.0264, + "step": 14535 + }, + { + "epoch": 1.96, + "learning_rate": 3.56660362064307e-08, + "loss": 1.9518, + "step": 14540 + }, + { + "epoch": 1.96, + "learning_rate": 3.499054309646041e-08, + "loss": 1.958, + "step": 14545 + }, + { + "epoch": 1.97, + "learning_rate": 3.431504998649014e-08, + "loss": 1.8629, + "step": 14550 + }, + { + "epoch": 1.97, + "learning_rate": 3.363955687651986e-08, + "loss": 1.9151, + "step": 14555 + }, + { + "epoch": 1.97, + "learning_rate": 3.2964063766549584e-08, + "loss": 2.046, + "step": 14560 + }, + { + "epoch": 1.97, + "learning_rate": 3.22885706565793e-08, + "loss": 1.9737, + "step": 14565 + }, + { + "epoch": 1.97, + "learning_rate": 3.1613077546609023e-08, + "loss": 1.9286, + "step": 14570 + }, + { + "epoch": 1.97, + "learning_rate": 3.093758443663874e-08, + "loss": 1.8991, + "step": 14575 + }, + { + "epoch": 1.97, + "learning_rate": 3.026209132666847e-08, + "loss": 1.9493, + "step": 14580 + }, + { + "epoch": 1.97, + "learning_rate": 2.958659821669819e-08, + "loss": 1.9246, + "step": 14585 + }, + { + "epoch": 1.97, + "learning_rate": 2.891110510672791e-08, + "loss": 2.0081, + "step": 14590 + }, + { + "epoch": 1.97, + "learning_rate": 2.823561199675763e-08, + "loss": 1.8692, + "step": 14595 + }, + { + "epoch": 1.97, + "learning_rate": 2.756011888678735e-08, + "loss": 1.9802, + "step": 14600 + }, + { + "epoch": 1.97, + "learning_rate": 2.6884625776817074e-08, + "loss": 1.9681, + "step": 14605 + }, + { + "epoch": 1.97, + "learning_rate": 2.6209132666846797e-08, + "loss": 1.9555, + "step": 14610 + }, + { + "epoch": 1.97, + "learning_rate": 2.553363955687652e-08, + "loss": 2.0044, + "step": 14615 + }, + { + "epoch": 1.98, + "learning_rate": 2.485814644690624e-08, + "loss": 1.9247, + "step": 14620 + }, + { + "epoch": 1.98, + "learning_rate": 2.4182653336935963e-08, + "loss": 1.9702, + "step": 14625 + }, + { + "epoch": 1.98, + "learning_rate": 2.3507160226965682e-08, + "loss": 1.9554, + "step": 14630 + }, + { + "epoch": 1.98, + "learning_rate": 2.2831667116995405e-08, + "loss": 2.0136, + "step": 14635 + }, + { + "epoch": 1.98, + "learning_rate": 2.215617400702513e-08, + "loss": 1.9539, + "step": 14640 + }, + { + "epoch": 1.98, + "learning_rate": 2.1480680897054848e-08, + "loss": 2.0292, + "step": 14645 + }, + { + "epoch": 1.98, + "learning_rate": 2.080518778708457e-08, + "loss": 1.9981, + "step": 14650 + }, + { + "epoch": 1.98, + "learning_rate": 2.012969467711429e-08, + "loss": 2.0681, + "step": 14655 + }, + { + "epoch": 1.98, + "learning_rate": 1.9454201567144017e-08, + "loss": 2.0592, + "step": 14660 + }, + { + "epoch": 1.98, + "learning_rate": 1.8778708457173737e-08, + "loss": 1.9643, + "step": 14665 + }, + { + "epoch": 1.98, + "learning_rate": 1.810321534720346e-08, + "loss": 2.0253, + "step": 14670 + }, + { + "epoch": 1.98, + "learning_rate": 1.742772223723318e-08, + "loss": 2.0889, + "step": 14675 + }, + { + "epoch": 1.98, + "learning_rate": 1.6752229127262902e-08, + "loss": 1.9485, + "step": 14680 + }, + { + "epoch": 1.98, + "learning_rate": 1.6076736017292622e-08, + "loss": 1.8839, + "step": 14685 + }, + { + "epoch": 1.98, + "learning_rate": 1.5401242907322345e-08, + "loss": 1.9396, + "step": 14690 + }, + { + "epoch": 1.99, + "learning_rate": 1.4725749797352068e-08, + "loss": 1.9704, + "step": 14695 + }, + { + "epoch": 1.99, + "learning_rate": 1.4050256687381789e-08, + "loss": 1.9368, + "step": 14700 + }, + { + "epoch": 1.99, + "learning_rate": 1.337476357741151e-08, + "loss": 1.9722, + "step": 14705 + }, + { + "epoch": 1.99, + "learning_rate": 1.2699270467441232e-08, + "loss": 2.0104, + "step": 14710 + }, + { + "epoch": 1.99, + "learning_rate": 1.2023777357470953e-08, + "loss": 1.9681, + "step": 14715 + }, + { + "epoch": 1.99, + "learning_rate": 1.1348284247500676e-08, + "loss": 1.9765, + "step": 14720 + }, + { + "epoch": 1.99, + "learning_rate": 1.0672791137530397e-08, + "loss": 1.9395, + "step": 14725 + }, + { + "epoch": 1.99, + "learning_rate": 9.997298027560119e-09, + "loss": 1.9876, + "step": 14730 + }, + { + "epoch": 1.99, + "learning_rate": 9.32180491758984e-09, + "loss": 1.9795, + "step": 14735 + }, + { + "epoch": 1.99, + "learning_rate": 8.646311807619561e-09, + "loss": 1.9714, + "step": 14740 + }, + { + "epoch": 1.99, + "learning_rate": 7.970818697649283e-09, + "loss": 1.9901, + "step": 14745 + }, + { + "epoch": 1.99, + "learning_rate": 7.295325587679005e-09, + "loss": 2.0808, + "step": 14750 + }, + { + "epoch": 1.99, + "learning_rate": 6.619832477708728e-09, + "loss": 1.9317, + "step": 14755 + }, + { + "epoch": 1.99, + "learning_rate": 5.944339367738449e-09, + "loss": 1.9596, + "step": 14760 + }, + { + "epoch": 1.99, + "learning_rate": 5.268846257768171e-09, + "loss": 1.9245, + "step": 14765 + }, + { + "epoch": 2.0, + "learning_rate": 4.5933531477978924e-09, + "loss": 2.0051, + "step": 14770 + }, + { + "epoch": 2.0, + "learning_rate": 3.917860037827614e-09, + "loss": 1.9284, + "step": 14775 + }, + { + "epoch": 2.0, + "learning_rate": 3.2423669278573355e-09, + "loss": 1.9367, + "step": 14780 + }, + { + "epoch": 2.0, + "learning_rate": 2.566873817887057e-09, + "loss": 2.0314, + "step": 14785 + }, + { + "epoch": 2.0, + "learning_rate": 1.8913807079167793e-09, + "loss": 1.8842, + "step": 14790 + }, + { + "epoch": 2.0, + "learning_rate": 1.215887597946501e-09, + "loss": 1.9025, + "step": 14795 + }, + { + "epoch": 2.0, + "learning_rate": 5.403944879762226e-10, + "loss": 1.9204, + "step": 14800 + }, + { + "epoch": 2.0, + "eval_loss": 1.9478894472122192, + "eval_runtime": 165.3905, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.453, + "step": 14800 + }, + { + "epoch": 2.0, + "step": 14804, + "total_flos": 8.826710223564841e+18, + "train_loss": 2.036540643423514, + "train_runtime": 233902.6153, + "train_samples_per_second": 1.013, + "train_steps_per_second": 0.063 + } + ], + "logging_steps": 5, + "max_steps": 14804, + "num_train_epochs": 2, + "save_steps": 2000, + "total_flos": 8.826710223564841e+18, + "trial_name": null, + "trial_params": null +}