{ "best_metric": 0.10869565217391304, "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-80RX1\\checkpoint-103", "epoch": 40.0, "eval_steps": 500, "global_step": 4120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 2.6699029126213593e-06, "loss": 2.0319380055747437e+25, "step": 10 }, { "epoch": 0.19, "learning_rate": 5.3398058252427185e-06, "loss": 2.1407918734188223e+25, "step": 20 }, { "epoch": 0.29, "learning_rate": 8.009708737864077e-06, "loss": 2.0319380055747437e+25, "step": 30 }, { "epoch": 0.39, "learning_rate": 1.0679611650485437e-05, "loss": 2.3584994246395388e+25, "step": 40 }, { "epoch": 0.49, "learning_rate": 1.3349514563106797e-05, "loss": 1.9956536289166384e+25, "step": 50 }, { "epoch": 0.58, "learning_rate": 1.6019417475728155e-05, "loss": 2.213361180137355e+25, "step": 60 }, { "epoch": 0.68, "learning_rate": 1.8689320388349518e-05, "loss": 2.213360995669914e+25, "step": 70 }, { "epoch": 0.78, "learning_rate": 2.1359223300970874e-05, "loss": 2.3584996091069793e+25, "step": 80 }, { "epoch": 0.87, "learning_rate": 2.4029126213592234e-05, "loss": 2.1407916889513814e+25, "step": 90 }, { "epoch": 0.97, "learning_rate": 2.6699029126213593e-05, "loss": 2.1407918734188223e+25, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7942, "eval_samples_per_second": 57.921, "eval_steps_per_second": 7.555, "step": 103 }, { "epoch": 1.07, "learning_rate": 2.9368932038834953e-05, "loss": 2.2254559723567234e+25, "step": 110 }, { "epoch": 1.17, "learning_rate": 3.203883495145631e-05, "loss": 2.2859304868558873e+25, "step": 120 }, { "epoch": 1.26, "learning_rate": 3.470873786407767e-05, "loss": 2.104507312293276e+25, "step": 130 }, { "epoch": 1.36, "learning_rate": 3.7378640776699036e-05, "loss": 2.213360995669914e+25, "step": 140 }, { "epoch": 1.46, "learning_rate": 4.004854368932039e-05, "loss": 2.3584996091069793e+25, "step": 150 }, { "epoch": 1.55, "learning_rate": 4.271844660194175e-05, "loss": 2.2496457412629006e+25, "step": 160 }, { "epoch": 1.65, "learning_rate": 4.5388349514563104e-05, "loss": 2.177076434544368e+25, "step": 170 }, { "epoch": 1.75, "learning_rate": 4.805825242718447e-05, "loss": 1.8867993921376783e+25, "step": 180 }, { "epoch": 1.84, "learning_rate": 5.072815533980583e-05, "loss": 2.213361180137355e+25, "step": 190 }, { "epoch": 1.94, "learning_rate": 5.339805825242719e-05, "loss": 1.923084137730665e+25, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7862, "eval_samples_per_second": 58.511, "eval_steps_per_second": 7.632, "step": 206 }, { "epoch": 2.04, "learning_rate": 5.4943791517629026e-05, "loss": 2.298025279075256e+25, "step": 210 }, { "epoch": 2.14, "learning_rate": 5.480327031170158e-05, "loss": 2.2496457412629006e+25, "step": 220 }, { "epoch": 2.23, "learning_rate": 5.466274910577415e-05, "loss": 1.9956534444491974e+25, "step": 230 }, { "epoch": 2.33, "learning_rate": 5.452222789984671e-05, "loss": 2.104507312293276e+25, "step": 240 }, { "epoch": 2.43, "learning_rate": 5.438170669391927e-05, "loss": 2.213361180137355e+25, "step": 250 }, { "epoch": 2.52, "learning_rate": 5.4241185487991826e-05, "loss": 2.0319380055747437e+25, "step": 260 }, { "epoch": 2.62, "learning_rate": 5.410066428206439e-05, "loss": 2.1045071278258356e+25, "step": 270 }, { "epoch": 2.72, "learning_rate": 5.396014307613695e-05, "loss": 2.3947839857650846e+25, "step": 280 }, { "epoch": 2.82, "learning_rate": 5.381962187020951e-05, "loss": 2.1407918734188223e+25, "step": 290 }, { "epoch": 2.91, "learning_rate": 5.3679100664282064e-05, "loss": 2.285930117921006e+25, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8107, "eval_samples_per_second": 56.742, "eval_steps_per_second": 7.401, "step": 309 }, { "epoch": 3.01, "learning_rate": 5.3538579458354626e-05, "loss": 2.1407916889513814e+25, "step": 310 }, { "epoch": 3.11, "learning_rate": 5.339805825242719e-05, "loss": 2.2133613646047957e+25, "step": 320 }, { "epoch": 3.2, "learning_rate": 5.325753704649975e-05, "loss": 1.9956536289166384e+25, "step": 330 }, { "epoch": 3.3, "learning_rate": 5.311701584057231e-05, "loss": 2.1045071278258356e+25, "step": 340 }, { "epoch": 3.4, "learning_rate": 5.2976494634644864e-05, "loss": 2.177076619011809e+25, "step": 350 }, { "epoch": 3.5, "learning_rate": 5.2835973428717425e-05, "loss": 2.1045071278258356e+25, "step": 360 }, { "epoch": 3.59, "learning_rate": 5.2695452222789986e-05, "loss": 2.1045071278258356e+25, "step": 370 }, { "epoch": 3.69, "learning_rate": 5.255493101686255e-05, "loss": 2.3584996091069793e+25, "step": 380 }, { "epoch": 3.79, "learning_rate": 5.24144098109351e-05, "loss": 2.285930302388447e+25, "step": 390 }, { "epoch": 3.88, "learning_rate": 5.2273888605007663e-05, "loss": 2.0682227511677303e+25, "step": 400 }, { "epoch": 3.98, "learning_rate": 5.2133367399080225e-05, "loss": 2.358499240172098e+25, "step": 410 }, { "epoch": 4.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7522, "eval_samples_per_second": 61.156, "eval_steps_per_second": 7.977, "step": 412 }, { "epoch": 4.08, "learning_rate": 5.1992846193152786e-05, "loss": 2.044032797794112e+25, "step": 420 }, { "epoch": 4.17, "learning_rate": 5.185232498722535e-05, "loss": 2.031938190042184e+25, "step": 430 }, { "epoch": 4.27, "learning_rate": 5.17118037812979e-05, "loss": 2.213360995669914e+25, "step": 440 }, { "epoch": 4.37, "learning_rate": 5.157128257537046e-05, "loss": 1.9956534444491974e+25, "step": 450 }, { "epoch": 4.47, "learning_rate": 5.1430761369443024e-05, "loss": 2.213361180137355e+25, "step": 460 }, { "epoch": 4.56, "learning_rate": 5.129024016351559e-05, "loss": 2.068222382232849e+25, "step": 470 }, { "epoch": 4.66, "learning_rate": 5.114971895758815e-05, "loss": 2.177076619011809e+25, "step": 480 }, { "epoch": 4.76, "learning_rate": 5.100919775166071e-05, "loss": 2.0682225667002894e+25, "step": 490 }, { "epoch": 4.85, "learning_rate": 5.086867654573327e-05, "loss": 2.3222148635139926e+25, "step": 500 }, { "epoch": 4.95, "learning_rate": 5.072815533980583e-05, "loss": 2.4310687313580712e+25, "step": 510 }, { "epoch": 5.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7562, "eval_samples_per_second": 60.833, "eval_steps_per_second": 7.935, "step": 515 }, { "epoch": 5.05, "learning_rate": 5.058763413387839e-05, "loss": 2.3584996091069793e+25, "step": 520 }, { "epoch": 5.15, "learning_rate": 5.0447112927950946e-05, "loss": 2.0682227511677303e+25, "step": 530 }, { "epoch": 5.24, "learning_rate": 5.030659172202351e-05, "loss": 2.1407918734188223e+25, "step": 540 }, { "epoch": 5.34, "learning_rate": 5.016607051609607e-05, "loss": 1.9593688833236517e+25, "step": 550 }, { "epoch": 5.44, "learning_rate": 5.002554931016863e-05, "loss": 2.177076619011809e+25, "step": 560 }, { "epoch": 5.53, "learning_rate": 4.9885028104241185e-05, "loss": 1.9956534444491974e+25, "step": 570 }, { "epoch": 5.63, "learning_rate": 4.9744506898313746e-05, "loss": 2.2133613646047957e+25, "step": 580 }, { "epoch": 5.73, "learning_rate": 4.960398569238631e-05, "loss": 2.3222148635139926e+25, "step": 590 }, { "epoch": 5.83, "learning_rate": 4.946346448645887e-05, "loss": 2.104507312293276e+25, "step": 600 }, { "epoch": 5.92, "learning_rate": 4.932294328053143e-05, "loss": 2.4310687313580712e+25, "step": 610 }, { "epoch": 6.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7787, "eval_samples_per_second": 59.074, "eval_steps_per_second": 7.705, "step": 618 }, { "epoch": 6.02, "learning_rate": 4.9182422074603984e-05, "loss": 2.26174071794971e+25, "step": 620 }, { "epoch": 6.12, "learning_rate": 4.9041900868676545e-05, "loss": 2.0319380055747437e+25, "step": 630 }, { "epoch": 6.21, "learning_rate": 4.890137966274911e-05, "loss": 2.3947841702325255e+25, "step": 640 }, { "epoch": 6.31, "learning_rate": 4.876085845682167e-05, "loss": 2.1045071278258356e+25, "step": 650 }, { "epoch": 6.41, "learning_rate": 4.862033725089422e-05, "loss": 2.2496459257303415e+25, "step": 660 }, { "epoch": 6.5, "learning_rate": 4.8479816044966784e-05, "loss": 2.031938190042184e+25, "step": 670 }, { "epoch": 6.6, "learning_rate": 4.8339294839039345e-05, "loss": 2.1407918734188223e+25, "step": 680 }, { "epoch": 6.7, "learning_rate": 4.8198773633111906e-05, "loss": 2.140792057886263e+25, "step": 690 }, { "epoch": 6.8, "learning_rate": 4.805825242718447e-05, "loss": 2.213360995669914e+25, "step": 700 }, { "epoch": 6.89, "learning_rate": 4.791773122125703e-05, "loss": 2.0682225667002894e+25, "step": 710 }, { "epoch": 6.99, "learning_rate": 4.777721001532959e-05, "loss": 2.2496457412629006e+25, "step": 720 }, { "epoch": 7.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7757, "eval_samples_per_second": 59.302, "eval_steps_per_second": 7.735, "step": 721 }, { "epoch": 7.09, "learning_rate": 4.763668880940215e-05, "loss": 2.044032797794112e+25, "step": 730 }, { "epoch": 7.18, "learning_rate": 4.749616760347471e-05, "loss": 2.177076434544368e+25, "step": 740 }, { "epoch": 7.28, "learning_rate": 4.735564639754727e-05, "loss": 2.177076434544368e+25, "step": 750 }, { "epoch": 7.38, "learning_rate": 4.721512519161983e-05, "loss": 2.3947841702325255e+25, "step": 760 }, { "epoch": 7.48, "learning_rate": 4.707460398569239e-05, "loss": 2.3947841702325255e+25, "step": 770 }, { "epoch": 7.57, "learning_rate": 4.693408277976495e-05, "loss": 1.923084137730665e+25, "step": 780 }, { "epoch": 7.67, "learning_rate": 4.679356157383751e-05, "loss": 2.3222148635139926e+25, "step": 790 }, { "epoch": 7.77, "learning_rate": 4.6653040367910067e-05, "loss": 2.104507496760717e+25, "step": 800 }, { "epoch": 7.86, "learning_rate": 4.651251916198263e-05, "loss": 1.995653259981757e+25, "step": 810 }, { "epoch": 7.96, "learning_rate": 4.637199795605519e-05, "loss": 2.1045071278258356e+25, "step": 820 }, { "epoch": 8.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8062, "eval_samples_per_second": 57.059, "eval_steps_per_second": 7.443, "step": 824 }, { "epoch": 8.06, "learning_rate": 4.623147675012775e-05, "loss": 2.0319380055747437e+25, "step": 830 }, { "epoch": 8.16, "learning_rate": 4.6090955544200305e-05, "loss": 2.213361180137355e+25, "step": 840 }, { "epoch": 8.25, "learning_rate": 4.5950434338272866e-05, "loss": 1.995653259981757e+25, "step": 850 }, { "epoch": 8.35, "learning_rate": 4.580991313234543e-05, "loss": 1.850515015479573e+25, "step": 860 }, { "epoch": 8.45, "learning_rate": 4.566939192641799e-05, "loss": 2.431068915825512e+25, "step": 870 }, { "epoch": 8.54, "learning_rate": 4.552887072049055e-05, "loss": 2.394784354699966e+25, "step": 880 }, { "epoch": 8.64, "learning_rate": 4.5388349514563104e-05, "loss": 2.2133613646047957e+25, "step": 890 }, { "epoch": 8.74, "learning_rate": 4.5247828308635666e-05, "loss": 2.24964555679546e+25, "step": 900 }, { "epoch": 8.83, "learning_rate": 4.510730710270823e-05, "loss": 2.2496457412629006e+25, "step": 910 }, { "epoch": 8.93, "learning_rate": 4.496678589678079e-05, "loss": 2.1045071278258356e+25, "step": 920 }, { "epoch": 9.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7902, "eval_samples_per_second": 58.215, "eval_steps_per_second": 7.593, "step": 927 }, { "epoch": 9.03, "learning_rate": 4.482626469085334e-05, "loss": 2.2254559723567234e+25, "step": 930 }, { "epoch": 9.13, "learning_rate": 4.468574348492591e-05, "loss": 2.285930117921006e+25, "step": 940 }, { "epoch": 9.22, "learning_rate": 4.454522227899847e-05, "loss": 2.177076619011809e+25, "step": 950 }, { "epoch": 9.32, "learning_rate": 4.440470107307103e-05, "loss": 2.2496459257303415e+25, "step": 960 }, { "epoch": 9.42, "learning_rate": 4.426417986714359e-05, "loss": 1.9593688833236517e+25, "step": 970 }, { "epoch": 9.51, "learning_rate": 4.412365866121615e-05, "loss": 2.24964555679546e+25, "step": 980 }, { "epoch": 9.61, "learning_rate": 4.398313745528871e-05, "loss": 2.285930117921006e+25, "step": 990 }, { "epoch": 9.71, "learning_rate": 4.384261624936127e-05, "loss": 2.285930117921006e+25, "step": 1000 }, { "epoch": 9.81, "learning_rate": 4.370209504343383e-05, "loss": 1.8505148310121323e+25, "step": 1010 }, { "epoch": 9.9, "learning_rate": 4.356157383750639e-05, "loss": 1.9593688833236517e+25, "step": 1020 }, { "epoch": 10.0, "learning_rate": 4.342105263157895e-05, "loss": 2.3343098402008016e+25, "step": 1030 }, { "epoch": 10.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7707, "eval_samples_per_second": 59.688, "eval_steps_per_second": 7.785, "step": 1030 }, { "epoch": 10.1, "learning_rate": 4.328053142565151e-05, "loss": 2.1407918734188223e+25, "step": 1040 }, { "epoch": 10.19, "learning_rate": 4.314001021972407e-05, "loss": 2.4310687313580712e+25, "step": 1050 }, { "epoch": 10.29, "learning_rate": 4.2999489013796626e-05, "loss": 1.9230843221981057e+25, "step": 1060 }, { "epoch": 10.39, "learning_rate": 4.285896780786919e-05, "loss": 2.285930117921006e+25, "step": 1070 }, { "epoch": 10.49, "learning_rate": 4.271844660194175e-05, "loss": 1.886799576605119e+25, "step": 1080 }, { "epoch": 10.58, "learning_rate": 4.257792539601431e-05, "loss": 2.177076250076927e+25, "step": 1090 }, { "epoch": 10.68, "learning_rate": 4.243740419008687e-05, "loss": 2.0682227511677303e+25, "step": 1100 }, { "epoch": 10.78, "learning_rate": 4.2296882984159425e-05, "loss": 2.1407918734188223e+25, "step": 1110 }, { "epoch": 10.87, "learning_rate": 4.2156361778231986e-05, "loss": 2.2496457412629006e+25, "step": 1120 }, { "epoch": 10.97, "learning_rate": 4.201584057230455e-05, "loss": 2.3222148635139926e+25, "step": 1130 }, { "epoch": 11.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7762, "eval_samples_per_second": 59.265, "eval_steps_per_second": 7.73, "step": 1133 }, { "epoch": 11.07, "learning_rate": 4.187531936637711e-05, "loss": 2.1166021045126447e+25, "step": 1140 }, { "epoch": 11.17, "learning_rate": 4.173479816044967e-05, "loss": 2.177076250076927e+25, "step": 1150 }, { "epoch": 11.26, "learning_rate": 4.1594276954522225e-05, "loss": 2.1045071278258356e+25, "step": 1160 }, { "epoch": 11.36, "learning_rate": 4.1453755748594786e-05, "loss": 2.068222382232849e+25, "step": 1170 }, { "epoch": 11.46, "learning_rate": 4.1313234542667354e-05, "loss": 1.923084137730665e+25, "step": 1180 }, { "epoch": 11.55, "learning_rate": 4.1172713336739915e-05, "loss": 2.213361180137355e+25, "step": 1190 }, { "epoch": 11.65, "learning_rate": 4.103219213081247e-05, "loss": 2.285930117921006e+25, "step": 1200 }, { "epoch": 11.75, "learning_rate": 4.089167092488503e-05, "loss": 2.104507312293276e+25, "step": 1210 }, { "epoch": 11.84, "learning_rate": 4.075114971895759e-05, "loss": 2.2859304868558873e+25, "step": 1220 }, { "epoch": 11.94, "learning_rate": 4.0610628513030154e-05, "loss": 2.3222150479814335e+25, "step": 1230 }, { "epoch": 12.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7912, "eval_samples_per_second": 58.14, "eval_steps_per_second": 7.583, "step": 1236 }, { "epoch": 12.04, "learning_rate": 4.047010730710271e-05, "loss": 2.285930117921006e+25, "step": 1240 }, { "epoch": 12.14, "learning_rate": 4.032958610117527e-05, "loss": 2.2496457412629006e+25, "step": 1250 }, { "epoch": 12.23, "learning_rate": 4.018906489524783e-05, "loss": 2.3222148635139926e+25, "step": 1260 }, { "epoch": 12.33, "learning_rate": 4.004854368932039e-05, "loss": 2.3222148635139926e+25, "step": 1270 }, { "epoch": 12.43, "learning_rate": 3.990802248339295e-05, "loss": 2.1045071278258356e+25, "step": 1280 }, { "epoch": 12.52, "learning_rate": 3.976750127746551e-05, "loss": 1.850515015479573e+25, "step": 1290 }, { "epoch": 12.62, "learning_rate": 3.962698007153807e-05, "loss": 2.177076250076927e+25, "step": 1300 }, { "epoch": 12.72, "learning_rate": 3.948645886561063e-05, "loss": 2.2496459257303415e+25, "step": 1310 }, { "epoch": 12.82, "learning_rate": 3.934593765968319e-05, "loss": 2.1407918734188223e+25, "step": 1320 }, { "epoch": 12.91, "learning_rate": 3.9205416453755746e-05, "loss": 2.1407918734188223e+25, "step": 1330 }, { "epoch": 13.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7662, "eval_samples_per_second": 60.039, "eval_steps_per_second": 7.831, "step": 1339 }, { "epoch": 13.01, "learning_rate": 3.906489524782831e-05, "loss": 2.2859304868558873e+25, "step": 1340 }, { "epoch": 13.11, "learning_rate": 3.892437404190087e-05, "loss": 2.104507312293276e+25, "step": 1350 }, { "epoch": 13.2, "learning_rate": 3.878385283597343e-05, "loss": 2.0682227511677303e+25, "step": 1360 }, { "epoch": 13.3, "learning_rate": 3.864333163004599e-05, "loss": 2.0682227511677303e+25, "step": 1370 }, { "epoch": 13.4, "learning_rate": 3.8502810424118545e-05, "loss": 2.0319378211073027e+25, "step": 1380 }, { "epoch": 13.5, "learning_rate": 3.836228921819111e-05, "loss": 2.3222148635139926e+25, "step": 1390 }, { "epoch": 13.59, "learning_rate": 3.822176801226367e-05, "loss": 2.3947839857650846e+25, "step": 1400 }, { "epoch": 13.69, "learning_rate": 3.8081246806336236e-05, "loss": 2.213361180137355e+25, "step": 1410 }, { "epoch": 13.79, "learning_rate": 3.794072560040879e-05, "loss": 2.104507312293276e+25, "step": 1420 }, { "epoch": 13.88, "learning_rate": 3.780020439448135e-05, "loss": 2.177076619011809e+25, "step": 1430 }, { "epoch": 13.98, "learning_rate": 3.765968318855391e-05, "loss": 2.1407916889513814e+25, "step": 1440 }, { "epoch": 14.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8082, "eval_samples_per_second": 56.918, "eval_steps_per_second": 7.424, "step": 1442 }, { "epoch": 14.08, "learning_rate": 3.7519161982626474e-05, "loss": 2.2617409024171505e+25, "step": 1450 }, { "epoch": 14.17, "learning_rate": 3.7378640776699036e-05, "loss": 2.213360995669914e+25, "step": 1460 }, { "epoch": 14.27, "learning_rate": 3.723811957077159e-05, "loss": 2.177076619011809e+25, "step": 1470 }, { "epoch": 14.37, "learning_rate": 3.709759836484415e-05, "loss": 2.104507312293276e+25, "step": 1480 }, { "epoch": 14.47, "learning_rate": 3.695707715891671e-05, "loss": 2.140792057886263e+25, "step": 1490 }, { "epoch": 14.56, "learning_rate": 3.6816555952989274e-05, "loss": 2.1407918734188223e+25, "step": 1500 }, { "epoch": 14.66, "learning_rate": 3.667603474706183e-05, "loss": 2.2859304868558873e+25, "step": 1510 }, { "epoch": 14.76, "learning_rate": 3.653551354113439e-05, "loss": 2.0682225667002894e+25, "step": 1520 }, { "epoch": 14.85, "learning_rate": 3.639499233520695e-05, "loss": 2.2133608112024734e+25, "step": 1530 }, { "epoch": 14.95, "learning_rate": 3.625447112927951e-05, "loss": 2.177076434544368e+25, "step": 1540 }, { "epoch": 15.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7906, "eval_samples_per_second": 58.183, "eval_steps_per_second": 7.589, "step": 1545 }, { "epoch": 15.05, "learning_rate": 3.611394992335207e-05, "loss": 1.9109895299787373e+25, "step": 1550 }, { "epoch": 15.15, "learning_rate": 3.597342871742463e-05, "loss": 2.1407918734188223e+25, "step": 1560 }, { "epoch": 15.24, "learning_rate": 3.583290751149719e-05, "loss": 2.104507312293276e+25, "step": 1570 }, { "epoch": 15.34, "learning_rate": 3.569238630556975e-05, "loss": 2.2496457412629006e+25, "step": 1580 }, { "epoch": 15.44, "learning_rate": 3.555186509964231e-05, "loss": 2.1407918734188223e+25, "step": 1590 }, { "epoch": 15.53, "learning_rate": 3.5411343893714866e-05, "loss": 2.177076434544368e+25, "step": 1600 }, { "epoch": 15.63, "learning_rate": 3.527082268778743e-05, "loss": 2.1407918734188223e+25, "step": 1610 }, { "epoch": 15.73, "learning_rate": 3.513030148185999e-05, "loss": 2.285930117921006e+25, "step": 1620 }, { "epoch": 15.83, "learning_rate": 3.498978027593255e-05, "loss": 2.3222148635139926e+25, "step": 1630 }, { "epoch": 15.92, "learning_rate": 3.484925907000511e-05, "loss": 2.24964555679546e+25, "step": 1640 }, { "epoch": 16.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7962, "eval_samples_per_second": 57.774, "eval_steps_per_second": 7.536, "step": 1648 }, { "epoch": 16.02, "learning_rate": 3.470873786407767e-05, "loss": 2.0682227511677303e+25, "step": 1650 }, { "epoch": 16.12, "learning_rate": 3.4568216658150234e-05, "loss": 1.923084137730665e+25, "step": 1660 }, { "epoch": 16.21, "learning_rate": 3.4427695452222795e-05, "loss": 2.2496457412629006e+25, "step": 1670 }, { "epoch": 16.31, "learning_rate": 3.4287174246295356e-05, "loss": 2.177076619011809e+25, "step": 1680 }, { "epoch": 16.41, "learning_rate": 3.414665304036791e-05, "loss": 2.1407918734188223e+25, "step": 1690 }, { "epoch": 16.5, "learning_rate": 3.400613183444047e-05, "loss": 2.1407916889513814e+25, "step": 1700 }, { "epoch": 16.6, "learning_rate": 3.386561062851303e-05, "loss": 2.0682227511677303e+25, "step": 1710 }, { "epoch": 16.7, "learning_rate": 3.3725089422585595e-05, "loss": 2.285930302388447e+25, "step": 1720 }, { "epoch": 16.8, "learning_rate": 3.358456821665815e-05, "loss": 2.104507496760717e+25, "step": 1730 }, { "epoch": 16.89, "learning_rate": 3.344404701073071e-05, "loss": 2.2859304868558873e+25, "step": 1740 }, { "epoch": 16.99, "learning_rate": 3.330352580480327e-05, "loss": 2.285930302388447e+25, "step": 1750 }, { "epoch": 17.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7727, "eval_samples_per_second": 59.533, "eval_steps_per_second": 7.765, "step": 1751 }, { "epoch": 17.09, "learning_rate": 3.316300459887583e-05, "loss": 2.2254557878892825e+25, "step": 1760 }, { "epoch": 17.18, "learning_rate": 3.3022483392948394e-05, "loss": 1.9956534444491974e+25, "step": 1770 }, { "epoch": 17.28, "learning_rate": 3.288196218702095e-05, "loss": 2.3584994246395388e+25, "step": 1780 }, { "epoch": 17.38, "learning_rate": 3.274144098109351e-05, "loss": 2.1045071278258356e+25, "step": 1790 }, { "epoch": 17.48, "learning_rate": 3.260091977516607e-05, "loss": 1.886799576605119e+25, "step": 1800 }, { "epoch": 17.57, "learning_rate": 3.246039856923863e-05, "loss": 2.3584994246395388e+25, "step": 1810 }, { "epoch": 17.67, "learning_rate": 3.2319877363311194e-05, "loss": 2.104507312293276e+25, "step": 1820 }, { "epoch": 17.77, "learning_rate": 3.217935615738375e-05, "loss": 2.285930302388447e+25, "step": 1830 }, { "epoch": 17.86, "learning_rate": 3.203883495145631e-05, "loss": 2.3584994246395388e+25, "step": 1840 }, { "epoch": 17.96, "learning_rate": 3.189831374552887e-05, "loss": 1.9593686988562108e+25, "step": 1850 }, { "epoch": 18.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8012, "eval_samples_per_second": 57.414, "eval_steps_per_second": 7.489, "step": 1854 }, { "epoch": 18.06, "learning_rate": 3.175779253960143e-05, "loss": 2.1528866656381905e+25, "step": 1860 }, { "epoch": 18.16, "learning_rate": 3.1617271333673986e-05, "loss": 2.0682225667002894e+25, "step": 1870 }, { "epoch": 18.25, "learning_rate": 3.147675012774655e-05, "loss": 2.285930117921006e+25, "step": 1880 }, { "epoch": 18.35, "learning_rate": 3.1336228921819116e-05, "loss": 2.285930302388447e+25, "step": 1890 }, { "epoch": 18.45, "learning_rate": 3.119570771589168e-05, "loss": 2.104507312293276e+25, "step": 1900 }, { "epoch": 18.54, "learning_rate": 3.105518650996423e-05, "loss": 2.140792057886263e+25, "step": 1910 }, { "epoch": 18.64, "learning_rate": 3.091466530403679e-05, "loss": 2.213360995669914e+25, "step": 1920 }, { "epoch": 18.74, "learning_rate": 3.0774144098109354e-05, "loss": 2.177076250076927e+25, "step": 1930 }, { "epoch": 18.83, "learning_rate": 3.0633622892181915e-05, "loss": 1.9593688833236517e+25, "step": 1940 }, { "epoch": 18.93, "learning_rate": 3.0493101686254473e-05, "loss": 2.2859304868558873e+25, "step": 1950 }, { "epoch": 19.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7922, "eval_samples_per_second": 58.068, "eval_steps_per_second": 7.574, "step": 1957 }, { "epoch": 19.03, "learning_rate": 3.0352580480327034e-05, "loss": 2.116602288980085e+25, "step": 1960 }, { "epoch": 19.13, "learning_rate": 3.0212059274399592e-05, "loss": 2.3222148635139926e+25, "step": 1970 }, { "epoch": 19.22, "learning_rate": 3.0071538068472154e-05, "loss": 2.177076619011809e+25, "step": 1980 }, { "epoch": 19.32, "learning_rate": 2.993101686254471e-05, "loss": 2.3222148635139926e+25, "step": 1990 }, { "epoch": 19.42, "learning_rate": 2.9790495656617273e-05, "loss": 2.1045071278258356e+25, "step": 2000 }, { "epoch": 19.51, "learning_rate": 2.964997445068983e-05, "loss": 1.9593685143887703e+25, "step": 2010 }, { "epoch": 19.61, "learning_rate": 2.9509453244762392e-05, "loss": 2.213360995669914e+25, "step": 2020 }, { "epoch": 19.71, "learning_rate": 2.9368932038834953e-05, "loss": 2.2496457412629006e+25, "step": 2030 }, { "epoch": 19.81, "learning_rate": 2.922841083290751e-05, "loss": 2.0682225667002894e+25, "step": 2040 }, { "epoch": 19.9, "learning_rate": 2.9087889626980072e-05, "loss": 2.177076434544368e+25, "step": 2050 }, { "epoch": 20.0, "learning_rate": 2.894736842105263e-05, "loss": 2.1528866656381905e+25, "step": 2060 }, { "epoch": 20.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7897, "eval_samples_per_second": 58.251, "eval_steps_per_second": 7.598, "step": 2060 }, { "epoch": 20.1, "learning_rate": 2.880684721512519e-05, "loss": 2.177076250076927e+25, "step": 2070 }, { "epoch": 20.19, "learning_rate": 2.866632600919775e-05, "loss": 2.213360995669914e+25, "step": 2080 }, { "epoch": 20.29, "learning_rate": 2.852580480327031e-05, "loss": 1.9593686988562108e+25, "step": 2090 }, { "epoch": 20.39, "learning_rate": 2.8385283597342872e-05, "loss": 2.358499240172098e+25, "step": 2100 }, { "epoch": 20.49, "learning_rate": 2.824476239141543e-05, "loss": 2.213360995669914e+25, "step": 2110 }, { "epoch": 20.58, "learning_rate": 2.8104241185487994e-05, "loss": 2.285930117921006e+25, "step": 2120 }, { "epoch": 20.68, "learning_rate": 2.7963719979560556e-05, "loss": 2.3222148635139926e+25, "step": 2130 }, { "epoch": 20.78, "learning_rate": 2.7823198773633117e-05, "loss": 2.1045071278258356e+25, "step": 2140 }, { "epoch": 20.87, "learning_rate": 2.7682677567705675e-05, "loss": 2.2496457412629006e+25, "step": 2150 }, { "epoch": 20.97, "learning_rate": 2.7542156361778236e-05, "loss": 1.7053764020425078e+25, "step": 2160 }, { "epoch": 21.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7782, "eval_samples_per_second": 59.113, "eval_steps_per_second": 7.71, "step": 2163 }, { "epoch": 21.07, "learning_rate": 2.740163515585079e-05, "loss": 2.213360995669914e+25, "step": 2170 }, { "epoch": 21.17, "learning_rate": 2.7261113949923355e-05, "loss": 2.1407918734188223e+25, "step": 2180 }, { "epoch": 21.26, "learning_rate": 2.7120592743995913e-05, "loss": 2.104507496760717e+25, "step": 2190 }, { "epoch": 21.36, "learning_rate": 2.6980071538068474e-05, "loss": 2.5036378536091632e+25, "step": 2200 }, { "epoch": 21.46, "learning_rate": 2.6839550332141032e-05, "loss": 2.0682227511677303e+25, "step": 2210 }, { "epoch": 21.55, "learning_rate": 2.6699029126213593e-05, "loss": 2.213360995669914e+25, "step": 2220 }, { "epoch": 21.65, "learning_rate": 2.6558507920286155e-05, "loss": 2.1407918734188223e+25, "step": 2230 }, { "epoch": 21.75, "learning_rate": 2.6417986714358713e-05, "loss": 2.1407918734188223e+25, "step": 2240 }, { "epoch": 21.84, "learning_rate": 2.6277465508431274e-05, "loss": 2.1407916889513814e+25, "step": 2250 }, { "epoch": 21.94, "learning_rate": 2.6136944302503832e-05, "loss": 2.213360995669914e+25, "step": 2260 }, { "epoch": 22.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7812, "eval_samples_per_second": 58.884, "eval_steps_per_second": 7.681, "step": 2266 }, { "epoch": 22.04, "learning_rate": 2.5996423096576393e-05, "loss": 2.1045071278258356e+25, "step": 2270 }, { "epoch": 22.14, "learning_rate": 2.585590189064895e-05, "loss": 2.0682227511677303e+25, "step": 2280 }, { "epoch": 22.23, "learning_rate": 2.5715380684721512e-05, "loss": 2.213360995669914e+25, "step": 2290 }, { "epoch": 22.33, "learning_rate": 2.5574859478794073e-05, "loss": 2.177076250076927e+25, "step": 2300 }, { "epoch": 22.43, "learning_rate": 2.5434338272866635e-05, "loss": 2.3584996091069793e+25, "step": 2310 }, { "epoch": 22.52, "learning_rate": 2.5293817066939196e-05, "loss": 2.213361180137355e+25, "step": 2320 }, { "epoch": 22.62, "learning_rate": 2.5153295861011754e-05, "loss": 2.177076434544368e+25, "step": 2330 }, { "epoch": 22.72, "learning_rate": 2.5012774655084315e-05, "loss": 2.213360995669914e+25, "step": 2340 }, { "epoch": 22.82, "learning_rate": 2.4872253449156873e-05, "loss": 2.1407918734188223e+25, "step": 2350 }, { "epoch": 22.91, "learning_rate": 2.4731732243229434e-05, "loss": 2.104507496760717e+25, "step": 2360 }, { "epoch": 23.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7982, "eval_samples_per_second": 57.631, "eval_steps_per_second": 7.517, "step": 2369 }, { "epoch": 23.01, "learning_rate": 2.4591211037301992e-05, "loss": 2.116602288980085e+25, "step": 2370 }, { "epoch": 23.11, "learning_rate": 2.4450689831374553e-05, "loss": 2.3584994246395388e+25, "step": 2380 }, { "epoch": 23.2, "learning_rate": 2.431016862544711e-05, "loss": 2.177076619011809e+25, "step": 2390 }, { "epoch": 23.3, "learning_rate": 2.4169647419519672e-05, "loss": 2.177076619011809e+25, "step": 2400 }, { "epoch": 23.4, "learning_rate": 2.4029126213592234e-05, "loss": 2.104507312293276e+25, "step": 2410 }, { "epoch": 23.5, "learning_rate": 2.3888605007664795e-05, "loss": 2.2859304868558873e+25, "step": 2420 }, { "epoch": 23.59, "learning_rate": 2.3748083801737356e-05, "loss": 2.213361180137355e+25, "step": 2430 }, { "epoch": 23.69, "learning_rate": 2.3607562595809914e-05, "loss": 2.285930302388447e+25, "step": 2440 }, { "epoch": 23.79, "learning_rate": 2.3467041389882475e-05, "loss": 1.9593688833236517e+25, "step": 2450 }, { "epoch": 23.88, "learning_rate": 2.3326520183955033e-05, "loss": 1.9230843221981057e+25, "step": 2460 }, { "epoch": 23.98, "learning_rate": 2.3185998978027595e-05, "loss": 2.1407915044839405e+25, "step": 2470 }, { "epoch": 24.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7787, "eval_samples_per_second": 59.074, "eval_steps_per_second": 7.705, "step": 2472 }, { "epoch": 24.08, "learning_rate": 2.3045477772100152e-05, "loss": 2.0440329822615527e+25, "step": 2480 }, { "epoch": 24.17, "learning_rate": 2.2904956566172714e-05, "loss": 2.3222148635139926e+25, "step": 2490 }, { "epoch": 24.27, "learning_rate": 2.2764435360245275e-05, "loss": 2.322215232448874e+25, "step": 2500 }, { "epoch": 24.37, "learning_rate": 2.2623914154317833e-05, "loss": 2.177076434544368e+25, "step": 2510 }, { "epoch": 24.47, "learning_rate": 2.2483392948390394e-05, "loss": 2.2859304868558873e+25, "step": 2520 }, { "epoch": 24.56, "learning_rate": 2.2342871742462955e-05, "loss": 2.177076434544368e+25, "step": 2530 }, { "epoch": 24.66, "learning_rate": 2.2202350536535517e-05, "loss": 2.177076434544368e+25, "step": 2540 }, { "epoch": 24.76, "learning_rate": 2.2061829330608075e-05, "loss": 1.8142304543540272e+25, "step": 2550 }, { "epoch": 24.85, "learning_rate": 2.1921308124680636e-05, "loss": 2.3584996091069793e+25, "step": 2560 }, { "epoch": 24.95, "learning_rate": 2.1780786918753194e-05, "loss": 2.177076250076927e+25, "step": 2570 }, { "epoch": 25.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8352, "eval_samples_per_second": 55.076, "eval_steps_per_second": 7.184, "step": 2575 }, { "epoch": 25.05, "learning_rate": 2.1640265712825755e-05, "loss": 1.947274091104283e+25, "step": 2580 }, { "epoch": 25.15, "learning_rate": 2.1499744506898313e-05, "loss": 2.1407918734188223e+25, "step": 2590 }, { "epoch": 25.24, "learning_rate": 2.1359223300970874e-05, "loss": 2.104507312293276e+25, "step": 2600 }, { "epoch": 25.34, "learning_rate": 2.1218702095043435e-05, "loss": 2.0682227511677303e+25, "step": 2610 }, { "epoch": 25.44, "learning_rate": 2.1078180889115993e-05, "loss": 2.394784354699966e+25, "step": 2620 }, { "epoch": 25.53, "learning_rate": 2.0937659683188554e-05, "loss": 2.0682225667002894e+25, "step": 2630 }, { "epoch": 25.63, "learning_rate": 2.0797138477261112e-05, "loss": 2.0319380055747437e+25, "step": 2640 }, { "epoch": 25.73, "learning_rate": 2.0656617271333677e-05, "loss": 2.0682227511677303e+25, "step": 2650 }, { "epoch": 25.83, "learning_rate": 2.0516096065406235e-05, "loss": 2.3584996091069793e+25, "step": 2660 }, { "epoch": 25.92, "learning_rate": 2.0375574859478796e-05, "loss": 2.3947841702325255e+25, "step": 2670 }, { "epoch": 26.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8067, "eval_samples_per_second": 57.023, "eval_steps_per_second": 7.438, "step": 2678 }, { "epoch": 26.02, "learning_rate": 2.0235053653551354e-05, "loss": 2.1528866656381905e+25, "step": 2680 }, { "epoch": 26.12, "learning_rate": 2.0094532447623915e-05, "loss": 2.3222148635139926e+25, "step": 2690 }, { "epoch": 26.21, "learning_rate": 1.9954011241696477e-05, "loss": 2.285930302388447e+25, "step": 2700 }, { "epoch": 26.31, "learning_rate": 1.9813490035769034e-05, "loss": 1.923084137730665e+25, "step": 2710 }, { "epoch": 26.41, "learning_rate": 1.9672968829841596e-05, "loss": 2.1407918734188223e+25, "step": 2720 }, { "epoch": 26.5, "learning_rate": 1.9532447623914154e-05, "loss": 2.3222148635139926e+25, "step": 2730 }, { "epoch": 26.6, "learning_rate": 1.9391926417986715e-05, "loss": 2.0319380055747437e+25, "step": 2740 }, { "epoch": 26.7, "learning_rate": 1.9251405212059273e-05, "loss": 2.177076250076927e+25, "step": 2750 }, { "epoch": 26.8, "learning_rate": 1.9110884006131834e-05, "loss": 2.213360995669914e+25, "step": 2760 }, { "epoch": 26.89, "learning_rate": 1.8970362800204395e-05, "loss": 2.0682225667002894e+25, "step": 2770 }, { "epoch": 26.99, "learning_rate": 1.8829841594276956e-05, "loss": 2.1045071278258356e+25, "step": 2780 }, { "epoch": 27.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.826, "eval_samples_per_second": 55.688, "eval_steps_per_second": 7.264, "step": 2781 }, { "epoch": 27.09, "learning_rate": 1.8689320388349518e-05, "loss": 2.0803175433870985e+25, "step": 2790 }, { "epoch": 27.18, "learning_rate": 1.8548799182422076e-05, "loss": 2.068222382232849e+25, "step": 2800 }, { "epoch": 27.28, "learning_rate": 1.8408277976494637e-05, "loss": 2.2496457412629006e+25, "step": 2810 }, { "epoch": 27.38, "learning_rate": 1.8267756770567195e-05, "loss": 2.2496457412629006e+25, "step": 2820 }, { "epoch": 27.48, "learning_rate": 1.8127235564639756e-05, "loss": 2.177076619011809e+25, "step": 2830 }, { "epoch": 27.57, "learning_rate": 1.7986714358712314e-05, "loss": 2.177076434544368e+25, "step": 2840 }, { "epoch": 27.67, "learning_rate": 1.7846193152784875e-05, "loss": 2.177076619011809e+25, "step": 2850 }, { "epoch": 27.77, "learning_rate": 1.7705671946857433e-05, "loss": 2.2496457412629006e+25, "step": 2860 }, { "epoch": 27.86, "learning_rate": 1.7565150740929994e-05, "loss": 2.1407918734188223e+25, "step": 2870 }, { "epoch": 27.96, "learning_rate": 1.7424629535002556e-05, "loss": 2.177076250076927e+25, "step": 2880 }, { "epoch": 28.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8323, "eval_samples_per_second": 55.266, "eval_steps_per_second": 7.209, "step": 2884 }, { "epoch": 28.06, "learning_rate": 1.7284108329075117e-05, "loss": 1.9714636755430202e+25, "step": 2890 }, { "epoch": 28.16, "learning_rate": 1.7143587123147678e-05, "loss": 2.104507312293276e+25, "step": 2900 }, { "epoch": 28.25, "learning_rate": 1.7003065917220236e-05, "loss": 2.1407918734188223e+25, "step": 2910 }, { "epoch": 28.35, "learning_rate": 1.6862544711292797e-05, "loss": 1.9593686988562108e+25, "step": 2920 }, { "epoch": 28.45, "learning_rate": 1.6722023505365355e-05, "loss": 2.467353292483617e+25, "step": 2930 }, { "epoch": 28.54, "learning_rate": 1.6581502299437916e-05, "loss": 2.2496457412629006e+25, "step": 2940 }, { "epoch": 28.64, "learning_rate": 1.6440981093510474e-05, "loss": 2.3222148635139926e+25, "step": 2950 }, { "epoch": 28.74, "learning_rate": 1.6300459887583036e-05, "loss": 1.9956534444491974e+25, "step": 2960 }, { "epoch": 28.83, "learning_rate": 1.6159938681655597e-05, "loss": 2.3222148635139926e+25, "step": 2970 }, { "epoch": 28.93, "learning_rate": 1.6019417475728155e-05, "loss": 2.1407918734188223e+25, "step": 2980 }, { "epoch": 29.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8133, "eval_samples_per_second": 56.562, "eval_steps_per_second": 7.378, "step": 2987 }, { "epoch": 29.03, "learning_rate": 1.5878896269800716e-05, "loss": 2.1891712267637367e+25, "step": 2990 }, { "epoch": 29.13, "learning_rate": 1.5738375063873274e-05, "loss": 2.177076250076927e+25, "step": 3000 }, { "epoch": 29.22, "learning_rate": 1.559785385794584e-05, "loss": 2.177076434544368e+25, "step": 3010 }, { "epoch": 29.32, "learning_rate": 1.5457332652018396e-05, "loss": 2.177076619011809e+25, "step": 3020 }, { "epoch": 29.42, "learning_rate": 1.5316811446090958e-05, "loss": 2.3584996091069793e+25, "step": 3030 }, { "epoch": 29.51, "learning_rate": 1.5176290240163517e-05, "loss": 1.923084137730665e+25, "step": 3040 }, { "epoch": 29.61, "learning_rate": 1.5035769034236077e-05, "loss": 2.2496457412629006e+25, "step": 3050 }, { "epoch": 29.71, "learning_rate": 1.4895247828308636e-05, "loss": 2.24964555679546e+25, "step": 3060 }, { "epoch": 29.81, "learning_rate": 1.4754726622381196e-05, "loss": 1.923084137730665e+25, "step": 3070 }, { "epoch": 29.9, "learning_rate": 1.4614205416453755e-05, "loss": 2.177076250076927e+25, "step": 3080 }, { "epoch": 30.0, "learning_rate": 1.4473684210526315e-05, "loss": 2.1528866656381905e+25, "step": 3090 }, { "epoch": 30.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8077, "eval_samples_per_second": 56.953, "eval_steps_per_second": 7.429, "step": 3090 }, { "epoch": 30.1, "learning_rate": 1.4333163004598875e-05, "loss": 2.3222148635139926e+25, "step": 3100 }, { "epoch": 30.19, "learning_rate": 1.4192641798671436e-05, "loss": 2.3222148635139926e+25, "step": 3110 }, { "epoch": 30.29, "learning_rate": 1.4052120592743997e-05, "loss": 2.3222150479814335e+25, "step": 3120 }, { "epoch": 30.39, "learning_rate": 1.3911599386816558e-05, "loss": 2.213360995669914e+25, "step": 3130 }, { "epoch": 30.49, "learning_rate": 1.3771078180889118e-05, "loss": 2.177076619011809e+25, "step": 3140 }, { "epoch": 30.58, "learning_rate": 1.3630556974961678e-05, "loss": 2.1407918734188223e+25, "step": 3150 }, { "epoch": 30.68, "learning_rate": 1.3490035769034237e-05, "loss": 1.995653259981757e+25, "step": 3160 }, { "epoch": 30.78, "learning_rate": 1.3349514563106797e-05, "loss": 1.923084137730665e+25, "step": 3170 }, { "epoch": 30.87, "learning_rate": 1.3208993357179356e-05, "loss": 2.1407918734188223e+25, "step": 3180 }, { "epoch": 30.97, "learning_rate": 1.3068472151251916e-05, "loss": 2.104507312293276e+25, "step": 3190 }, { "epoch": 31.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8172, "eval_samples_per_second": 56.29, "eval_steps_per_second": 7.342, "step": 3193 }, { "epoch": 31.07, "learning_rate": 1.2927950945324475e-05, "loss": 2.3584994246395388e+25, "step": 3200 }, { "epoch": 31.17, "learning_rate": 1.2787429739397037e-05, "loss": 2.1045071278258356e+25, "step": 3210 }, { "epoch": 31.26, "learning_rate": 1.2646908533469598e-05, "loss": 2.1407918734188223e+25, "step": 3220 }, { "epoch": 31.36, "learning_rate": 1.2506387327542158e-05, "loss": 2.140792057886263e+25, "step": 3230 }, { "epoch": 31.46, "learning_rate": 1.2365866121614717e-05, "loss": 2.213361180137355e+25, "step": 3240 }, { "epoch": 31.55, "learning_rate": 1.2225344915687277e-05, "loss": 2.0319380055747437e+25, "step": 3250 }, { "epoch": 31.65, "learning_rate": 1.2084823709759836e-05, "loss": 2.2496457412629006e+25, "step": 3260 }, { "epoch": 31.75, "learning_rate": 1.1944302503832397e-05, "loss": 2.177076619011809e+25, "step": 3270 }, { "epoch": 31.84, "learning_rate": 1.1803781297904957e-05, "loss": 1.9956534444491974e+25, "step": 3280 }, { "epoch": 31.94, "learning_rate": 1.1663260091977517e-05, "loss": 2.3584994246395388e+25, "step": 3290 }, { "epoch": 32.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7732, "eval_samples_per_second": 59.495, "eval_steps_per_second": 7.76, "step": 3296 }, { "epoch": 32.04, "learning_rate": 1.1522738886050076e-05, "loss": 2.116601920045204e+25, "step": 3300 }, { "epoch": 32.14, "learning_rate": 1.1382217680122637e-05, "loss": 2.177076434544368e+25, "step": 3310 }, { "epoch": 32.23, "learning_rate": 1.1241696474195197e-05, "loss": 2.2859304868558873e+25, "step": 3320 }, { "epoch": 32.33, "learning_rate": 1.1101175268267758e-05, "loss": 2.24964555679546e+25, "step": 3330 }, { "epoch": 32.43, "learning_rate": 1.0960654062340318e-05, "loss": 2.104507312293276e+25, "step": 3340 }, { "epoch": 32.52, "learning_rate": 1.0820132856412877e-05, "loss": 1.923084137730665e+25, "step": 3350 }, { "epoch": 32.62, "learning_rate": 1.0679611650485437e-05, "loss": 1.995653259981757e+25, "step": 3360 }, { "epoch": 32.72, "learning_rate": 1.0539090444557997e-05, "loss": 2.394784354699966e+25, "step": 3370 }, { "epoch": 32.82, "learning_rate": 1.0398569238630556e-05, "loss": 2.3222148635139926e+25, "step": 3380 }, { "epoch": 32.91, "learning_rate": 1.0258048032703117e-05, "loss": 2.1045069433583947e+25, "step": 3390 }, { "epoch": 33.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8002, "eval_samples_per_second": 57.486, "eval_steps_per_second": 7.498, "step": 3399 }, { "epoch": 33.01, "learning_rate": 1.0117526826775677e-05, "loss": 2.24964555679546e+25, "step": 3400 }, { "epoch": 33.11, "learning_rate": 9.977005620848238e-06, "loss": 1.9593686988562108e+25, "step": 3410 }, { "epoch": 33.2, "learning_rate": 9.836484414920798e-06, "loss": 2.394784354699966e+25, "step": 3420 }, { "epoch": 33.3, "learning_rate": 9.695963208993357e-06, "loss": 2.2496457412629006e+25, "step": 3430 }, { "epoch": 33.4, "learning_rate": 9.555442003065917e-06, "loss": 1.9593688833236517e+25, "step": 3440 }, { "epoch": 33.5, "learning_rate": 9.414920797138478e-06, "loss": 2.0682225667002894e+25, "step": 3450 }, { "epoch": 33.59, "learning_rate": 9.274399591211038e-06, "loss": 2.2496457412629006e+25, "step": 3460 }, { "epoch": 33.69, "learning_rate": 9.133878385283597e-06, "loss": 2.0682225667002894e+25, "step": 3470 }, { "epoch": 33.79, "learning_rate": 8.993357179356157e-06, "loss": 2.322215232448874e+25, "step": 3480 }, { "epoch": 33.88, "learning_rate": 8.852835973428717e-06, "loss": 2.0682227511677303e+25, "step": 3490 }, { "epoch": 33.98, "learning_rate": 8.712314767501278e-06, "loss": 2.2859304868558873e+25, "step": 3500 }, { "epoch": 34.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7812, "eval_samples_per_second": 58.886, "eval_steps_per_second": 7.681, "step": 3502 }, { "epoch": 34.08, "learning_rate": 8.571793561573839e-06, "loss": 2.189171595698618e+25, "step": 3510 }, { "epoch": 34.17, "learning_rate": 8.431272355646399e-06, "loss": 2.285930117921006e+25, "step": 3520 }, { "epoch": 34.27, "learning_rate": 8.290751149718958e-06, "loss": 1.886799576605119e+25, "step": 3530 }, { "epoch": 34.37, "learning_rate": 8.150229943791518e-06, "loss": 2.213361180137355e+25, "step": 3540 }, { "epoch": 34.47, "learning_rate": 8.009708737864077e-06, "loss": 2.177076434544368e+25, "step": 3550 }, { "epoch": 34.56, "learning_rate": 7.869187531936637e-06, "loss": 2.431068915825512e+25, "step": 3560 }, { "epoch": 34.66, "learning_rate": 7.728666326009198e-06, "loss": 1.9956534444491974e+25, "step": 3570 }, { "epoch": 34.76, "learning_rate": 7.588145120081759e-06, "loss": 2.213360995669914e+25, "step": 3580 }, { "epoch": 34.85, "learning_rate": 7.447623914154318e-06, "loss": 2.1045071278258356e+25, "step": 3590 }, { "epoch": 34.95, "learning_rate": 7.307102708226878e-06, "loss": 2.140792057886263e+25, "step": 3600 }, { "epoch": 35.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7977, "eval_samples_per_second": 57.667, "eval_steps_per_second": 7.522, "step": 3605 }, { "epoch": 35.05, "learning_rate": 7.166581502299437e-06, "loss": 2.298025279075256e+25, "step": 3610 }, { "epoch": 35.15, "learning_rate": 7.026060296371999e-06, "loss": 2.213360995669914e+25, "step": 3620 }, { "epoch": 35.24, "learning_rate": 6.885539090444559e-06, "loss": 2.3584996091069793e+25, "step": 3630 }, { "epoch": 35.34, "learning_rate": 6.7450178845171186e-06, "loss": 2.104507496760717e+25, "step": 3640 }, { "epoch": 35.44, "learning_rate": 6.604496678589678e-06, "loss": 2.2496457412629006e+25, "step": 3650 }, { "epoch": 35.53, "learning_rate": 6.463975472662238e-06, "loss": 1.9593688833236517e+25, "step": 3660 }, { "epoch": 35.63, "learning_rate": 6.323454266734799e-06, "loss": 1.995653259981757e+25, "step": 3670 }, { "epoch": 35.73, "learning_rate": 6.1829330608073585e-06, "loss": 2.3584996091069793e+25, "step": 3680 }, { "epoch": 35.83, "learning_rate": 6.042411854879918e-06, "loss": 1.8505148310121323e+25, "step": 3690 }, { "epoch": 35.92, "learning_rate": 5.9018906489524785e-06, "loss": 2.104507496760717e+25, "step": 3700 }, { "epoch": 36.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7792, "eval_samples_per_second": 59.035, "eval_steps_per_second": 7.7, "step": 3708 }, { "epoch": 36.02, "learning_rate": 5.761369443025038e-06, "loss": 2.3584994246395388e+25, "step": 3710 }, { "epoch": 36.12, "learning_rate": 5.6208482370975985e-06, "loss": 2.24964555679546e+25, "step": 3720 }, { "epoch": 36.21, "learning_rate": 5.480327031170159e-06, "loss": 2.213361180137355e+25, "step": 3730 }, { "epoch": 36.31, "learning_rate": 5.3398058252427185e-06, "loss": 2.3584994246395388e+25, "step": 3740 }, { "epoch": 36.41, "learning_rate": 5.199284619315278e-06, "loss": 1.9956536289166384e+25, "step": 3750 }, { "epoch": 36.5, "learning_rate": 5.0587634133878385e-06, "loss": 1.923084137730665e+25, "step": 3760 }, { "epoch": 36.6, "learning_rate": 4.918242207460399e-06, "loss": 2.285930117921006e+25, "step": 3770 }, { "epoch": 36.7, "learning_rate": 4.7777210015329585e-06, "loss": 2.3222148635139926e+25, "step": 3780 }, { "epoch": 36.8, "learning_rate": 4.637199795605519e-06, "loss": 2.177076619011809e+25, "step": 3790 }, { "epoch": 36.89, "learning_rate": 4.4966785896780785e-06, "loss": 2.104507496760717e+25, "step": 3800 }, { "epoch": 36.99, "learning_rate": 4.356157383750639e-06, "loss": 2.177076434544368e+25, "step": 3810 }, { "epoch": 37.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7992, "eval_samples_per_second": 57.558, "eval_steps_per_second": 7.508, "step": 3811 }, { "epoch": 37.09, "learning_rate": 4.215636177823199e-06, "loss": 1.8626098076989416e+25, "step": 3820 }, { "epoch": 37.18, "learning_rate": 4.075114971895759e-06, "loss": 1.923084137730665e+25, "step": 3830 }, { "epoch": 37.28, "learning_rate": 3.9345937659683185e-06, "loss": 2.1045071278258356e+25, "step": 3840 }, { "epoch": 37.38, "learning_rate": 3.7940725600408793e-06, "loss": 2.1045071278258356e+25, "step": 3850 }, { "epoch": 37.48, "learning_rate": 3.653551354113439e-06, "loss": 2.2496457412629006e+25, "step": 3860 }, { "epoch": 37.57, "learning_rate": 3.5130301481859993e-06, "loss": 2.3584994246395388e+25, "step": 3870 }, { "epoch": 37.67, "learning_rate": 3.3725089422585593e-06, "loss": 2.177076619011809e+25, "step": 3880 }, { "epoch": 37.77, "learning_rate": 3.231987736331119e-06, "loss": 2.3584994246395388e+25, "step": 3890 }, { "epoch": 37.86, "learning_rate": 3.0914665304036793e-06, "loss": 2.1407918734188223e+25, "step": 3900 }, { "epoch": 37.96, "learning_rate": 2.9509453244762393e-06, "loss": 2.2496457412629006e+25, "step": 3910 }, { "epoch": 38.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.8064, "eval_samples_per_second": 57.045, "eval_steps_per_second": 7.441, "step": 3914 }, { "epoch": 38.06, "learning_rate": 2.8104241185487993e-06, "loss": 2.3947839857650846e+25, "step": 3920 }, { "epoch": 38.16, "learning_rate": 2.6699029126213593e-06, "loss": 2.1407918734188223e+25, "step": 3930 }, { "epoch": 38.25, "learning_rate": 2.5293817066939193e-06, "loss": 1.9593686988562108e+25, "step": 3940 }, { "epoch": 38.35, "learning_rate": 2.3888605007664792e-06, "loss": 2.3222148635139926e+25, "step": 3950 }, { "epoch": 38.45, "learning_rate": 2.2483392948390392e-06, "loss": 2.1407918734188223e+25, "step": 3960 }, { "epoch": 38.54, "learning_rate": 2.1078180889115997e-06, "loss": 2.213361180137355e+25, "step": 3970 }, { "epoch": 38.64, "learning_rate": 1.9672968829841592e-06, "loss": 2.068222382232849e+25, "step": 3980 }, { "epoch": 38.74, "learning_rate": 1.8267756770567194e-06, "loss": 2.177076434544368e+25, "step": 3990 }, { "epoch": 38.83, "learning_rate": 1.6862544711292796e-06, "loss": 2.24964555679546e+25, "step": 4000 }, { "epoch": 38.93, "learning_rate": 1.5457332652018396e-06, "loss": 2.1407918734188223e+25, "step": 4010 }, { "epoch": 39.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7932, "eval_samples_per_second": 57.993, "eval_steps_per_second": 7.564, "step": 4017 }, { "epoch": 39.03, "learning_rate": 1.4052120592743996e-06, "loss": 2.2254557878892825e+25, "step": 4020 }, { "epoch": 39.13, "learning_rate": 1.2646908533469596e-06, "loss": 2.1407918734188223e+25, "step": 4030 }, { "epoch": 39.22, "learning_rate": 1.1241696474195196e-06, "loss": 2.0319380055747437e+25, "step": 4040 }, { "epoch": 39.32, "learning_rate": 9.836484414920796e-07, "loss": 2.35849979357442e+25, "step": 4050 }, { "epoch": 39.42, "learning_rate": 8.431272355646398e-07, "loss": 1.9593688833236517e+25, "step": 4060 }, { "epoch": 39.51, "learning_rate": 7.026060296371998e-07, "loss": 2.2133613646047957e+25, "step": 4070 }, { "epoch": 39.61, "learning_rate": 5.620848237097598e-07, "loss": 2.2859304868558873e+25, "step": 4080 }, { "epoch": 39.71, "learning_rate": 4.215636177823199e-07, "loss": 1.9593688833236517e+25, "step": 4090 }, { "epoch": 39.81, "learning_rate": 2.810424118548799e-07, "loss": 2.3222148635139926e+25, "step": 4100 }, { "epoch": 39.9, "learning_rate": 1.4052120592743995e-07, "loss": 2.1407918734188223e+25, "step": 4110 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 2.3222148635139926e+25, "step": 4120 }, { "epoch": 40.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.5872499347325405e+25, "eval_runtime": 0.7917, "eval_samples_per_second": 58.104, "eval_steps_per_second": 7.579, "step": 4120 }, { "epoch": 40.0, "step": 4120, "total_flos": 2.538683085785334e+18, "train_loss": 2.1692970037867085e+25, "train_runtime": 690.2509, "train_samples_per_second": 47.461, "train_steps_per_second": 5.969 } ], "logging_steps": 10, "max_steps": 4120, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.538683085785334e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }