|
{ |
|
"best_metric": 0.8697857948139797, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-ISIC-2019/checkpoint-5125", |
|
"epoch": 99.2, |
|
"eval_steps": 500, |
|
"global_step": 6200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009983870967741936, |
|
"loss": 1.7219, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009967741935483871, |
|
"loss": 1.3003, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0009951612903225807, |
|
"loss": 1.1303, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0009935483870967743, |
|
"loss": 1.0819, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0009919354838709678, |
|
"loss": 1.0046, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0009903225806451614, |
|
"loss": 1.0273, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.6629086809470124, |
|
"eval_loss": 0.9625440239906311, |
|
"eval_runtime": 47.077, |
|
"eval_samples_per_second": 37.683, |
|
"eval_steps_per_second": 0.595, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.000988709677419355, |
|
"loss": 0.9252, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0009870967741935483, |
|
"loss": 0.9336, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.000985483870967742, |
|
"loss": 0.9069, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0009838709677419356, |
|
"loss": 0.8775, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0009822580645161292, |
|
"loss": 0.8402, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0009806451612903225, |
|
"loss": 0.8456, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6989853438556933, |
|
"eval_loss": 0.8067747950553894, |
|
"eval_runtime": 46.3737, |
|
"eval_samples_per_second": 38.254, |
|
"eval_steps_per_second": 0.604, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.000979032258064516, |
|
"loss": 0.8235, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0009774193548387096, |
|
"loss": 0.804, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0009758064516129033, |
|
"loss": 0.7431, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0009741935483870968, |
|
"loss": 0.7396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0009725806451612903, |
|
"loss": 0.7445, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0009709677419354839, |
|
"loss": 0.771, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7361894024802705, |
|
"eval_loss": 0.7125946283340454, |
|
"eval_runtime": 48.6481, |
|
"eval_samples_per_second": 36.466, |
|
"eval_steps_per_second": 0.576, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0009693548387096774, |
|
"loss": 0.7465, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.000967741935483871, |
|
"loss": 0.6992, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0009661290322580646, |
|
"loss": 0.727, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0009645161290322581, |
|
"loss": 0.7012, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0009629032258064516, |
|
"loss": 0.6845, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0009612903225806452, |
|
"loss": 0.6944, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0009596774193548388, |
|
"loss": 0.682, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7497181510710259, |
|
"eval_loss": 0.6901255249977112, |
|
"eval_runtime": 28.1416, |
|
"eval_samples_per_second": 63.038, |
|
"eval_steps_per_second": 0.995, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.0009580645161290322, |
|
"loss": 0.6684, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0009564516129032258, |
|
"loss": 0.6545, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0009548387096774193, |
|
"loss": 0.657, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.000953225806451613, |
|
"loss": 0.6439, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0009516129032258065, |
|
"loss": 0.6307, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00095, |
|
"loss": 0.641, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.7570462232243518, |
|
"eval_loss": 0.6499941945075989, |
|
"eval_runtime": 28.131, |
|
"eval_samples_per_second": 63.062, |
|
"eval_steps_per_second": 0.995, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0009483870967741936, |
|
"loss": 0.6175, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.0009467741935483871, |
|
"loss": 0.6139, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0009451612903225807, |
|
"loss": 0.6091, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0009435483870967742, |
|
"loss": 0.59, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0009419354838709677, |
|
"loss": 0.6034, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0009403225806451613, |
|
"loss": 0.569, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7638105975197295, |
|
"eval_loss": 0.645984947681427, |
|
"eval_runtime": 28.0361, |
|
"eval_samples_per_second": 63.276, |
|
"eval_steps_per_second": 0.999, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.0009387096774193549, |
|
"loss": 0.6036, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.0009370967741935485, |
|
"loss": 0.5876, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0009354838709677419, |
|
"loss": 0.5796, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0009338709677419355, |
|
"loss": 0.5397, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.000932258064516129, |
|
"loss": 0.5535, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.0009306451612903226, |
|
"loss": 0.5696, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.7795941375422774, |
|
"eval_loss": 0.5974171757698059, |
|
"eval_runtime": 28.0898, |
|
"eval_samples_per_second": 63.155, |
|
"eval_steps_per_second": 0.997, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.0009290322580645161, |
|
"loss": 0.556, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0009274193548387097, |
|
"loss": 0.5536, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.0009258064516129033, |
|
"loss": 0.5497, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.0009241935483870968, |
|
"loss": 0.5626, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.0009225806451612904, |
|
"loss": 0.5352, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.0009209677419354839, |
|
"loss": 0.5656, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0009193548387096774, |
|
"loss": 0.5411, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7795941375422774, |
|
"eval_loss": 0.607583224773407, |
|
"eval_runtime": 28.0445, |
|
"eval_samples_per_second": 63.257, |
|
"eval_steps_per_second": 0.998, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.0009177419354838709, |
|
"loss": 0.5269, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.0009161290322580645, |
|
"loss": 0.4964, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.0009145161290322582, |
|
"loss": 0.5051, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0009129032258064517, |
|
"loss": 0.4851, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0009112903225806452, |
|
"loss": 0.5234, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.0009096774193548387, |
|
"loss": 0.5015, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.7880496054114995, |
|
"eval_loss": 0.5632555484771729, |
|
"eval_runtime": 28.0469, |
|
"eval_samples_per_second": 63.251, |
|
"eval_steps_per_second": 0.998, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.0009080645161290323, |
|
"loss": 0.4887, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0009064516129032259, |
|
"loss": 0.4903, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.0009048387096774193, |
|
"loss": 0.4753, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0009032258064516129, |
|
"loss": 0.4818, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.0009016129032258064, |
|
"loss": 0.4992, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 0.0009000000000000001, |
|
"loss": 0.4999, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7891770011273957, |
|
"eval_loss": 0.5725666284561157, |
|
"eval_runtime": 28.1997, |
|
"eval_samples_per_second": 62.909, |
|
"eval_steps_per_second": 0.993, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.0008983870967741936, |
|
"loss": 0.492, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 0.0008967741935483871, |
|
"loss": 0.4499, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.0008951612903225806, |
|
"loss": 0.4381, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 0.0008935483870967742, |
|
"loss": 0.4625, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 0.0008919354838709678, |
|
"loss": 0.4654, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.0008903225806451613, |
|
"loss": 0.4569, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.7993235625704622, |
|
"eval_loss": 0.5587171316146851, |
|
"eval_runtime": 28.1042, |
|
"eval_samples_per_second": 63.122, |
|
"eval_steps_per_second": 0.996, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.0008887096774193548, |
|
"loss": 0.4954, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.0008870967741935484, |
|
"loss": 0.4597, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 0.000885483870967742, |
|
"loss": 0.4272, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.0008838709677419356, |
|
"loss": 0.4457, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 0.000882258064516129, |
|
"loss": 0.4292, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 0.0008806451612903226, |
|
"loss": 0.4469, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0008790322580645161, |
|
"loss": 0.4348, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7998872604284104, |
|
"eval_loss": 0.5712145566940308, |
|
"eval_runtime": 28.1914, |
|
"eval_samples_per_second": 62.927, |
|
"eval_steps_per_second": 0.993, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.0008774193548387097, |
|
"loss": 0.441, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 0.0008758064516129032, |
|
"loss": 0.3975, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.0008741935483870968, |
|
"loss": 0.4342, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 0.0008725806451612904, |
|
"loss": 0.427, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0008709677419354839, |
|
"loss": 0.4037, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 0.0008693548387096775, |
|
"loss": 0.4321, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.7970687711386697, |
|
"eval_loss": 0.5455379486083984, |
|
"eval_runtime": 28.1711, |
|
"eval_samples_per_second": 62.972, |
|
"eval_steps_per_second": 0.994, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 0.000867741935483871, |
|
"loss": 0.3868, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 0.0008661290322580645, |
|
"loss": 0.3746, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 0.000864516129032258, |
|
"loss": 0.4057, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.0008629032258064516, |
|
"loss": 0.4019, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.0008612903225806453, |
|
"loss": 0.3797, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 0.0008596774193548387, |
|
"loss": 0.4072, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8083427282976324, |
|
"eval_loss": 0.5409186482429504, |
|
"eval_runtime": 28.1106, |
|
"eval_samples_per_second": 63.108, |
|
"eval_steps_per_second": 0.996, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 0.0008580645161290323, |
|
"loss": 0.3634, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 0.0008564516129032258, |
|
"loss": 0.372, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0008548387096774194, |
|
"loss": 0.3916, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 0.0008532258064516129, |
|
"loss": 0.3917, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.0008516129032258064, |
|
"loss": 0.3985, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 0.00085, |
|
"loss": 0.3821, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8105975197294251, |
|
"eval_loss": 0.5463915467262268, |
|
"eval_runtime": 28.0622, |
|
"eval_samples_per_second": 63.217, |
|
"eval_steps_per_second": 0.998, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 0.0008483870967741936, |
|
"loss": 0.3922, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.0008467741935483872, |
|
"loss": 0.3777, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 0.0008451612903225807, |
|
"loss": 0.3864, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 0.0008435483870967742, |
|
"loss": 0.3871, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 0.0008419354838709677, |
|
"loss": 0.3632, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 0.0008403225806451613, |
|
"loss": 0.3622, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0008387096774193549, |
|
"loss": 0.376, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8151071025930101, |
|
"eval_loss": 0.5402312278747559, |
|
"eval_runtime": 27.9895, |
|
"eval_samples_per_second": 63.381, |
|
"eval_steps_per_second": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 0.0008370967741935483, |
|
"loss": 0.3304, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 0.0008354838709677419, |
|
"loss": 0.3486, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 0.0008338709677419355, |
|
"loss": 0.3666, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 0.0008322580645161291, |
|
"loss": 0.3372, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0008306451612903227, |
|
"loss": 0.3628, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.0008290322580645161, |
|
"loss": 0.3427, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8167981961668546, |
|
"eval_loss": 0.5326964855194092, |
|
"eval_runtime": 28.178, |
|
"eval_samples_per_second": 62.957, |
|
"eval_steps_per_second": 0.994, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 0.0008274193548387097, |
|
"loss": 0.338, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 0.0008258064516129032, |
|
"loss": 0.3497, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 0.0008241935483870968, |
|
"loss": 0.3289, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0008225806451612903, |
|
"loss": 0.3288, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 0.0008209677419354839, |
|
"loss": 0.3521, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 0.0008193548387096774, |
|
"loss": 0.2938, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8100338218714769, |
|
"eval_loss": 0.5300917625427246, |
|
"eval_runtime": 28.1235, |
|
"eval_samples_per_second": 63.079, |
|
"eval_steps_per_second": 0.996, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 0.000817741935483871, |
|
"loss": 0.3332, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 0.0008161290322580646, |
|
"loss": 0.3199, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.0008145161290322581, |
|
"loss": 0.3049, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.0008129032258064516, |
|
"loss": 0.3194, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 0.0008112903225806451, |
|
"loss": 0.3367, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 0.0008096774193548387, |
|
"loss": 0.3116, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.8134160090191658, |
|
"eval_loss": 0.5456886887550354, |
|
"eval_runtime": 28.0707, |
|
"eval_samples_per_second": 63.198, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 0.0008080645161290324, |
|
"loss": 0.337, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0008064516129032258, |
|
"loss": 0.3245, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 0.0008048387096774194, |
|
"loss": 0.3222, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 0.0008032258064516129, |
|
"loss": 0.3146, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"learning_rate": 0.0008016129032258065, |
|
"loss": 0.3091, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 0.0008, |
|
"loss": 0.3027, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0007983870967741935, |
|
"loss": 0.3231, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8156708004509583, |
|
"eval_loss": 0.5506556630134583, |
|
"eval_runtime": 28.0877, |
|
"eval_samples_per_second": 63.159, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 0.000796774193548387, |
|
"loss": 0.2817, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 0.0007951612903225807, |
|
"loss": 0.301, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 20.48, |
|
"learning_rate": 0.0007935483870967743, |
|
"loss": 0.3078, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 20.64, |
|
"learning_rate": 0.0007919354838709678, |
|
"loss": 0.3075, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 0.0007903225806451613, |
|
"loss": 0.316, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 0.0007887096774193548, |
|
"loss": 0.2942, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.8156708004509583, |
|
"eval_loss": 0.5306803584098816, |
|
"eval_runtime": 28.1394, |
|
"eval_samples_per_second": 63.043, |
|
"eval_steps_per_second": 0.995, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"learning_rate": 0.0007870967741935484, |
|
"loss": 0.2789, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 0.000785483870967742, |
|
"loss": 0.282, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 21.44, |
|
"learning_rate": 0.0007838709677419354, |
|
"loss": 0.2837, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 0.0007822580645161291, |
|
"loss": 0.2955, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 0.0007806451612903226, |
|
"loss": 0.2847, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"learning_rate": 0.0007790322580645162, |
|
"loss": 0.299, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8320180383314544, |
|
"eval_loss": 0.517823338508606, |
|
"eval_runtime": 28.1763, |
|
"eval_samples_per_second": 62.961, |
|
"eval_steps_per_second": 0.994, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"learning_rate": 0.0007774193548387097, |
|
"loss": 0.2907, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"learning_rate": 0.0007758064516129032, |
|
"loss": 0.2688, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 0.0007741935483870968, |
|
"loss": 0.2671, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.56, |
|
"learning_rate": 0.0007725806451612903, |
|
"loss": 0.2757, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 22.72, |
|
"learning_rate": 0.0007709677419354839, |
|
"loss": 0.2682, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 0.0007693548387096775, |
|
"loss": 0.2821, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.8241262683201804, |
|
"eval_loss": 0.5435750484466553, |
|
"eval_runtime": 28.0623, |
|
"eval_samples_per_second": 63.217, |
|
"eval_steps_per_second": 0.998, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"learning_rate": 0.000767741935483871, |
|
"loss": 0.2823, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 0.0007661290322580645, |
|
"loss": 0.2778, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 0.0007645161290322581, |
|
"loss": 0.2605, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 23.52, |
|
"learning_rate": 0.0007629032258064517, |
|
"loss": 0.2503, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 0.0007612903225806451, |
|
"loss": 0.2849, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 23.84, |
|
"learning_rate": 0.0007596774193548387, |
|
"loss": 0.2728, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.0007580645161290322, |
|
"loss": 0.2576, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.822435174746336, |
|
"eval_loss": 0.5331800580024719, |
|
"eval_runtime": 28.0307, |
|
"eval_samples_per_second": 63.288, |
|
"eval_steps_per_second": 0.999, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"learning_rate": 0.0007564516129032259, |
|
"loss": 0.2493, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 0.0007548387096774194, |
|
"loss": 0.2422, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 0.0007532258064516129, |
|
"loss": 0.2671, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 24.64, |
|
"learning_rate": 0.0007516129032258065, |
|
"loss": 0.2586, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 0.00075, |
|
"loss": 0.2563, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"learning_rate": 0.0007483870967741936, |
|
"loss": 0.2728, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.8314543404735062, |
|
"eval_loss": 0.5400711894035339, |
|
"eval_runtime": 28.1644, |
|
"eval_samples_per_second": 62.987, |
|
"eval_steps_per_second": 0.994, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"learning_rate": 0.0007467741935483871, |
|
"loss": 0.2507, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 25.28, |
|
"learning_rate": 0.0007451612903225806, |
|
"loss": 0.2245, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 25.44, |
|
"learning_rate": 0.0007435483870967741, |
|
"loss": 0.242, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.0007419354838709678, |
|
"loss": 0.2412, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"learning_rate": 0.0007403225806451614, |
|
"loss": 0.2357, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"learning_rate": 0.0007387096774193549, |
|
"loss": 0.2383, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8342728297632469, |
|
"eval_loss": 0.5709508657455444, |
|
"eval_runtime": 28.0824, |
|
"eval_samples_per_second": 63.171, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 26.08, |
|
"learning_rate": 0.0007370967741935484, |
|
"loss": 0.213, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"learning_rate": 0.0007354838709677419, |
|
"loss": 0.2228, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 0.0007338709677419355, |
|
"loss": 0.2234, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 26.56, |
|
"learning_rate": 0.000732258064516129, |
|
"loss": 0.2327, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 0.0007306451612903225, |
|
"loss": 0.2474, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 0.0007290322580645162, |
|
"loss": 0.2504, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.8325817361894025, |
|
"eval_loss": 0.5497803092002869, |
|
"eval_runtime": 28.0865, |
|
"eval_samples_per_second": 63.162, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 0.0007274193548387097, |
|
"loss": 0.2576, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 0.0007258064516129033, |
|
"loss": 0.2213, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 0.0007241935483870968, |
|
"loss": 0.2332, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 27.52, |
|
"learning_rate": 0.0007225806451612903, |
|
"loss": 0.2632, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 0.0007209677419354838, |
|
"loss": 0.2209, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"learning_rate": 0.0007193548387096774, |
|
"loss": 0.2336, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.000717741935483871, |
|
"loss": 0.2474, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8348365276211951, |
|
"eval_loss": 0.5372113585472107, |
|
"eval_runtime": 28.1229, |
|
"eval_samples_per_second": 63.08, |
|
"eval_steps_per_second": 0.996, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"learning_rate": 0.0007161290322580646, |
|
"loss": 0.2054, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 28.32, |
|
"learning_rate": 0.0007145161290322581, |
|
"loss": 0.2309, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 28.48, |
|
"learning_rate": 0.0007129032258064516, |
|
"loss": 0.2155, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 0.0007112903225806452, |
|
"loss": 0.2158, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 0.0007096774193548388, |
|
"loss": 0.2239, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 0.0007080645161290322, |
|
"loss": 0.2156, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.830890642615558, |
|
"eval_loss": 0.562776505947113, |
|
"eval_runtime": 28.1235, |
|
"eval_samples_per_second": 63.079, |
|
"eval_steps_per_second": 0.996, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 0.0007064516129032258, |
|
"loss": 0.2085, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 0.0007048387096774193, |
|
"loss": 0.1977, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"learning_rate": 0.000703225806451613, |
|
"loss": 0.2147, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 0.0007016129032258065, |
|
"loss": 0.2381, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 29.76, |
|
"learning_rate": 0.0007, |
|
"loss": 0.229, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 29.92, |
|
"learning_rate": 0.0006983870967741936, |
|
"loss": 0.2035, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8376550169109357, |
|
"eval_loss": 0.5537896752357483, |
|
"eval_runtime": 28.2907, |
|
"eval_samples_per_second": 62.706, |
|
"eval_steps_per_second": 0.99, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 30.08, |
|
"learning_rate": 0.0006967741935483871, |
|
"loss": 0.2102, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 30.24, |
|
"learning_rate": 0.0006951612903225807, |
|
"loss": 0.1885, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 0.0006935483870967742, |
|
"loss": 0.2167, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 30.56, |
|
"learning_rate": 0.0006919354838709677, |
|
"loss": 0.1848, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 30.72, |
|
"learning_rate": 0.0006903225806451613, |
|
"loss": 0.2228, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 30.88, |
|
"learning_rate": 0.0006887096774193549, |
|
"loss": 0.2043, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.8416009019165727, |
|
"eval_loss": 0.5484991073608398, |
|
"eval_runtime": 28.1175, |
|
"eval_samples_per_second": 63.092, |
|
"eval_steps_per_second": 0.996, |
|
"step": 1937 |
|
}, |
|
{ |
|
"epoch": 31.04, |
|
"learning_rate": 0.0006870967741935485, |
|
"loss": 0.1918, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 0.0006854838709677419, |
|
"loss": 0.1832, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 0.0006840322580645162, |
|
"loss": 0.1831, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 31.52, |
|
"learning_rate": 0.0006824193548387096, |
|
"loss": 0.1991, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 31.68, |
|
"learning_rate": 0.0006808064516129033, |
|
"loss": 0.2157, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 31.84, |
|
"learning_rate": 0.0006791935483870968, |
|
"loss": 0.2158, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.0006775806451612904, |
|
"loss": 0.1964, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8359639233370914, |
|
"eval_loss": 0.5694667100906372, |
|
"eval_runtime": 28.2192, |
|
"eval_samples_per_second": 62.865, |
|
"eval_steps_per_second": 0.992, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.16, |
|
"learning_rate": 0.0006759677419354839, |
|
"loss": 0.193, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 32.32, |
|
"learning_rate": 0.0006743548387096774, |
|
"loss": 0.1981, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"learning_rate": 0.000672741935483871, |
|
"loss": 0.2037, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 32.64, |
|
"learning_rate": 0.0006711290322580645, |
|
"loss": 0.2001, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 0.0006695161290322581, |
|
"loss": 0.2013, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"learning_rate": 0.0006679032258064516, |
|
"loss": 0.2086, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.8438556933483653, |
|
"eval_loss": 0.5628190040588379, |
|
"eval_runtime": 28.2509, |
|
"eval_samples_per_second": 62.794, |
|
"eval_steps_per_second": 0.991, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 33.12, |
|
"learning_rate": 0.0006662903225806452, |
|
"loss": 0.1837, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 33.28, |
|
"learning_rate": 0.0006646774193548387, |
|
"loss": 0.1725, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 0.0006630645161290323, |
|
"loss": 0.1834, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 0.0006614516129032259, |
|
"loss": 0.1934, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 0.0006598387096774193, |
|
"loss": 0.2077, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 33.92, |
|
"learning_rate": 0.0006582258064516129, |
|
"loss": 0.1893, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8399098083427283, |
|
"eval_loss": 0.5582923293113708, |
|
"eval_runtime": 28.2188, |
|
"eval_samples_per_second": 62.866, |
|
"eval_steps_per_second": 0.992, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 34.08, |
|
"learning_rate": 0.0006566129032258064, |
|
"loss": 0.1985, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 34.24, |
|
"learning_rate": 0.0006550000000000001, |
|
"loss": 0.2009, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"learning_rate": 0.0006533870967741936, |
|
"loss": 0.1944, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 34.56, |
|
"learning_rate": 0.0006517741935483871, |
|
"loss": 0.1963, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 34.72, |
|
"learning_rate": 0.0006501612903225807, |
|
"loss": 0.1798, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"learning_rate": 0.0006485483870967742, |
|
"loss": 0.1857, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.8387824126268321, |
|
"eval_loss": 0.5524682998657227, |
|
"eval_runtime": 28.2513, |
|
"eval_samples_per_second": 62.793, |
|
"eval_steps_per_second": 0.991, |
|
"step": 2187 |
|
}, |
|
{ |
|
"epoch": 35.04, |
|
"learning_rate": 0.0006469354838709678, |
|
"loss": 0.1827, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 0.0006453225806451612, |
|
"loss": 0.1675, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.36, |
|
"learning_rate": 0.0006437096774193548, |
|
"loss": 0.1481, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"learning_rate": 0.0006420967741935483, |
|
"loss": 0.1806, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 35.68, |
|
"learning_rate": 0.000640483870967742, |
|
"loss": 0.2022, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 35.84, |
|
"learning_rate": 0.0006388709677419356, |
|
"loss": 0.203, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 0.000637258064516129, |
|
"loss": 0.1811, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8444193912063134, |
|
"eval_loss": 0.5287083387374878, |
|
"eval_runtime": 28.086, |
|
"eval_samples_per_second": 63.163, |
|
"eval_steps_per_second": 0.997, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 36.16, |
|
"learning_rate": 0.0006356451612903226, |
|
"loss": 0.1869, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"learning_rate": 0.0006340322580645161, |
|
"loss": 0.1836, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 36.48, |
|
"learning_rate": 0.0006324193548387097, |
|
"loss": 0.1816, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"learning_rate": 0.0006308064516129032, |
|
"loss": 0.1786, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 0.0006291935483870967, |
|
"loss": 0.1749, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 0.0006275806451612904, |
|
"loss": 0.196, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.8416009019165727, |
|
"eval_loss": 0.5324433445930481, |
|
"eval_runtime": 28.2732, |
|
"eval_samples_per_second": 62.745, |
|
"eval_steps_per_second": 0.99, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 37.12, |
|
"learning_rate": 0.0006259677419354839, |
|
"loss": 0.1595, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 37.28, |
|
"learning_rate": 0.0006243548387096775, |
|
"loss": 0.1769, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 37.44, |
|
"learning_rate": 0.000622741935483871, |
|
"loss": 0.1692, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"learning_rate": 0.0006211290322580645, |
|
"loss": 0.1621, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 37.76, |
|
"learning_rate": 0.000619516129032258, |
|
"loss": 0.1726, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 37.92, |
|
"learning_rate": 0.0006179032258064516, |
|
"loss": 0.1644, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8472378804960541, |
|
"eval_loss": 0.5432766675949097, |
|
"eval_runtime": 28.0779, |
|
"eval_samples_per_second": 63.181, |
|
"eval_steps_per_second": 0.997, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"learning_rate": 0.0006162903225806452, |
|
"loss": 0.167, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 38.24, |
|
"learning_rate": 0.0006146774193548387, |
|
"loss": 0.1735, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 0.0006130645161290323, |
|
"loss": 0.1682, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 38.56, |
|
"learning_rate": 0.0006114516129032258, |
|
"loss": 0.1452, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 38.72, |
|
"learning_rate": 0.0006098387096774194, |
|
"loss": 0.1612, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 38.88, |
|
"learning_rate": 0.000608225806451613, |
|
"loss": 0.1754, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.8478015783540023, |
|
"eval_loss": 0.5511437058448792, |
|
"eval_runtime": 28.1474, |
|
"eval_samples_per_second": 63.025, |
|
"eval_steps_per_second": 0.995, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 39.04, |
|
"learning_rate": 0.0006066129032258064, |
|
"loss": 0.1515, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"learning_rate": 0.000605, |
|
"loss": 0.1515, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 39.36, |
|
"learning_rate": 0.0006033870967741935, |
|
"loss": 0.1491, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 39.52, |
|
"learning_rate": 0.0006017741935483872, |
|
"loss": 0.1682, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 39.68, |
|
"learning_rate": 0.0006001612903225807, |
|
"loss": 0.1631, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 39.84, |
|
"learning_rate": 0.0005985483870967742, |
|
"loss": 0.1862, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005969354838709678, |
|
"loss": 0.1521, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.846674182638106, |
|
"eval_loss": 0.5625754594802856, |
|
"eval_runtime": 28.1213, |
|
"eval_samples_per_second": 63.084, |
|
"eval_steps_per_second": 0.996, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 40.16, |
|
"learning_rate": 0.0005953225806451613, |
|
"loss": 0.1711, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 40.32, |
|
"learning_rate": 0.0005937096774193549, |
|
"loss": 0.1473, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 40.48, |
|
"learning_rate": 0.0005920967741935483, |
|
"loss": 0.1683, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 40.64, |
|
"learning_rate": 0.0005904838709677419, |
|
"loss": 0.1459, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"learning_rate": 0.0005888709677419355, |
|
"loss": 0.1506, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 40.96, |
|
"learning_rate": 0.0005872580645161291, |
|
"loss": 0.1536, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_accuracy": 0.8500563697857948, |
|
"eval_loss": 0.5633701086044312, |
|
"eval_runtime": 28.1665, |
|
"eval_samples_per_second": 62.983, |
|
"eval_steps_per_second": 0.994, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 0.0005856451612903227, |
|
"loss": 0.1767, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 41.28, |
|
"learning_rate": 0.0005840322580645161, |
|
"loss": 0.1589, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 41.44, |
|
"learning_rate": 0.0005824193548387097, |
|
"loss": 0.1693, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 0.0005808064516129032, |
|
"loss": 0.1604, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 41.76, |
|
"learning_rate": 0.0005791935483870968, |
|
"loss": 0.1625, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 41.92, |
|
"learning_rate": 0.0005775806451612903, |
|
"loss": 0.1399, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8596392333709132, |
|
"eval_loss": 0.5802281498908997, |
|
"eval_runtime": 28.0866, |
|
"eval_samples_per_second": 63.162, |
|
"eval_steps_per_second": 0.997, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 42.08, |
|
"learning_rate": 0.0005759677419354839, |
|
"loss": 0.1367, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 42.24, |
|
"learning_rate": 0.0005743548387096775, |
|
"loss": 0.1555, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 0.000572741935483871, |
|
"loss": 0.1521, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 42.56, |
|
"learning_rate": 0.0005711290322580646, |
|
"loss": 0.1523, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 42.72, |
|
"learning_rate": 0.000569516129032258, |
|
"loss": 0.1456, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 42.88, |
|
"learning_rate": 0.0005679032258064516, |
|
"loss": 0.1589, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"eval_accuracy": 0.8297632468996617, |
|
"eval_loss": 0.6153758764266968, |
|
"eval_runtime": 28.0406, |
|
"eval_samples_per_second": 63.265, |
|
"eval_steps_per_second": 0.999, |
|
"step": 2687 |
|
}, |
|
{ |
|
"epoch": 43.04, |
|
"learning_rate": 0.0005662903225806451, |
|
"loss": 0.155, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 0.0005646774193548387, |
|
"loss": 0.1505, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"learning_rate": 0.0005630645161290324, |
|
"loss": 0.1443, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 43.52, |
|
"learning_rate": 0.0005614516129032258, |
|
"loss": 0.1616, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 43.68, |
|
"learning_rate": 0.0005598387096774194, |
|
"loss": 0.1525, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 43.84, |
|
"learning_rate": 0.0005582258064516129, |
|
"loss": 0.1471, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 0.0005566129032258065, |
|
"loss": 0.1575, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8523111612175873, |
|
"eval_loss": 0.5629739761352539, |
|
"eval_runtime": 28.1428, |
|
"eval_samples_per_second": 63.036, |
|
"eval_steps_per_second": 0.995, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"learning_rate": 0.000555, |
|
"loss": 0.1323, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 44.32, |
|
"learning_rate": 0.0005533870967741935, |
|
"loss": 0.1582, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 44.48, |
|
"learning_rate": 0.000551774193548387, |
|
"loss": 0.1531, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 44.64, |
|
"learning_rate": 0.0005501612903225806, |
|
"loss": 0.1552, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 0.0005485483870967743, |
|
"loss": 0.1419, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"learning_rate": 0.0005469354838709677, |
|
"loss": 0.1523, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.8489289740698985, |
|
"eval_loss": 0.5822137594223022, |
|
"eval_runtime": 28.2989, |
|
"eval_samples_per_second": 62.688, |
|
"eval_steps_per_second": 0.989, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 45.12, |
|
"learning_rate": 0.0005453225806451613, |
|
"loss": 0.1371, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 0.0005437096774193548, |
|
"loss": 0.1283, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 45.44, |
|
"learning_rate": 0.0005420967741935484, |
|
"loss": 0.1446, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"learning_rate": 0.000540483870967742, |
|
"loss": 0.1482, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 45.76, |
|
"learning_rate": 0.0005388709677419354, |
|
"loss": 0.1473, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 45.92, |
|
"learning_rate": 0.000537258064516129, |
|
"loss": 0.1457, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8528748590755355, |
|
"eval_loss": 0.5841559767723083, |
|
"eval_runtime": 28.078, |
|
"eval_samples_per_second": 63.181, |
|
"eval_steps_per_second": 0.997, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 46.08, |
|
"learning_rate": 0.0005356451612903226, |
|
"loss": 0.1491, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 46.24, |
|
"learning_rate": 0.0005340322580645162, |
|
"loss": 0.1229, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 0.0005324193548387098, |
|
"loss": 0.1407, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 46.56, |
|
"learning_rate": 0.0005308064516129032, |
|
"loss": 0.176, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 46.72, |
|
"learning_rate": 0.0005291935483870968, |
|
"loss": 0.1251, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 46.88, |
|
"learning_rate": 0.0005275806451612903, |
|
"loss": 0.1326, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_accuracy": 0.855129650507328, |
|
"eval_loss": 0.5729023218154907, |
|
"eval_runtime": 28.0033, |
|
"eval_samples_per_second": 63.35, |
|
"eval_steps_per_second": 1.0, |
|
"step": 2937 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"learning_rate": 0.0005259677419354839, |
|
"loss": 0.1496, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 0.0005243548387096774, |
|
"loss": 0.1367, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 47.36, |
|
"learning_rate": 0.000522741935483871, |
|
"loss": 0.1465, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 47.52, |
|
"learning_rate": 0.0005211290322580646, |
|
"loss": 0.1378, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 47.68, |
|
"learning_rate": 0.0005195161290322581, |
|
"loss": 0.1484, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 47.84, |
|
"learning_rate": 0.0005179032258064517, |
|
"loss": 0.1522, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.0005162903225806451, |
|
"loss": 0.1319, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.85456595264938, |
|
"eval_loss": 0.5705844759941101, |
|
"eval_runtime": 28.1635, |
|
"eval_samples_per_second": 62.989, |
|
"eval_steps_per_second": 0.994, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.16, |
|
"learning_rate": 0.0005146774193548387, |
|
"loss": 0.1512, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 48.32, |
|
"learning_rate": 0.0005130645161290322, |
|
"loss": 0.1257, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 48.48, |
|
"learning_rate": 0.0005114516129032258, |
|
"loss": 0.1271, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 48.64, |
|
"learning_rate": 0.0005098387096774195, |
|
"loss": 0.14, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"learning_rate": 0.0005082258064516129, |
|
"loss": 0.1365, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 48.96, |
|
"learning_rate": 0.0005066129032258065, |
|
"loss": 0.131, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_accuracy": 0.855129650507328, |
|
"eval_loss": 0.5893039703369141, |
|
"eval_runtime": 28.1829, |
|
"eval_samples_per_second": 62.946, |
|
"eval_steps_per_second": 0.994, |
|
"step": 3062 |
|
}, |
|
{ |
|
"epoch": 49.12, |
|
"learning_rate": 0.000505, |
|
"loss": 0.1177, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 49.28, |
|
"learning_rate": 0.0005033870967741936, |
|
"loss": 0.1575, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 49.44, |
|
"learning_rate": 0.0005017741935483871, |
|
"loss": 0.124, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 0.0005001612903225806, |
|
"loss": 0.1442, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 49.76, |
|
"learning_rate": 0.0004985483870967741, |
|
"loss": 0.1365, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 49.92, |
|
"learning_rate": 0.0004969354838709678, |
|
"loss": 0.1588, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8461104847801578, |
|
"eval_loss": 0.5694898962974548, |
|
"eval_runtime": 28.2133, |
|
"eval_samples_per_second": 62.878, |
|
"eval_steps_per_second": 0.992, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 50.08, |
|
"learning_rate": 0.0004953225806451613, |
|
"loss": 0.1235, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 50.24, |
|
"learning_rate": 0.0004937096774193548, |
|
"loss": 0.1229, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"learning_rate": 0.0004920967741935484, |
|
"loss": 0.1534, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 50.56, |
|
"learning_rate": 0.0004904838709677419, |
|
"loss": 0.139, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 50.72, |
|
"learning_rate": 0.0004888709677419355, |
|
"loss": 0.1338, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 50.88, |
|
"learning_rate": 0.00048725806451612905, |
|
"loss": 0.1297, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"eval_accuracy": 0.8455467869222097, |
|
"eval_loss": 0.5901888608932495, |
|
"eval_runtime": 28.0563, |
|
"eval_samples_per_second": 63.23, |
|
"eval_steps_per_second": 0.998, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 51.04, |
|
"learning_rate": 0.0004856451612903226, |
|
"loss": 0.1162, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 0.0004840322580645161, |
|
"loss": 0.1169, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 51.36, |
|
"learning_rate": 0.0004824193548387097, |
|
"loss": 0.1517, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 51.52, |
|
"learning_rate": 0.0004808064516129033, |
|
"loss": 0.1246, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 51.68, |
|
"learning_rate": 0.0004791935483870968, |
|
"loss": 0.1336, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 51.84, |
|
"learning_rate": 0.00047758064516129035, |
|
"loss": 0.1459, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 0.00047596774193548385, |
|
"loss": 0.1603, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8449830890642616, |
|
"eval_loss": 0.5921454429626465, |
|
"eval_runtime": 28.2225, |
|
"eval_samples_per_second": 62.858, |
|
"eval_steps_per_second": 0.992, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 52.16, |
|
"learning_rate": 0.00047435483870967747, |
|
"loss": 0.1322, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 52.32, |
|
"learning_rate": 0.00047274193548387097, |
|
"loss": 0.1273, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 52.48, |
|
"learning_rate": 0.00047112903225806453, |
|
"loss": 0.1303, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 52.64, |
|
"learning_rate": 0.0004695161290322581, |
|
"loss": 0.1255, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 0.0004679032258064516, |
|
"loss": 0.1279, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 52.96, |
|
"learning_rate": 0.0004662903225806452, |
|
"loss": 0.108, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_accuracy": 0.8478015783540023, |
|
"eval_loss": 0.614061176776886, |
|
"eval_runtime": 28.139, |
|
"eval_samples_per_second": 63.044, |
|
"eval_steps_per_second": 0.995, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 53.12, |
|
"learning_rate": 0.0004646774193548387, |
|
"loss": 0.1431, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"learning_rate": 0.00046306451612903226, |
|
"loss": 0.1216, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 53.44, |
|
"learning_rate": 0.00046145161290322577, |
|
"loss": 0.1322, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"learning_rate": 0.0004598387096774194, |
|
"loss": 0.1063, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 53.76, |
|
"learning_rate": 0.00045822580645161294, |
|
"loss": 0.1464, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 53.92, |
|
"learning_rate": 0.00045661290322580644, |
|
"loss": 0.1483, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.850620067643743, |
|
"eval_loss": 0.5862211585044861, |
|
"eval_runtime": 28.0887, |
|
"eval_samples_per_second": 63.157, |
|
"eval_steps_per_second": 0.997, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 54.08, |
|
"learning_rate": 0.000455, |
|
"loss": 0.1393, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"learning_rate": 0.00045338709677419356, |
|
"loss": 0.12, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 0.0004517741935483871, |
|
"loss": 0.1374, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 54.56, |
|
"learning_rate": 0.0004501612903225806, |
|
"loss": 0.1226, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"learning_rate": 0.0004485483870967742, |
|
"loss": 0.1384, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 54.88, |
|
"learning_rate": 0.0004469354838709678, |
|
"loss": 0.1191, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"eval_accuracy": 0.8455467869222097, |
|
"eval_loss": 0.570696234703064, |
|
"eval_runtime": 28.1609, |
|
"eval_samples_per_second": 62.995, |
|
"eval_steps_per_second": 0.994, |
|
"step": 3437 |
|
}, |
|
{ |
|
"epoch": 55.04, |
|
"learning_rate": 0.0004453225806451613, |
|
"loss": 0.1202, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"learning_rate": 0.00044370967741935485, |
|
"loss": 0.1174, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 55.36, |
|
"learning_rate": 0.00044209677419354836, |
|
"loss": 0.128, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 55.52, |
|
"learning_rate": 0.00044048387096774197, |
|
"loss": 0.1255, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 55.68, |
|
"learning_rate": 0.0004388709677419355, |
|
"loss": 0.1271, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 55.84, |
|
"learning_rate": 0.00043725806451612903, |
|
"loss": 0.1212, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 0.0004356451612903226, |
|
"loss": 0.1148, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8635851183765502, |
|
"eval_loss": 0.5643802285194397, |
|
"eval_runtime": 28.141, |
|
"eval_samples_per_second": 63.04, |
|
"eval_steps_per_second": 0.995, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 56.16, |
|
"learning_rate": 0.00043403225806451615, |
|
"loss": 0.1241, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 56.32, |
|
"learning_rate": 0.0004324193548387097, |
|
"loss": 0.1207, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 56.48, |
|
"learning_rate": 0.0004308064516129032, |
|
"loss": 0.1255, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 56.64, |
|
"learning_rate": 0.00042919354838709677, |
|
"loss": 0.1303, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"learning_rate": 0.0004275806451612903, |
|
"loss": 0.1107, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"learning_rate": 0.0004259677419354839, |
|
"loss": 0.1052, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_accuracy": 0.8602029312288614, |
|
"eval_loss": 0.5903654098510742, |
|
"eval_runtime": 28.2281, |
|
"eval_samples_per_second": 62.845, |
|
"eval_steps_per_second": 0.992, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 57.12, |
|
"learning_rate": 0.00042435483870967744, |
|
"loss": 0.1367, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 57.28, |
|
"learning_rate": 0.00042274193548387095, |
|
"loss": 0.1064, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 57.44, |
|
"learning_rate": 0.00042112903225806456, |
|
"loss": 0.1231, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 0.00041951612903225806, |
|
"loss": 0.1355, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 57.76, |
|
"learning_rate": 0.0004179032258064516, |
|
"loss": 0.1231, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 57.92, |
|
"learning_rate": 0.0004162903225806452, |
|
"loss": 0.1307, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8489289740698985, |
|
"eval_loss": 0.5817931890487671, |
|
"eval_runtime": 28.0504, |
|
"eval_samples_per_second": 63.243, |
|
"eval_steps_per_second": 0.998, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 58.08, |
|
"learning_rate": 0.00041467741935483874, |
|
"loss": 0.1187, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"learning_rate": 0.0004130645161290323, |
|
"loss": 0.1284, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"learning_rate": 0.0004114516129032258, |
|
"loss": 0.1093, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 58.56, |
|
"learning_rate": 0.00040983870967741936, |
|
"loss": 0.114, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 58.72, |
|
"learning_rate": 0.0004082258064516129, |
|
"loss": 0.1088, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 58.88, |
|
"learning_rate": 0.0004066129032258065, |
|
"loss": 0.1188, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 58.99, |
|
"eval_accuracy": 0.8489289740698985, |
|
"eval_loss": 0.5897976756095886, |
|
"eval_runtime": 27.9744, |
|
"eval_samples_per_second": 63.415, |
|
"eval_steps_per_second": 1.001, |
|
"step": 3687 |
|
}, |
|
{ |
|
"epoch": 59.04, |
|
"learning_rate": 0.00040500000000000003, |
|
"loss": 0.1092, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 0.00040338709677419354, |
|
"loss": 0.109, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 59.36, |
|
"learning_rate": 0.00040177419354838715, |
|
"loss": 0.1137, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 59.52, |
|
"learning_rate": 0.00040016129032258065, |
|
"loss": 0.1177, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 59.68, |
|
"learning_rate": 0.0003985483870967742, |
|
"loss": 0.124, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 59.84, |
|
"learning_rate": 0.0003969354838709677, |
|
"loss": 0.106, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0003953225806451613, |
|
"loss": 0.1114, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8517474633596392, |
|
"eval_loss": 0.6034874320030212, |
|
"eval_runtime": 28.0461, |
|
"eval_samples_per_second": 63.253, |
|
"eval_steps_per_second": 0.998, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 60.16, |
|
"learning_rate": 0.0003937096774193549, |
|
"loss": 0.1381, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 60.32, |
|
"learning_rate": 0.0003920967741935484, |
|
"loss": 0.1086, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 60.48, |
|
"learning_rate": 0.00039048387096774195, |
|
"loss": 0.1097, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 60.64, |
|
"learning_rate": 0.00038887096774193545, |
|
"loss": 0.1156, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 0.00038725806451612906, |
|
"loss": 0.1135, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 60.96, |
|
"learning_rate": 0.00038564516129032257, |
|
"loss": 0.1055, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_accuracy": 0.8534385569334837, |
|
"eval_loss": 0.6122295260429382, |
|
"eval_runtime": 28.3492, |
|
"eval_samples_per_second": 62.577, |
|
"eval_steps_per_second": 0.988, |
|
"step": 3812 |
|
}, |
|
{ |
|
"epoch": 61.12, |
|
"learning_rate": 0.0003840322580645161, |
|
"loss": 0.1219, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 61.28, |
|
"learning_rate": 0.0003824193548387097, |
|
"loss": 0.1206, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 61.44, |
|
"learning_rate": 0.00038080645161290324, |
|
"loss": 0.1059, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"learning_rate": 0.0003791935483870968, |
|
"loss": 0.1175, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 61.76, |
|
"learning_rate": 0.0003775806451612903, |
|
"loss": 0.1152, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 61.92, |
|
"learning_rate": 0.00037596774193548386, |
|
"loss": 0.1326, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8540022547914318, |
|
"eval_loss": 0.6128527522087097, |
|
"eval_runtime": 28.0856, |
|
"eval_samples_per_second": 63.164, |
|
"eval_steps_per_second": 0.997, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 62.08, |
|
"learning_rate": 0.0003743548387096774, |
|
"loss": 0.0893, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 62.24, |
|
"learning_rate": 0.000372741935483871, |
|
"loss": 0.1252, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 0.00037112903225806454, |
|
"loss": 0.1166, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 62.56, |
|
"learning_rate": 0.00036951612903225804, |
|
"loss": 0.1182, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 62.72, |
|
"learning_rate": 0.00036790322580645165, |
|
"loss": 0.1234, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 62.88, |
|
"learning_rate": 0.00036629032258064516, |
|
"loss": 0.118, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"eval_accuracy": 0.8528748590755355, |
|
"eval_loss": 0.5965889096260071, |
|
"eval_runtime": 28.2787, |
|
"eval_samples_per_second": 62.733, |
|
"eval_steps_per_second": 0.99, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 63.04, |
|
"learning_rate": 0.0003646774193548387, |
|
"loss": 0.1149, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"learning_rate": 0.0003630645161290322, |
|
"loss": 0.1133, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 63.36, |
|
"learning_rate": 0.00036145161290322583, |
|
"loss": 0.0971, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 63.52, |
|
"learning_rate": 0.0003598387096774194, |
|
"loss": 0.1197, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 63.68, |
|
"learning_rate": 0.0003582258064516129, |
|
"loss": 0.1148, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 63.84, |
|
"learning_rate": 0.00035661290322580645, |
|
"loss": 0.1074, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 0.000355, |
|
"loss": 0.0982, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.85456595264938, |
|
"eval_loss": 0.6205869317054749, |
|
"eval_runtime": 28.2305, |
|
"eval_samples_per_second": 62.84, |
|
"eval_steps_per_second": 0.992, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.16, |
|
"learning_rate": 0.00035338709677419357, |
|
"loss": 0.1069, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 64.32, |
|
"learning_rate": 0.00035177419354838707, |
|
"loss": 0.1083, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 64.48, |
|
"learning_rate": 0.00035016129032258063, |
|
"loss": 0.1225, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 64.64, |
|
"learning_rate": 0.00034854838709677424, |
|
"loss": 0.1105, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 0.00034693548387096775, |
|
"loss": 0.1258, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 64.96, |
|
"learning_rate": 0.0003453225806451613, |
|
"loss": 0.1021, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_accuracy": 0.855129650507328, |
|
"eval_loss": 0.6053165197372437, |
|
"eval_runtime": 28.1948, |
|
"eval_samples_per_second": 62.919, |
|
"eval_steps_per_second": 0.993, |
|
"step": 4062 |
|
}, |
|
{ |
|
"epoch": 65.12, |
|
"learning_rate": 0.0003437096774193548, |
|
"loss": 0.1148, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 65.28, |
|
"learning_rate": 0.0003420967741935484, |
|
"loss": 0.1054, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 65.44, |
|
"learning_rate": 0.0003404838709677419, |
|
"loss": 0.1057, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 0.0003388709677419355, |
|
"loss": 0.0942, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 65.76, |
|
"learning_rate": 0.00033725806451612904, |
|
"loss": 0.1077, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 65.92, |
|
"learning_rate": 0.0003356451612903226, |
|
"loss": 0.0988, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8494926719278467, |
|
"eval_loss": 0.6225422024726868, |
|
"eval_runtime": 28.2781, |
|
"eval_samples_per_second": 62.734, |
|
"eval_steps_per_second": 0.99, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 66.08, |
|
"learning_rate": 0.00033403225806451616, |
|
"loss": 0.0974, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 66.24, |
|
"learning_rate": 0.00033241935483870966, |
|
"loss": 0.0966, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"learning_rate": 0.0003308064516129032, |
|
"loss": 0.1017, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 66.56, |
|
"learning_rate": 0.00032919354838709683, |
|
"loss": 0.1116, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 66.72, |
|
"learning_rate": 0.00032758064516129034, |
|
"loss": 0.1077, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 66.88, |
|
"learning_rate": 0.0003259677419354839, |
|
"loss": 0.102, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 66.99, |
|
"eval_accuracy": 0.8579481397970687, |
|
"eval_loss": 0.6113544702529907, |
|
"eval_runtime": 28.1113, |
|
"eval_samples_per_second": 63.106, |
|
"eval_steps_per_second": 0.996, |
|
"step": 4187 |
|
}, |
|
{ |
|
"epoch": 67.04, |
|
"learning_rate": 0.0003243548387096774, |
|
"loss": 0.1125, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 0.000322741935483871, |
|
"loss": 0.1098, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 67.36, |
|
"learning_rate": 0.0003211290322580645, |
|
"loss": 0.1023, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 67.52, |
|
"learning_rate": 0.00031951612903225807, |
|
"loss": 0.0989, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 67.68, |
|
"learning_rate": 0.00031790322580645163, |
|
"loss": 0.1012, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 67.84, |
|
"learning_rate": 0.0003162903225806452, |
|
"loss": 0.1014, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 0.00031467741935483875, |
|
"loss": 0.108, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8461104847801578, |
|
"eval_loss": 0.6544247269630432, |
|
"eval_runtime": 28.177, |
|
"eval_samples_per_second": 62.959, |
|
"eval_steps_per_second": 0.994, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 68.16, |
|
"learning_rate": 0.00031306451612903225, |
|
"loss": 0.1138, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 68.32, |
|
"learning_rate": 0.0003114516129032258, |
|
"loss": 0.0909, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 68.48, |
|
"learning_rate": 0.0003098387096774193, |
|
"loss": 0.0898, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 68.64, |
|
"learning_rate": 0.0003082258064516129, |
|
"loss": 0.1027, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 0.0003066129032258065, |
|
"loss": 0.1173, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"learning_rate": 0.000305, |
|
"loss": 0.0959, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_accuracy": 0.846674182638106, |
|
"eval_loss": 0.647339403629303, |
|
"eval_runtime": 28.0484, |
|
"eval_samples_per_second": 63.248, |
|
"eval_steps_per_second": 0.998, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 69.12, |
|
"learning_rate": 0.00030338709677419354, |
|
"loss": 0.1037, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 69.28, |
|
"learning_rate": 0.0003017741935483871, |
|
"loss": 0.0928, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 69.44, |
|
"learning_rate": 0.00030016129032258066, |
|
"loss": 0.1067, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"learning_rate": 0.00029854838709677417, |
|
"loss": 0.108, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 69.76, |
|
"learning_rate": 0.0002969354838709677, |
|
"loss": 0.0749, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 69.92, |
|
"learning_rate": 0.00029532258064516134, |
|
"loss": 0.0988, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8483652762119503, |
|
"eval_loss": 0.6325012445449829, |
|
"eval_runtime": 28.2324, |
|
"eval_samples_per_second": 62.836, |
|
"eval_steps_per_second": 0.992, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 70.08, |
|
"learning_rate": 0.00029370967741935484, |
|
"loss": 0.1079, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 70.24, |
|
"learning_rate": 0.0002920967741935484, |
|
"loss": 0.1031, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 0.0002904838709677419, |
|
"loss": 0.0927, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 70.56, |
|
"learning_rate": 0.0002888709677419355, |
|
"loss": 0.1085, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 70.72, |
|
"learning_rate": 0.000287258064516129, |
|
"loss": 0.1092, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 70.88, |
|
"learning_rate": 0.0002856451612903226, |
|
"loss": 0.0949, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"eval_accuracy": 0.8472378804960541, |
|
"eval_loss": 0.6548543572425842, |
|
"eval_runtime": 28.0888, |
|
"eval_samples_per_second": 63.157, |
|
"eval_steps_per_second": 0.997, |
|
"step": 4437 |
|
}, |
|
{ |
|
"epoch": 71.04, |
|
"learning_rate": 0.00028403225806451613, |
|
"loss": 0.1037, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"learning_rate": 0.0002824193548387097, |
|
"loss": 0.1136, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 71.36, |
|
"learning_rate": 0.00028080645161290325, |
|
"loss": 0.0862, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 71.52, |
|
"learning_rate": 0.00027919354838709675, |
|
"loss": 0.1058, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 71.68, |
|
"learning_rate": 0.0002775806451612903, |
|
"loss": 0.1019, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 71.84, |
|
"learning_rate": 0.00027596774193548387, |
|
"loss": 0.1134, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 0.00027435483870967743, |
|
"loss": 0.0998, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8478015783540023, |
|
"eval_loss": 0.6151257157325745, |
|
"eval_runtime": 28.1977, |
|
"eval_samples_per_second": 62.913, |
|
"eval_steps_per_second": 0.993, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 72.16, |
|
"learning_rate": 0.000272741935483871, |
|
"loss": 0.0872, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 72.32, |
|
"learning_rate": 0.0002711290322580645, |
|
"loss": 0.0963, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 72.48, |
|
"learning_rate": 0.0002695161290322581, |
|
"loss": 0.0852, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 0.0002679032258064516, |
|
"loss": 0.0984, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 0.00026629032258064517, |
|
"loss": 0.099, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 72.96, |
|
"learning_rate": 0.00026467741935483867, |
|
"loss": 0.0861, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_accuracy": 0.8489289740698985, |
|
"eval_loss": 0.614136278629303, |
|
"eval_runtime": 28.0817, |
|
"eval_samples_per_second": 63.173, |
|
"eval_steps_per_second": 0.997, |
|
"step": 4562 |
|
}, |
|
{ |
|
"epoch": 73.12, |
|
"learning_rate": 0.0002630645161290323, |
|
"loss": 0.08, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 73.28, |
|
"learning_rate": 0.00026145161290322584, |
|
"loss": 0.1114, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 73.44, |
|
"learning_rate": 0.00025983870967741934, |
|
"loss": 0.1046, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 0.0002582258064516129, |
|
"loss": 0.0929, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 73.76, |
|
"learning_rate": 0.00025661290322580646, |
|
"loss": 0.0834, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 73.92, |
|
"learning_rate": 0.000255, |
|
"loss": 0.099, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8517474633596392, |
|
"eval_loss": 0.6109188795089722, |
|
"eval_runtime": 28.0633, |
|
"eval_samples_per_second": 63.214, |
|
"eval_steps_per_second": 0.998, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 74.08, |
|
"learning_rate": 0.0002533870967741935, |
|
"loss": 0.1019, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 74.24, |
|
"learning_rate": 0.0002517741935483871, |
|
"loss": 0.094, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"learning_rate": 0.0002501612903225807, |
|
"loss": 0.1015, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 74.56, |
|
"learning_rate": 0.0002485483870967742, |
|
"loss": 0.0993, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 74.72, |
|
"learning_rate": 0.00024693548387096775, |
|
"loss": 0.0891, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 74.88, |
|
"learning_rate": 0.0002453225806451613, |
|
"loss": 0.0848, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_accuracy": 0.8478015783540023, |
|
"eval_loss": 0.620224118232727, |
|
"eval_runtime": 28.0241, |
|
"eval_samples_per_second": 63.303, |
|
"eval_steps_per_second": 0.999, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 75.04, |
|
"learning_rate": 0.00024370967741935484, |
|
"loss": 0.1051, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"learning_rate": 0.0002420967741935484, |
|
"loss": 0.1093, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 75.36, |
|
"learning_rate": 0.00024048387096774193, |
|
"loss": 0.0857, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 75.52, |
|
"learning_rate": 0.0002388709677419355, |
|
"loss": 0.1061, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 75.68, |
|
"learning_rate": 0.00023725806451612902, |
|
"loss": 0.0892, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 75.84, |
|
"learning_rate": 0.0002356451612903226, |
|
"loss": 0.095, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 0.00023403225806451614, |
|
"loss": 0.0881, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.85456595264938, |
|
"eval_loss": 0.6248630881309509, |
|
"eval_runtime": 28.2315, |
|
"eval_samples_per_second": 62.838, |
|
"eval_steps_per_second": 0.992, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 76.16, |
|
"learning_rate": 0.0002324193548387097, |
|
"loss": 0.0901, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 76.32, |
|
"learning_rate": 0.00023080645161290323, |
|
"loss": 0.092, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 76.48, |
|
"learning_rate": 0.00022919354838709679, |
|
"loss": 0.0922, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 76.64, |
|
"learning_rate": 0.00022758064516129032, |
|
"loss": 0.0964, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"learning_rate": 0.0002259677419354839, |
|
"loss": 0.0972, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"learning_rate": 0.00022435483870967743, |
|
"loss": 0.1046, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_accuracy": 0.8568207440811725, |
|
"eval_loss": 0.6102315783500671, |
|
"eval_runtime": 28.0363, |
|
"eval_samples_per_second": 63.275, |
|
"eval_steps_per_second": 0.999, |
|
"step": 4812 |
|
}, |
|
{ |
|
"epoch": 77.12, |
|
"learning_rate": 0.00022274193548387096, |
|
"loss": 0.0826, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 77.28, |
|
"learning_rate": 0.00022112903225806452, |
|
"loss": 0.0921, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 77.44, |
|
"learning_rate": 0.00021951612903225805, |
|
"loss": 0.0906, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 77.6, |
|
"learning_rate": 0.0002179032258064516, |
|
"loss": 0.0933, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 77.76, |
|
"learning_rate": 0.00021629032258064514, |
|
"loss": 0.0742, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 77.92, |
|
"learning_rate": 0.00021467741935483873, |
|
"loss": 0.0859, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8624577226606539, |
|
"eval_loss": 0.6111776232719421, |
|
"eval_runtime": 27.9757, |
|
"eval_samples_per_second": 63.412, |
|
"eval_steps_per_second": 1.001, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 78.08, |
|
"learning_rate": 0.00021306451612903226, |
|
"loss": 0.1, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 78.24, |
|
"learning_rate": 0.00021145161290322582, |
|
"loss": 0.092, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"learning_rate": 0.00020983870967741935, |
|
"loss": 0.0771, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 78.56, |
|
"learning_rate": 0.0002082258064516129, |
|
"loss": 0.0928, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 78.72, |
|
"learning_rate": 0.00020661290322580644, |
|
"loss": 0.0971, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 78.88, |
|
"learning_rate": 0.000205, |
|
"loss": 0.0946, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 78.99, |
|
"eval_accuracy": 0.863021420518602, |
|
"eval_loss": 0.6136212944984436, |
|
"eval_runtime": 28.0893, |
|
"eval_samples_per_second": 63.156, |
|
"eval_steps_per_second": 0.997, |
|
"step": 4937 |
|
}, |
|
{ |
|
"epoch": 79.04, |
|
"learning_rate": 0.00020338709677419355, |
|
"loss": 0.0892, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 79.2, |
|
"learning_rate": 0.0002017741935483871, |
|
"loss": 0.0833, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 79.36, |
|
"learning_rate": 0.00020016129032258064, |
|
"loss": 0.0956, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 79.52, |
|
"learning_rate": 0.0001985483870967742, |
|
"loss": 0.0865, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 79.68, |
|
"learning_rate": 0.00019693548387096773, |
|
"loss": 0.0848, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 79.84, |
|
"learning_rate": 0.0001953225806451613, |
|
"loss": 0.0973, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.00019370967741935482, |
|
"loss": 0.0902, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.863021420518602, |
|
"eval_loss": 0.6027141213417053, |
|
"eval_runtime": 28.2443, |
|
"eval_samples_per_second": 62.809, |
|
"eval_steps_per_second": 0.991, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.16, |
|
"learning_rate": 0.0001920967741935484, |
|
"loss": 0.09, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 80.32, |
|
"learning_rate": 0.00019048387096774194, |
|
"loss": 0.0998, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 80.48, |
|
"learning_rate": 0.0001888709677419355, |
|
"loss": 0.0877, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 80.64, |
|
"learning_rate": 0.00018725806451612903, |
|
"loss": 0.0928, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 80.8, |
|
"learning_rate": 0.00018564516129032258, |
|
"loss": 0.0895, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 80.96, |
|
"learning_rate": 0.00018403225806451612, |
|
"loss": 0.093, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_accuracy": 0.8641488162344984, |
|
"eval_loss": 0.6099376082420349, |
|
"eval_runtime": 27.9879, |
|
"eval_samples_per_second": 63.385, |
|
"eval_steps_per_second": 1.0, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 81.12, |
|
"learning_rate": 0.0001824193548387097, |
|
"loss": 0.0809, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 81.28, |
|
"learning_rate": 0.00018080645161290323, |
|
"loss": 0.0947, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 81.44, |
|
"learning_rate": 0.0001791935483870968, |
|
"loss": 0.0886, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"learning_rate": 0.00017758064516129032, |
|
"loss": 0.0915, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 81.76, |
|
"learning_rate": 0.00017596774193548388, |
|
"loss": 0.0873, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 81.92, |
|
"learning_rate": 0.0001743548387096774, |
|
"loss": 0.0857, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8697857948139797, |
|
"eval_loss": 0.5907533764839172, |
|
"eval_runtime": 28.1441, |
|
"eval_samples_per_second": 63.033, |
|
"eval_steps_per_second": 0.995, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 0.00017274193548387097, |
|
"loss": 0.0941, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 82.24, |
|
"learning_rate": 0.00017112903225806453, |
|
"loss": 0.0914, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 82.4, |
|
"learning_rate": 0.00016951612903225809, |
|
"loss": 0.1, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 82.56, |
|
"learning_rate": 0.00016790322580645162, |
|
"loss": 0.0954, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 82.72, |
|
"learning_rate": 0.00016629032258064517, |
|
"loss": 0.1008, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 82.88, |
|
"learning_rate": 0.0001646774193548387, |
|
"loss": 0.0983, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"eval_accuracy": 0.8624577226606539, |
|
"eval_loss": 0.5939348340034485, |
|
"eval_runtime": 28.2324, |
|
"eval_samples_per_second": 62.836, |
|
"eval_steps_per_second": 0.992, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 83.04, |
|
"learning_rate": 0.00016306451612903226, |
|
"loss": 0.091, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"learning_rate": 0.0001614516129032258, |
|
"loss": 0.0852, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 83.36, |
|
"learning_rate": 0.00015983870967741938, |
|
"loss": 0.0883, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 83.52, |
|
"learning_rate": 0.0001582258064516129, |
|
"loss": 0.088, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 83.68, |
|
"learning_rate": 0.00015661290322580647, |
|
"loss": 0.1049, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 83.84, |
|
"learning_rate": 0.000155, |
|
"loss": 0.0909, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 0.00015338709677419356, |
|
"loss": 0.0819, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8602029312288614, |
|
"eval_loss": 0.6138933897018433, |
|
"eval_runtime": 28.1467, |
|
"eval_samples_per_second": 63.027, |
|
"eval_steps_per_second": 0.995, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 84.16, |
|
"learning_rate": 0.0001517741935483871, |
|
"loss": 0.0896, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 84.32, |
|
"learning_rate": 0.00015016129032258065, |
|
"loss": 0.0853, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 84.48, |
|
"learning_rate": 0.0001485483870967742, |
|
"loss": 0.0679, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 84.64, |
|
"learning_rate": 0.00014693548387096776, |
|
"loss": 0.0839, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"learning_rate": 0.0001453225806451613, |
|
"loss": 0.0979, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 84.96, |
|
"learning_rate": 0.00014370967741935485, |
|
"loss": 0.0815, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_accuracy": 0.8635851183765502, |
|
"eval_loss": 0.6171460747718811, |
|
"eval_runtime": 28.1596, |
|
"eval_samples_per_second": 62.998, |
|
"eval_steps_per_second": 0.994, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 85.12, |
|
"learning_rate": 0.00014209677419354838, |
|
"loss": 0.0835, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 85.28, |
|
"learning_rate": 0.00014048387096774191, |
|
"loss": 0.0801, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 85.44, |
|
"learning_rate": 0.0001388709677419355, |
|
"loss": 0.0864, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 85.6, |
|
"learning_rate": 0.00013725806451612903, |
|
"loss": 0.0825, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 85.76, |
|
"learning_rate": 0.0001356451612903226, |
|
"loss": 0.0899, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 85.92, |
|
"learning_rate": 0.00013403225806451612, |
|
"loss": 0.0758, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8635851183765502, |
|
"eval_loss": 0.6262893080711365, |
|
"eval_runtime": 28.1545, |
|
"eval_samples_per_second": 63.01, |
|
"eval_steps_per_second": 0.995, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 86.08, |
|
"learning_rate": 0.00013241935483870968, |
|
"loss": 0.1037, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 86.24, |
|
"learning_rate": 0.0001308064516129032, |
|
"loss": 0.0775, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"learning_rate": 0.00012919354838709677, |
|
"loss": 0.0828, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 86.56, |
|
"learning_rate": 0.00012758064516129033, |
|
"loss": 0.0959, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 86.72, |
|
"learning_rate": 0.00012596774193548388, |
|
"loss": 0.0705, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 86.88, |
|
"learning_rate": 0.00012435483870967742, |
|
"loss": 0.0856, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 86.99, |
|
"eval_accuracy": 0.8618940248027057, |
|
"eval_loss": 0.6136890649795532, |
|
"eval_runtime": 28.2092, |
|
"eval_samples_per_second": 62.887, |
|
"eval_steps_per_second": 0.993, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 87.04, |
|
"learning_rate": 0.00012274193548387097, |
|
"loss": 0.0837, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 87.2, |
|
"learning_rate": 0.00012112903225806452, |
|
"loss": 0.0735, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 87.36, |
|
"learning_rate": 0.00011951612903225808, |
|
"loss": 0.0918, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 87.52, |
|
"learning_rate": 0.00011790322580645162, |
|
"loss": 0.0829, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 87.68, |
|
"learning_rate": 0.00011629032258064517, |
|
"loss": 0.085, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 87.84, |
|
"learning_rate": 0.00011467741935483871, |
|
"loss": 0.0964, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 0.00011306451612903227, |
|
"loss": 0.0922, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8647125140924464, |
|
"eval_loss": 0.6294208765029907, |
|
"eval_runtime": 28.0673, |
|
"eval_samples_per_second": 63.205, |
|
"eval_steps_per_second": 0.998, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 88.16, |
|
"learning_rate": 0.00011145161290322581, |
|
"loss": 0.0968, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 88.32, |
|
"learning_rate": 0.00010983870967741936, |
|
"loss": 0.0903, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 88.48, |
|
"learning_rate": 0.00010822580645161292, |
|
"loss": 0.0795, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 88.64, |
|
"learning_rate": 0.00010661290322580646, |
|
"loss": 0.0789, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 88.8, |
|
"learning_rate": 0.000105, |
|
"loss": 0.0959, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 88.96, |
|
"learning_rate": 0.00010338709677419356, |
|
"loss": 0.0728, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_accuracy": 0.8618940248027057, |
|
"eval_loss": 0.625676691532135, |
|
"eval_runtime": 28.1963, |
|
"eval_samples_per_second": 62.916, |
|
"eval_steps_per_second": 0.993, |
|
"step": 5562 |
|
}, |
|
{ |
|
"epoch": 89.12, |
|
"learning_rate": 0.00010177419354838711, |
|
"loss": 0.0859, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 89.28, |
|
"learning_rate": 0.00010016129032258064, |
|
"loss": 0.0818, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 89.44, |
|
"learning_rate": 9.854838709677418e-05, |
|
"loss": 0.0902, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 89.6, |
|
"learning_rate": 9.693548387096774e-05, |
|
"loss": 0.0732, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 89.76, |
|
"learning_rate": 9.532258064516129e-05, |
|
"loss": 0.0823, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 89.92, |
|
"learning_rate": 9.370967741935483e-05, |
|
"loss": 0.0791, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.8658399098083427, |
|
"eval_loss": 0.6167757511138916, |
|
"eval_runtime": 28.1598, |
|
"eval_samples_per_second": 62.998, |
|
"eval_steps_per_second": 0.994, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 90.08, |
|
"learning_rate": 9.209677419354839e-05, |
|
"loss": 0.0995, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 90.24, |
|
"learning_rate": 9.048387096774193e-05, |
|
"loss": 0.0949, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 90.4, |
|
"learning_rate": 8.887096774193548e-05, |
|
"loss": 0.0843, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 90.56, |
|
"learning_rate": 8.725806451612904e-05, |
|
"loss": 0.0714, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 90.72, |
|
"learning_rate": 8.564516129032258e-05, |
|
"loss": 0.0794, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 90.88, |
|
"learning_rate": 8.403225806451612e-05, |
|
"loss": 0.0761, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"eval_accuracy": 0.8675310033821871, |
|
"eval_loss": 0.6233435869216919, |
|
"eval_runtime": 28.2317, |
|
"eval_samples_per_second": 62.837, |
|
"eval_steps_per_second": 0.992, |
|
"step": 5687 |
|
}, |
|
{ |
|
"epoch": 91.04, |
|
"learning_rate": 8.241935483870967e-05, |
|
"loss": 0.0825, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 91.2, |
|
"learning_rate": 8.080645161290323e-05, |
|
"loss": 0.0808, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 91.36, |
|
"learning_rate": 7.919354838709677e-05, |
|
"loss": 0.0823, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 91.52, |
|
"learning_rate": 7.758064516129032e-05, |
|
"loss": 0.0776, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 91.68, |
|
"learning_rate": 7.596774193548387e-05, |
|
"loss": 0.0891, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 91.84, |
|
"learning_rate": 7.435483870967742e-05, |
|
"loss": 0.076, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 7.274193548387096e-05, |
|
"loss": 0.0734, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8652762119503946, |
|
"eval_loss": 0.6209710836410522, |
|
"eval_runtime": 28.4761, |
|
"eval_samples_per_second": 62.298, |
|
"eval_steps_per_second": 0.983, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 92.16, |
|
"learning_rate": 7.112903225806452e-05, |
|
"loss": 0.0934, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 92.32, |
|
"learning_rate": 6.951612903225807e-05, |
|
"loss": 0.0831, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 92.48, |
|
"learning_rate": 6.790322580645161e-05, |
|
"loss": 0.078, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 92.64, |
|
"learning_rate": 6.629032258064516e-05, |
|
"loss": 0.0802, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"learning_rate": 6.467741935483871e-05, |
|
"loss": 0.0848, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 92.96, |
|
"learning_rate": 6.306451612903226e-05, |
|
"loss": 0.085, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_accuracy": 0.863021420518602, |
|
"eval_loss": 0.6186871528625488, |
|
"eval_runtime": 28.2516, |
|
"eval_samples_per_second": 62.793, |
|
"eval_steps_per_second": 0.991, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 93.12, |
|
"learning_rate": 6.145161290322582e-05, |
|
"loss": 0.0674, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 93.28, |
|
"learning_rate": 5.9838709677419355e-05, |
|
"loss": 0.0878, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 93.44, |
|
"learning_rate": 5.8225806451612906e-05, |
|
"loss": 0.0733, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 93.6, |
|
"learning_rate": 5.661290322580645e-05, |
|
"loss": 0.0753, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 93.76, |
|
"learning_rate": 5.5e-05, |
|
"loss": 0.0726, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 93.92, |
|
"learning_rate": 5.338709677419355e-05, |
|
"loss": 0.0816, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8624577226606539, |
|
"eval_loss": 0.6182591319084167, |
|
"eval_runtime": 28.2701, |
|
"eval_samples_per_second": 62.752, |
|
"eval_steps_per_second": 0.99, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 94.08, |
|
"learning_rate": 5.17741935483871e-05, |
|
"loss": 0.0768, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 94.24, |
|
"learning_rate": 5.016129032258065e-05, |
|
"loss": 0.0827, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 94.4, |
|
"learning_rate": 4.8548387096774194e-05, |
|
"loss": 0.0756, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 94.56, |
|
"learning_rate": 4.6935483870967745e-05, |
|
"loss": 0.076, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 94.72, |
|
"learning_rate": 4.53225806451613e-05, |
|
"loss": 0.0817, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 94.88, |
|
"learning_rate": 4.370967741935484e-05, |
|
"loss": 0.0763, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 94.99, |
|
"eval_accuracy": 0.8686583990980834, |
|
"eval_loss": 0.6206808686256409, |
|
"eval_runtime": 28.1778, |
|
"eval_samples_per_second": 62.957, |
|
"eval_steps_per_second": 0.994, |
|
"step": 5937 |
|
}, |
|
{ |
|
"epoch": 95.04, |
|
"learning_rate": 4.209677419354839e-05, |
|
"loss": 0.0817, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 95.2, |
|
"learning_rate": 4.048387096774194e-05, |
|
"loss": 0.0961, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 95.36, |
|
"learning_rate": 3.887096774193549e-05, |
|
"loss": 0.0767, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 95.52, |
|
"learning_rate": 3.7258064516129026e-05, |
|
"loss": 0.0724, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 95.68, |
|
"learning_rate": 3.564516129032258e-05, |
|
"loss": 0.0807, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 95.84, |
|
"learning_rate": 3.403225806451613e-05, |
|
"loss": 0.0814, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 3.2419354838709674e-05, |
|
"loss": 0.077, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8664036076662909, |
|
"eval_loss": 0.6160728335380554, |
|
"eval_runtime": 28.2948, |
|
"eval_samples_per_second": 62.697, |
|
"eval_steps_per_second": 0.99, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.16, |
|
"learning_rate": 3.0806451612903225e-05, |
|
"loss": 0.0777, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 96.32, |
|
"learning_rate": 2.9193548387096776e-05, |
|
"loss": 0.0786, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 96.48, |
|
"learning_rate": 2.7580645161290324e-05, |
|
"loss": 0.081, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 96.64, |
|
"learning_rate": 2.596774193548387e-05, |
|
"loss": 0.0765, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 96.8, |
|
"learning_rate": 2.4354838709677417e-05, |
|
"loss": 0.0776, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 96.96, |
|
"learning_rate": 2.274193548387097e-05, |
|
"loss": 0.0872, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_accuracy": 0.8664036076662909, |
|
"eval_loss": 0.6126649379730225, |
|
"eval_runtime": 28.0863, |
|
"eval_samples_per_second": 63.162, |
|
"eval_steps_per_second": 0.997, |
|
"step": 6062 |
|
}, |
|
{ |
|
"epoch": 97.12, |
|
"learning_rate": 2.1129032258064516e-05, |
|
"loss": 0.0804, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 97.28, |
|
"learning_rate": 1.9516129032258064e-05, |
|
"loss": 0.0801, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 97.44, |
|
"learning_rate": 1.7903225806451612e-05, |
|
"loss": 0.0769, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 97.6, |
|
"learning_rate": 1.629032258064516e-05, |
|
"loss": 0.0862, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 97.76, |
|
"learning_rate": 1.467741935483871e-05, |
|
"loss": 0.0778, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 97.92, |
|
"learning_rate": 1.306451612903226e-05, |
|
"loss": 0.0741, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8686583990980834, |
|
"eval_loss": 0.6152112483978271, |
|
"eval_runtime": 29.2862, |
|
"eval_samples_per_second": 60.575, |
|
"eval_steps_per_second": 0.956, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 98.08, |
|
"learning_rate": 1.1451612903225808e-05, |
|
"loss": 0.07, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 98.24, |
|
"learning_rate": 9.838709677419354e-06, |
|
"loss": 0.0789, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 98.4, |
|
"learning_rate": 8.225806451612904e-06, |
|
"loss": 0.0875, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 98.56, |
|
"learning_rate": 6.612903225806452e-06, |
|
"loss": 0.0747, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 98.72, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0859, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 98.88, |
|
"learning_rate": 3.3870967741935484e-06, |
|
"loss": 0.0746, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 98.99, |
|
"eval_accuracy": 0.8669673055242391, |
|
"eval_loss": 0.6146878600120544, |
|
"eval_runtime": 29.8754, |
|
"eval_samples_per_second": 59.38, |
|
"eval_steps_per_second": 0.937, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 99.04, |
|
"learning_rate": 1.774193548387097e-06, |
|
"loss": 0.0843, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"learning_rate": 1.6129032258064518e-07, |
|
"loss": 0.0804, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"eval_accuracy": 0.8669673055242391, |
|
"eval_loss": 0.6146762371063232, |
|
"eval_runtime": 29.3602, |
|
"eval_samples_per_second": 60.422, |
|
"eval_steps_per_second": 0.954, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"step": 6200, |
|
"total_flos": 1.2605379356394072e+20, |
|
"train_loss": 0.21611337065696717, |
|
"train_runtime": 29981.4436, |
|
"train_samples_per_second": 53.223, |
|
"train_steps_per_second": 0.207 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6200, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 1.2605379356394072e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|