{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 10300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.97, "learning_rate": 4.950000000000001e-06, "loss": 13.4586, "step": 100 }, { "epoch": 1.94, "learning_rate": 9.950000000000001e-06, "loss": 5.8722, "step": 200 }, { "epoch": 2.91, "learning_rate": 1.4950000000000001e-05, "loss": 4.0954, "step": 300 }, { "epoch": 3.88, "learning_rate": 1.995e-05, "loss": 3.572, "step": 400 }, { "epoch": 4.85, "learning_rate": 2.495e-05, "loss": 3.2914, "step": 500 }, { "epoch": 4.85, "eval_loss": 3.2282841205596924, "eval_runtime": 135.7815, "eval_samples_per_second": 20.194, "eval_steps_per_second": 2.526, "eval_wer": 1.0, "step": 500 }, { "epoch": 5.83, "learning_rate": 2.995e-05, "loss": 3.2087, "step": 600 }, { "epoch": 6.8, "learning_rate": 3.495e-05, "loss": 3.1802, "step": 700 }, { "epoch": 7.77, "learning_rate": 3.995e-05, "loss": 3.1307, "step": 800 }, { "epoch": 8.74, "learning_rate": 4.495e-05, "loss": 3.0922, "step": 900 }, { "epoch": 9.71, "learning_rate": 4.995e-05, "loss": 3.0068, "step": 1000 }, { "epoch": 9.71, "eval_loss": 2.7939300537109375, "eval_runtime": 134.6432, "eval_samples_per_second": 20.365, "eval_steps_per_second": 2.547, "eval_wer": 0.997957609040984, "step": 1000 }, { "epoch": 10.68, "learning_rate": 5.495e-05, "loss": 2.5073, "step": 1100 }, { "epoch": 11.65, "learning_rate": 5.995000000000001e-05, "loss": 1.8326, "step": 1200 }, { "epoch": 12.62, "learning_rate": 6.494999999999999e-05, "loss": 1.598, "step": 1300 }, { "epoch": 13.59, "learning_rate": 6.995e-05, "loss": 1.5016, "step": 1400 }, { "epoch": 14.56, "learning_rate": 7.495e-05, "loss": 1.4306, "step": 1500 }, { "epoch": 14.56, "eval_loss": 0.48574715852737427, "eval_runtime": 133.5239, "eval_samples_per_second": 20.536, "eval_steps_per_second": 2.569, "eval_wer": 0.6313711251304861, "step": 1500 }, { "epoch": 15.53, "learning_rate": 7.995e-05, "loss": 1.3756, "step": 1600 }, { "epoch": 16.5, "learning_rate": 8.495e-05, "loss": 1.3583, "step": 1700 }, { "epoch": 17.48, "learning_rate": 8.995e-05, "loss": 1.3058, "step": 1800 }, { "epoch": 18.45, "learning_rate": 9.495e-05, "loss": 1.2949, "step": 1900 }, { "epoch": 19.42, "learning_rate": 9.995e-05, "loss": 1.2831, "step": 2000 }, { "epoch": 19.42, "eval_loss": 0.3678707182407379, "eval_runtime": 134.1491, "eval_samples_per_second": 20.44, "eval_steps_per_second": 2.557, "eval_wer": 0.6065901148277584, "step": 2000 }, { "epoch": 20.39, "learning_rate": 9.880722891566265e-05, "loss": 1.2725, "step": 2100 }, { "epoch": 21.36, "learning_rate": 9.76144578313253e-05, "loss": 1.2436, "step": 2200 }, { "epoch": 22.33, "learning_rate": 9.640963855421687e-05, "loss": 1.2363, "step": 2300 }, { "epoch": 23.3, "learning_rate": 9.521686746987952e-05, "loss": 1.2243, "step": 2400 }, { "epoch": 24.27, "learning_rate": 9.402409638554217e-05, "loss": 1.2065, "step": 2500 }, { "epoch": 24.27, "eval_loss": 0.33028003573417664, "eval_runtime": 134.2277, "eval_samples_per_second": 20.428, "eval_steps_per_second": 2.555, "eval_wer": 0.5559842055099169, "step": 2500 }, { "epoch": 25.24, "learning_rate": 9.281927710843374e-05, "loss": 1.192, "step": 2600 }, { "epoch": 26.21, "learning_rate": 9.161445783132531e-05, "loss": 1.1816, "step": 2700 }, { "epoch": 27.18, "learning_rate": 9.040963855421686e-05, "loss": 1.1869, "step": 2800 }, { "epoch": 28.16, "learning_rate": 8.920481927710844e-05, "loss": 1.1728, "step": 2900 }, { "epoch": 29.13, "learning_rate": 8.800000000000001e-05, "loss": 1.1449, "step": 3000 }, { "epoch": 29.13, "eval_loss": 0.3007894456386566, "eval_runtime": 133.5503, "eval_samples_per_second": 20.532, "eval_steps_per_second": 2.568, "eval_wer": 0.46902373712159035, "step": 3000 }, { "epoch": 30.1, "learning_rate": 8.679518072289157e-05, "loss": 1.1408, "step": 3100 }, { "epoch": 31.07, "learning_rate": 8.559036144578315e-05, "loss": 1.1319, "step": 3200 }, { "epoch": 32.04, "learning_rate": 8.43855421686747e-05, "loss": 1.1178, "step": 3300 }, { "epoch": 33.01, "learning_rate": 8.318072289156627e-05, "loss": 1.1122, "step": 3400 }, { "epoch": 33.98, "learning_rate": 8.197590361445784e-05, "loss": 1.0926, "step": 3500 }, { "epoch": 33.98, "eval_loss": 0.28173714876174927, "eval_runtime": 132.429, "eval_samples_per_second": 20.705, "eval_steps_per_second": 2.59, "eval_wer": 0.4618980619979122, "step": 3500 }, { "epoch": 34.95, "learning_rate": 8.07710843373494e-05, "loss": 1.0935, "step": 3600 }, { "epoch": 35.92, "learning_rate": 7.956626506024096e-05, "loss": 1.0815, "step": 3700 }, { "epoch": 36.89, "learning_rate": 7.836144578313254e-05, "loss": 1.0856, "step": 3800 }, { "epoch": 37.86, "learning_rate": 7.71566265060241e-05, "loss": 1.0732, "step": 3900 }, { "epoch": 38.83, "learning_rate": 7.595180722891566e-05, "loss": 1.0635, "step": 4000 }, { "epoch": 38.83, "eval_loss": 0.2665168046951294, "eval_runtime": 133.7977, "eval_samples_per_second": 20.494, "eval_steps_per_second": 2.564, "eval_wer": 0.4391140561884446, "step": 4000 }, { "epoch": 39.81, "learning_rate": 7.474698795180723e-05, "loss": 1.0614, "step": 4100 }, { "epoch": 40.78, "learning_rate": 7.35421686746988e-05, "loss": 1.0457, "step": 4200 }, { "epoch": 41.75, "learning_rate": 7.233734939759036e-05, "loss": 1.039, "step": 4300 }, { "epoch": 42.72, "learning_rate": 7.113253012048193e-05, "loss": 1.0151, "step": 4400 }, { "epoch": 43.69, "learning_rate": 6.99277108433735e-05, "loss": 1.029, "step": 4500 }, { "epoch": 43.69, "eval_loss": 0.26156488060951233, "eval_runtime": 133.7699, "eval_samples_per_second": 20.498, "eval_steps_per_second": 2.564, "eval_wer": 0.4175100984886307, "step": 4500 }, { "epoch": 44.66, "learning_rate": 6.873493975903614e-05, "loss": 1.0254, "step": 4600 }, { "epoch": 45.63, "learning_rate": 6.753012048192771e-05, "loss": 1.0328, "step": 4700 }, { "epoch": 46.6, "learning_rate": 6.632530120481928e-05, "loss": 1.022, "step": 4800 }, { "epoch": 47.57, "learning_rate": 6.512048192771085e-05, "loss": 1.0021, "step": 4900 }, { "epoch": 48.54, "learning_rate": 6.391566265060241e-05, "loss": 1.0064, "step": 5000 }, { "epoch": 48.54, "eval_loss": 0.24684669077396393, "eval_runtime": 133.5, "eval_samples_per_second": 20.539, "eval_steps_per_second": 2.569, "eval_wer": 0.4051195933372668, "step": 5000 }, { "epoch": 49.51, "learning_rate": 6.271084337349398e-05, "loss": 0.9791, "step": 5100 }, { "epoch": 50.49, "learning_rate": 6.150602409638555e-05, "loss": 0.9722, "step": 5200 }, { "epoch": 51.46, "learning_rate": 6.030120481927711e-05, "loss": 0.9815, "step": 5300 }, { "epoch": 52.43, "learning_rate": 5.909638554216868e-05, "loss": 0.9633, "step": 5400 }, { "epoch": 53.4, "learning_rate": 5.789156626506025e-05, "loss": 0.9659, "step": 5500 }, { "epoch": 53.4, "eval_loss": 0.2394031286239624, "eval_runtime": 133.1725, "eval_samples_per_second": 20.59, "eval_steps_per_second": 2.576, "eval_wer": 0.38596650478827216, "step": 5500 }, { "epoch": 54.37, "learning_rate": 5.668674698795181e-05, "loss": 0.9544, "step": 5600 }, { "epoch": 55.34, "learning_rate": 5.5481927710843374e-05, "loss": 0.9581, "step": 5700 }, { "epoch": 56.31, "learning_rate": 5.427710843373495e-05, "loss": 0.9437, "step": 5800 }, { "epoch": 57.28, "learning_rate": 5.307228915662651e-05, "loss": 0.9378, "step": 5900 }, { "epoch": 58.25, "learning_rate": 5.186746987951807e-05, "loss": 0.9254, "step": 6000 }, { "epoch": 58.25, "eval_loss": 0.2373155653476715, "eval_runtime": 133.3175, "eval_samples_per_second": 20.567, "eval_steps_per_second": 2.573, "eval_wer": 0.3688558071982935, "step": 6000 }, { "epoch": 59.22, "learning_rate": 5.0662650602409644e-05, "loss": 0.9321, "step": 6100 }, { "epoch": 60.19, "learning_rate": 4.9457831325301205e-05, "loss": 0.9122, "step": 6200 }, { "epoch": 61.17, "learning_rate": 4.825301204819277e-05, "loss": 0.9148, "step": 6300 }, { "epoch": 62.14, "learning_rate": 4.704819277108434e-05, "loss": 0.9177, "step": 6400 }, { "epoch": 63.11, "learning_rate": 4.584337349397591e-05, "loss": 0.9209, "step": 6500 }, { "epoch": 63.11, "eval_loss": 0.23466718196868896, "eval_runtime": 134.3014, "eval_samples_per_second": 20.417, "eval_steps_per_second": 2.554, "eval_wer": 0.367040348568057, "step": 6500 }, { "epoch": 64.08, "learning_rate": 4.4638554216867476e-05, "loss": 0.8981, "step": 6600 }, { "epoch": 65.05, "learning_rate": 4.344578313253012e-05, "loss": 0.8927, "step": 6700 }, { "epoch": 66.02, "learning_rate": 4.224096385542169e-05, "loss": 0.8986, "step": 6800 }, { "epoch": 66.99, "learning_rate": 4.1036144578313255e-05, "loss": 0.8867, "step": 6900 }, { "epoch": 67.96, "learning_rate": 3.983132530120482e-05, "loss": 0.889, "step": 7000 }, { "epoch": 67.96, "eval_loss": 0.22911565005779266, "eval_runtime": 133.5899, "eval_samples_per_second": 20.526, "eval_steps_per_second": 2.568, "eval_wer": 0.36871964780102573, "step": 7000 }, { "epoch": 68.93, "learning_rate": 3.862650602409639e-05, "loss": 0.885, "step": 7100 }, { "epoch": 69.9, "learning_rate": 3.742168674698796e-05, "loss": 0.8772, "step": 7200 }, { "epoch": 70.87, "learning_rate": 3.62289156626506e-05, "loss": 0.8798, "step": 7300 }, { "epoch": 71.84, "learning_rate": 3.502409638554217e-05, "loss": 0.8808, "step": 7400 }, { "epoch": 72.82, "learning_rate": 3.3819277108433736e-05, "loss": 0.8859, "step": 7500 }, { "epoch": 72.82, "eval_loss": 0.22717151045799255, "eval_runtime": 134.7148, "eval_samples_per_second": 20.354, "eval_steps_per_second": 2.546, "eval_wer": 0.3615939726773476, "step": 7500 }, { "epoch": 73.79, "learning_rate": 3.2614457831325304e-05, "loss": 0.8713, "step": 7600 }, { "epoch": 74.76, "learning_rate": 3.140963855421687e-05, "loss": 0.8734, "step": 7700 }, { "epoch": 75.73, "learning_rate": 3.0204819277108436e-05, "loss": 0.8565, "step": 7800 }, { "epoch": 76.7, "learning_rate": 2.9e-05, "loss": 0.8492, "step": 7900 }, { "epoch": 77.67, "learning_rate": 2.7795180722891568e-05, "loss": 0.8441, "step": 8000 }, { "epoch": 77.67, "eval_loss": 0.22322185337543488, "eval_runtime": 134.4634, "eval_samples_per_second": 20.392, "eval_steps_per_second": 2.551, "eval_wer": 0.35383288703308674, "step": 8000 }, { "epoch": 78.64, "learning_rate": 2.6590361445783136e-05, "loss": 0.8516, "step": 8100 }, { "epoch": 79.61, "learning_rate": 2.5385542168674696e-05, "loss": 0.8451, "step": 8200 }, { "epoch": 80.58, "learning_rate": 2.4180722891566264e-05, "loss": 0.8346, "step": 8300 }, { "epoch": 81.55, "learning_rate": 2.2975903614457832e-05, "loss": 0.8378, "step": 8400 }, { "epoch": 82.52, "learning_rate": 2.17710843373494e-05, "loss": 0.8284, "step": 8500 }, { "epoch": 82.52, "eval_loss": 0.22235004603862762, "eval_runtime": 133.9778, "eval_samples_per_second": 20.466, "eval_steps_per_second": 2.56, "eval_wer": 0.33817455634729726, "step": 8500 }, { "epoch": 83.5, "learning_rate": 2.0566265060240967e-05, "loss": 0.8269, "step": 8600 }, { "epoch": 84.47, "learning_rate": 1.936144578313253e-05, "loss": 0.8186, "step": 8700 }, { "epoch": 85.44, "learning_rate": 1.8156626506024096e-05, "loss": 0.8243, "step": 8800 }, { "epoch": 86.41, "learning_rate": 1.6951807228915663e-05, "loss": 0.8279, "step": 8900 }, { "epoch": 87.38, "learning_rate": 1.574698795180723e-05, "loss": 0.8142, "step": 9000 }, { "epoch": 87.38, "eval_loss": 0.2192818820476532, "eval_runtime": 132.2621, "eval_samples_per_second": 20.732, "eval_steps_per_second": 2.593, "eval_wer": 0.33104888122361914, "step": 9000 }, { "epoch": 88.35, "learning_rate": 1.4542168674698795e-05, "loss": 0.8071, "step": 9100 }, { "epoch": 89.32, "learning_rate": 1.3337349397590363e-05, "loss": 0.8075, "step": 9200 }, { "epoch": 90.29, "learning_rate": 1.2132530120481929e-05, "loss": 0.8042, "step": 9300 }, { "epoch": 91.26, "learning_rate": 1.0927710843373493e-05, "loss": 0.7916, "step": 9400 }, { "epoch": 92.23, "learning_rate": 9.722891566265061e-06, "loss": 0.8012, "step": 9500 }, { "epoch": 92.23, "eval_loss": 0.21682003140449524, "eval_runtime": 133.9404, "eval_samples_per_second": 20.472, "eval_steps_per_second": 2.561, "eval_wer": 0.3276448962919257, "step": 9500 }, { "epoch": 93.2, "learning_rate": 8.518072289156627e-06, "loss": 0.8055, "step": 9600 }, { "epoch": 94.17, "learning_rate": 7.313253012048194e-06, "loss": 0.7955, "step": 9700 }, { "epoch": 95.15, "learning_rate": 6.108433734939759e-06, "loss": 0.7961, "step": 9800 }, { "epoch": 96.12, "learning_rate": 4.903614457831326e-06, "loss": 0.7843, "step": 9900 }, { "epoch": 97.09, "learning_rate": 3.6987951807228917e-06, "loss": 0.7781, "step": 10000 }, { "epoch": 97.09, "eval_loss": 0.21628263592720032, "eval_runtime": 133.5255, "eval_samples_per_second": 20.535, "eval_steps_per_second": 2.569, "eval_wer": 0.3240593654972087, "step": 10000 }, { "epoch": 98.06, "learning_rate": 2.493975903614458e-06, "loss": 0.7842, "step": 10100 }, { "epoch": 99.03, "learning_rate": 1.2891566265060241e-06, "loss": 0.7821, "step": 10200 }, { "epoch": 100.0, "learning_rate": 9.638554216867469e-08, "loss": 0.7779, "step": 10300 }, { "epoch": 100.0, "step": 10300, "total_flos": 5.823193156406256e+19, "train_loss": 1.3660302423273476, "train_runtime": 26867.6077, "train_samples_per_second": 12.253, "train_steps_per_second": 0.383 } ], "max_steps": 10300, "num_train_epochs": 100, "total_flos": 5.823193156406256e+19, "trial_name": null, "trial_params": null }