{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 10300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.97,
      "learning_rate": 4.950000000000001e-06,
      "loss": 13.4586,
      "step": 100
    },
    {
      "epoch": 1.94,
      "learning_rate": 9.950000000000001e-06,
      "loss": 5.8722,
      "step": 200
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.4950000000000001e-05,
      "loss": 4.0954,
      "step": 300
    },
    {
      "epoch": 3.88,
      "learning_rate": 1.995e-05,
      "loss": 3.572,
      "step": 400
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.495e-05,
      "loss": 3.2914,
      "step": 500
    },
    {
      "epoch": 4.85,
      "eval_loss": 3.2282841205596924,
      "eval_runtime": 135.7815,
      "eval_samples_per_second": 20.194,
      "eval_steps_per_second": 2.526,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 5.83,
      "learning_rate": 2.995e-05,
      "loss": 3.2087,
      "step": 600
    },
    {
      "epoch": 6.8,
      "learning_rate": 3.495e-05,
      "loss": 3.1802,
      "step": 700
    },
    {
      "epoch": 7.77,
      "learning_rate": 3.995e-05,
      "loss": 3.1307,
      "step": 800
    },
    {
      "epoch": 8.74,
      "learning_rate": 4.495e-05,
      "loss": 3.0922,
      "step": 900
    },
    {
      "epoch": 9.71,
      "learning_rate": 4.995e-05,
      "loss": 3.0068,
      "step": 1000
    },
    {
      "epoch": 9.71,
      "eval_loss": 2.7939300537109375,
      "eval_runtime": 134.6432,
      "eval_samples_per_second": 20.365,
      "eval_steps_per_second": 2.547,
      "eval_wer": 0.997957609040984,
      "step": 1000
    },
    {
      "epoch": 10.68,
      "learning_rate": 5.495e-05,
      "loss": 2.5073,
      "step": 1100
    },
    {
      "epoch": 11.65,
      "learning_rate": 5.995000000000001e-05,
      "loss": 1.8326,
      "step": 1200
    },
    {
      "epoch": 12.62,
      "learning_rate": 6.494999999999999e-05,
      "loss": 1.598,
      "step": 1300
    },
    {
      "epoch": 13.59,
      "learning_rate": 6.995e-05,
      "loss": 1.5016,
      "step": 1400
    },
    {
      "epoch": 14.56,
      "learning_rate": 7.495e-05,
      "loss": 1.4306,
      "step": 1500
    },
    {
      "epoch": 14.56,
      "eval_loss": 0.48574715852737427,
      "eval_runtime": 133.5239,
      "eval_samples_per_second": 20.536,
      "eval_steps_per_second": 2.569,
      "eval_wer": 0.6313711251304861,
      "step": 1500
    },
    {
      "epoch": 15.53,
      "learning_rate": 7.995e-05,
      "loss": 1.3756,
      "step": 1600
    },
    {
      "epoch": 16.5,
      "learning_rate": 8.495e-05,
      "loss": 1.3583,
      "step": 1700
    },
    {
      "epoch": 17.48,
      "learning_rate": 8.995e-05,
      "loss": 1.3058,
      "step": 1800
    },
    {
      "epoch": 18.45,
      "learning_rate": 9.495e-05,
      "loss": 1.2949,
      "step": 1900
    },
    {
      "epoch": 19.42,
      "learning_rate": 9.995e-05,
      "loss": 1.2831,
      "step": 2000
    },
    {
      "epoch": 19.42,
      "eval_loss": 0.3678707182407379,
      "eval_runtime": 134.1491,
      "eval_samples_per_second": 20.44,
      "eval_steps_per_second": 2.557,
      "eval_wer": 0.6065901148277584,
      "step": 2000
    },
    {
      "epoch": 20.39,
      "learning_rate": 9.880722891566265e-05,
      "loss": 1.2725,
      "step": 2100
    },
    {
      "epoch": 21.36,
      "learning_rate": 9.76144578313253e-05,
      "loss": 1.2436,
      "step": 2200
    },
    {
      "epoch": 22.33,
      "learning_rate": 9.640963855421687e-05,
      "loss": 1.2363,
      "step": 2300
    },
    {
      "epoch": 23.3,
      "learning_rate": 9.521686746987952e-05,
      "loss": 1.2243,
      "step": 2400
    },
    {
      "epoch": 24.27,
      "learning_rate": 9.402409638554217e-05,
      "loss": 1.2065,
      "step": 2500
    },
    {
      "epoch": 24.27,
      "eval_loss": 0.33028003573417664,
      "eval_runtime": 134.2277,
      "eval_samples_per_second": 20.428,
      "eval_steps_per_second": 2.555,
      "eval_wer": 0.5559842055099169,
      "step": 2500
    },
    {
      "epoch": 25.24,
      "learning_rate": 9.281927710843374e-05,
      "loss": 1.192,
      "step": 2600
    },
    {
      "epoch": 26.21,
      "learning_rate": 9.161445783132531e-05,
      "loss": 1.1816,
      "step": 2700
    },
    {
      "epoch": 27.18,
      "learning_rate": 9.040963855421686e-05,
      "loss": 1.1869,
      "step": 2800
    },
    {
      "epoch": 28.16,
      "learning_rate": 8.920481927710844e-05,
      "loss": 1.1728,
      "step": 2900
    },
    {
      "epoch": 29.13,
      "learning_rate": 8.800000000000001e-05,
      "loss": 1.1449,
      "step": 3000
    },
    {
      "epoch": 29.13,
      "eval_loss": 0.3007894456386566,
      "eval_runtime": 133.5503,
      "eval_samples_per_second": 20.532,
      "eval_steps_per_second": 2.568,
      "eval_wer": 0.46902373712159035,
      "step": 3000
    },
    {
      "epoch": 30.1,
      "learning_rate": 8.679518072289157e-05,
      "loss": 1.1408,
      "step": 3100
    },
    {
      "epoch": 31.07,
      "learning_rate": 8.559036144578315e-05,
      "loss": 1.1319,
      "step": 3200
    },
    {
      "epoch": 32.04,
      "learning_rate": 8.43855421686747e-05,
      "loss": 1.1178,
      "step": 3300
    },
    {
      "epoch": 33.01,
      "learning_rate": 8.318072289156627e-05,
      "loss": 1.1122,
      "step": 3400
    },
    {
      "epoch": 33.98,
      "learning_rate": 8.197590361445784e-05,
      "loss": 1.0926,
      "step": 3500
    },
    {
      "epoch": 33.98,
      "eval_loss": 0.28173714876174927,
      "eval_runtime": 132.429,
      "eval_samples_per_second": 20.705,
      "eval_steps_per_second": 2.59,
      "eval_wer": 0.4618980619979122,
      "step": 3500
    },
    {
      "epoch": 34.95,
      "learning_rate": 8.07710843373494e-05,
      "loss": 1.0935,
      "step": 3600
    },
    {
      "epoch": 35.92,
      "learning_rate": 7.956626506024096e-05,
      "loss": 1.0815,
      "step": 3700
    },
    {
      "epoch": 36.89,
      "learning_rate": 7.836144578313254e-05,
      "loss": 1.0856,
      "step": 3800
    },
    {
      "epoch": 37.86,
      "learning_rate": 7.71566265060241e-05,
      "loss": 1.0732,
      "step": 3900
    },
    {
      "epoch": 38.83,
      "learning_rate": 7.595180722891566e-05,
      "loss": 1.0635,
      "step": 4000
    },
    {
      "epoch": 38.83,
      "eval_loss": 0.2665168046951294,
      "eval_runtime": 133.7977,
      "eval_samples_per_second": 20.494,
      "eval_steps_per_second": 2.564,
      "eval_wer": 0.4391140561884446,
      "step": 4000
    },
    {
      "epoch": 39.81,
      "learning_rate": 7.474698795180723e-05,
      "loss": 1.0614,
      "step": 4100
    },
    {
      "epoch": 40.78,
      "learning_rate": 7.35421686746988e-05,
      "loss": 1.0457,
      "step": 4200
    },
    {
      "epoch": 41.75,
      "learning_rate": 7.233734939759036e-05,
      "loss": 1.039,
      "step": 4300
    },
    {
      "epoch": 42.72,
      "learning_rate": 7.113253012048193e-05,
      "loss": 1.0151,
      "step": 4400
    },
    {
      "epoch": 43.69,
      "learning_rate": 6.99277108433735e-05,
      "loss": 1.029,
      "step": 4500
    },
    {
      "epoch": 43.69,
      "eval_loss": 0.26156488060951233,
      "eval_runtime": 133.7699,
      "eval_samples_per_second": 20.498,
      "eval_steps_per_second": 2.564,
      "eval_wer": 0.4175100984886307,
      "step": 4500
    },
    {
      "epoch": 44.66,
      "learning_rate": 6.873493975903614e-05,
      "loss": 1.0254,
      "step": 4600
    },
    {
      "epoch": 45.63,
      "learning_rate": 6.753012048192771e-05,
      "loss": 1.0328,
      "step": 4700
    },
    {
      "epoch": 46.6,
      "learning_rate": 6.632530120481928e-05,
      "loss": 1.022,
      "step": 4800
    },
    {
      "epoch": 47.57,
      "learning_rate": 6.512048192771085e-05,
      "loss": 1.0021,
      "step": 4900
    },
    {
      "epoch": 48.54,
      "learning_rate": 6.391566265060241e-05,
      "loss": 1.0064,
      "step": 5000
    },
    {
      "epoch": 48.54,
      "eval_loss": 0.24684669077396393,
      "eval_runtime": 133.5,
      "eval_samples_per_second": 20.539,
      "eval_steps_per_second": 2.569,
      "eval_wer": 0.4051195933372668,
      "step": 5000
    },
    {
      "epoch": 49.51,
      "learning_rate": 6.271084337349398e-05,
      "loss": 0.9791,
      "step": 5100
    },
    {
      "epoch": 50.49,
      "learning_rate": 6.150602409638555e-05,
      "loss": 0.9722,
      "step": 5200
    },
    {
      "epoch": 51.46,
      "learning_rate": 6.030120481927711e-05,
      "loss": 0.9815,
      "step": 5300
    },
    {
      "epoch": 52.43,
      "learning_rate": 5.909638554216868e-05,
      "loss": 0.9633,
      "step": 5400
    },
    {
      "epoch": 53.4,
      "learning_rate": 5.789156626506025e-05,
      "loss": 0.9659,
      "step": 5500
    },
    {
      "epoch": 53.4,
      "eval_loss": 0.2394031286239624,
      "eval_runtime": 133.1725,
      "eval_samples_per_second": 20.59,
      "eval_steps_per_second": 2.576,
      "eval_wer": 0.38596650478827216,
      "step": 5500
    },
    {
      "epoch": 54.37,
      "learning_rate": 5.668674698795181e-05,
      "loss": 0.9544,
      "step": 5600
    },
    {
      "epoch": 55.34,
      "learning_rate": 5.5481927710843374e-05,
      "loss": 0.9581,
      "step": 5700
    },
    {
      "epoch": 56.31,
      "learning_rate": 5.427710843373495e-05,
      "loss": 0.9437,
      "step": 5800
    },
    {
      "epoch": 57.28,
      "learning_rate": 5.307228915662651e-05,
      "loss": 0.9378,
      "step": 5900
    },
    {
      "epoch": 58.25,
      "learning_rate": 5.186746987951807e-05,
      "loss": 0.9254,
      "step": 6000
    },
    {
      "epoch": 58.25,
      "eval_loss": 0.2373155653476715,
      "eval_runtime": 133.3175,
      "eval_samples_per_second": 20.567,
      "eval_steps_per_second": 2.573,
      "eval_wer": 0.3688558071982935,
      "step": 6000
    },
    {
      "epoch": 59.22,
      "learning_rate": 5.0662650602409644e-05,
      "loss": 0.9321,
      "step": 6100
    },
    {
      "epoch": 60.19,
      "learning_rate": 4.9457831325301205e-05,
      "loss": 0.9122,
      "step": 6200
    },
    {
      "epoch": 61.17,
      "learning_rate": 4.825301204819277e-05,
      "loss": 0.9148,
      "step": 6300
    },
    {
      "epoch": 62.14,
      "learning_rate": 4.704819277108434e-05,
      "loss": 0.9177,
      "step": 6400
    },
    {
      "epoch": 63.11,
      "learning_rate": 4.584337349397591e-05,
      "loss": 0.9209,
      "step": 6500
    },
    {
      "epoch": 63.11,
      "eval_loss": 0.23466718196868896,
      "eval_runtime": 134.3014,
      "eval_samples_per_second": 20.417,
      "eval_steps_per_second": 2.554,
      "eval_wer": 0.367040348568057,
      "step": 6500
    },
    {
      "epoch": 64.08,
      "learning_rate": 4.4638554216867476e-05,
      "loss": 0.8981,
      "step": 6600
    },
    {
      "epoch": 65.05,
      "learning_rate": 4.344578313253012e-05,
      "loss": 0.8927,
      "step": 6700
    },
    {
      "epoch": 66.02,
      "learning_rate": 4.224096385542169e-05,
      "loss": 0.8986,
      "step": 6800
    },
    {
      "epoch": 66.99,
      "learning_rate": 4.1036144578313255e-05,
      "loss": 0.8867,
      "step": 6900
    },
    {
      "epoch": 67.96,
      "learning_rate": 3.983132530120482e-05,
      "loss": 0.889,
      "step": 7000
    },
    {
      "epoch": 67.96,
      "eval_loss": 0.22911565005779266,
      "eval_runtime": 133.5899,
      "eval_samples_per_second": 20.526,
      "eval_steps_per_second": 2.568,
      "eval_wer": 0.36871964780102573,
      "step": 7000
    },
    {
      "epoch": 68.93,
      "learning_rate": 3.862650602409639e-05,
      "loss": 0.885,
      "step": 7100
    },
    {
      "epoch": 69.9,
      "learning_rate": 3.742168674698796e-05,
      "loss": 0.8772,
      "step": 7200
    },
    {
      "epoch": 70.87,
      "learning_rate": 3.62289156626506e-05,
      "loss": 0.8798,
      "step": 7300
    },
    {
      "epoch": 71.84,
      "learning_rate": 3.502409638554217e-05,
      "loss": 0.8808,
      "step": 7400
    },
    {
      "epoch": 72.82,
      "learning_rate": 3.3819277108433736e-05,
      "loss": 0.8859,
      "step": 7500
    },
    {
      "epoch": 72.82,
      "eval_loss": 0.22717151045799255,
      "eval_runtime": 134.7148,
      "eval_samples_per_second": 20.354,
      "eval_steps_per_second": 2.546,
      "eval_wer": 0.3615939726773476,
      "step": 7500
    },
    {
      "epoch": 73.79,
      "learning_rate": 3.2614457831325304e-05,
      "loss": 0.8713,
      "step": 7600
    },
    {
      "epoch": 74.76,
      "learning_rate": 3.140963855421687e-05,
      "loss": 0.8734,
      "step": 7700
    },
    {
      "epoch": 75.73,
      "learning_rate": 3.0204819277108436e-05,
      "loss": 0.8565,
      "step": 7800
    },
    {
      "epoch": 76.7,
      "learning_rate": 2.9e-05,
      "loss": 0.8492,
      "step": 7900
    },
    {
      "epoch": 77.67,
      "learning_rate": 2.7795180722891568e-05,
      "loss": 0.8441,
      "step": 8000
    },
    {
      "epoch": 77.67,
      "eval_loss": 0.22322185337543488,
      "eval_runtime": 134.4634,
      "eval_samples_per_second": 20.392,
      "eval_steps_per_second": 2.551,
      "eval_wer": 0.35383288703308674,
      "step": 8000
    },
    {
      "epoch": 78.64,
      "learning_rate": 2.6590361445783136e-05,
      "loss": 0.8516,
      "step": 8100
    },
    {
      "epoch": 79.61,
      "learning_rate": 2.5385542168674696e-05,
      "loss": 0.8451,
      "step": 8200
    },
    {
      "epoch": 80.58,
      "learning_rate": 2.4180722891566264e-05,
      "loss": 0.8346,
      "step": 8300
    },
    {
      "epoch": 81.55,
      "learning_rate": 2.2975903614457832e-05,
      "loss": 0.8378,
      "step": 8400
    },
    {
      "epoch": 82.52,
      "learning_rate": 2.17710843373494e-05,
      "loss": 0.8284,
      "step": 8500
    },
    {
      "epoch": 82.52,
      "eval_loss": 0.22235004603862762,
      "eval_runtime": 133.9778,
      "eval_samples_per_second": 20.466,
      "eval_steps_per_second": 2.56,
      "eval_wer": 0.33817455634729726,
      "step": 8500
    },
    {
      "epoch": 83.5,
      "learning_rate": 2.0566265060240967e-05,
      "loss": 0.8269,
      "step": 8600
    },
    {
      "epoch": 84.47,
      "learning_rate": 1.936144578313253e-05,
      "loss": 0.8186,
      "step": 8700
    },
    {
      "epoch": 85.44,
      "learning_rate": 1.8156626506024096e-05,
      "loss": 0.8243,
      "step": 8800
    },
    {
      "epoch": 86.41,
      "learning_rate": 1.6951807228915663e-05,
      "loss": 0.8279,
      "step": 8900
    },
    {
      "epoch": 87.38,
      "learning_rate": 1.574698795180723e-05,
      "loss": 0.8142,
      "step": 9000
    },
    {
      "epoch": 87.38,
      "eval_loss": 0.2192818820476532,
      "eval_runtime": 132.2621,
      "eval_samples_per_second": 20.732,
      "eval_steps_per_second": 2.593,
      "eval_wer": 0.33104888122361914,
      "step": 9000
    },
    {
      "epoch": 88.35,
      "learning_rate": 1.4542168674698795e-05,
      "loss": 0.8071,
      "step": 9100
    },
    {
      "epoch": 89.32,
      "learning_rate": 1.3337349397590363e-05,
      "loss": 0.8075,
      "step": 9200
    },
    {
      "epoch": 90.29,
      "learning_rate": 1.2132530120481929e-05,
      "loss": 0.8042,
      "step": 9300
    },
    {
      "epoch": 91.26,
      "learning_rate": 1.0927710843373493e-05,
      "loss": 0.7916,
      "step": 9400
    },
    {
      "epoch": 92.23,
      "learning_rate": 9.722891566265061e-06,
      "loss": 0.8012,
      "step": 9500
    },
    {
      "epoch": 92.23,
      "eval_loss": 0.21682003140449524,
      "eval_runtime": 133.9404,
      "eval_samples_per_second": 20.472,
      "eval_steps_per_second": 2.561,
      "eval_wer": 0.3276448962919257,
      "step": 9500
    },
    {
      "epoch": 93.2,
      "learning_rate": 8.518072289156627e-06,
      "loss": 0.8055,
      "step": 9600
    },
    {
      "epoch": 94.17,
      "learning_rate": 7.313253012048194e-06,
      "loss": 0.7955,
      "step": 9700
    },
    {
      "epoch": 95.15,
      "learning_rate": 6.108433734939759e-06,
      "loss": 0.7961,
      "step": 9800
    },
    {
      "epoch": 96.12,
      "learning_rate": 4.903614457831326e-06,
      "loss": 0.7843,
      "step": 9900
    },
    {
      "epoch": 97.09,
      "learning_rate": 3.6987951807228917e-06,
      "loss": 0.7781,
      "step": 10000
    },
    {
      "epoch": 97.09,
      "eval_loss": 0.21628263592720032,
      "eval_runtime": 133.5255,
      "eval_samples_per_second": 20.535,
      "eval_steps_per_second": 2.569,
      "eval_wer": 0.3240593654972087,
      "step": 10000
    },
    {
      "epoch": 98.06,
      "learning_rate": 2.493975903614458e-06,
      "loss": 0.7842,
      "step": 10100
    },
    {
      "epoch": 99.03,
      "learning_rate": 1.2891566265060241e-06,
      "loss": 0.7821,
      "step": 10200
    },
    {
      "epoch": 100.0,
      "learning_rate": 9.638554216867469e-08,
      "loss": 0.7779,
      "step": 10300
    },
    {
      "epoch": 100.0,
      "step": 10300,
      "total_flos": 5.823193156406256e+19,
      "train_loss": 1.3660302423273476,
      "train_runtime": 26867.6077,
      "train_samples_per_second": 12.253,
      "train_steps_per_second": 0.383
    }
  ],
  "max_steps": 10300,
  "num_train_epochs": 100,
  "total_flos": 5.823193156406256e+19,
  "trial_name": null,
  "trial_params": null
}