{ "best_metric": 0.2620340585708618, "best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-bembaspeech-model/checkpoint-2600", "epoch": 4.497540407589599, "eval_steps": 200, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14054813773717498, "grad_norm": 3.1719706058502197, "learning_rate": 0.00029699999999999996, "loss": 1.5404, "step": 100 }, { "epoch": 0.28109627547434995, "grad_norm": 2.3361048698425293, "learning_rate": 0.00029860103626943, "loss": 0.7066, "step": 200 }, { "epoch": 0.28109627547434995, "eval_loss": 0.5066322684288025, "eval_runtime": 58.7073, "eval_samples_per_second": 25.414, "eval_steps_per_second": 3.185, "eval_wer": 0.7647720391037287, "step": 200 }, { "epoch": 0.42164441321152496, "grad_norm": 2.280653476715088, "learning_rate": 0.0002971879415920866, "loss": 0.5841, "step": 300 }, { "epoch": 0.5621925509486999, "grad_norm": 3.3147284984588623, "learning_rate": 0.00029577484691474326, "loss": 0.5538, "step": 400 }, { "epoch": 0.5621925509486999, "eval_loss": 0.4313148260116577, "eval_runtime": 57.0018, "eval_samples_per_second": 26.175, "eval_steps_per_second": 3.281, "eval_wer": 0.7232459555324855, "step": 400 }, { "epoch": 0.7027406886858749, "grad_norm": 2.074723243713379, "learning_rate": 0.00029436175223739985, "loss": 0.5065, "step": 500 }, { "epoch": 0.8432888264230499, "grad_norm": 1.6516566276550293, "learning_rate": 0.0002929486575600565, "loss": 0.4574, "step": 600 }, { "epoch": 0.8432888264230499, "eval_loss": 0.41018426418304443, "eval_runtime": 57.3352, "eval_samples_per_second": 26.022, "eval_steps_per_second": 3.262, "eval_wer": 0.6956484124924301, "step": 600 }, { "epoch": 0.9838369641602249, "grad_norm": 2.077942132949829, "learning_rate": 0.0002915355628827131, "loss": 0.4548, "step": 700 }, { "epoch": 1.1243851018973998, "grad_norm": 3.208420753479004, "learning_rate": 0.00029012246820536974, "loss": 0.4084, "step": 800 }, { "epoch": 1.1243851018973998, "eval_loss": 0.3528524935245514, "eval_runtime": 57.2723, "eval_samples_per_second": 26.051, "eval_steps_per_second": 3.265, "eval_wer": 0.6276494506445194, "step": 800 }, { "epoch": 1.264933239634575, "grad_norm": 2.2176339626312256, "learning_rate": 0.00028870937352802633, "loss": 0.4086, "step": 900 }, { "epoch": 1.4054813773717498, "grad_norm": 1.1724199056625366, "learning_rate": 0.000287296278850683, "loss": 0.388, "step": 1000 }, { "epoch": 1.4054813773717498, "eval_loss": 0.3004082143306732, "eval_runtime": 56.7843, "eval_samples_per_second": 26.275, "eval_steps_per_second": 3.293, "eval_wer": 0.5723678518903019, "step": 1000 }, { "epoch": 1.5460295151089247, "grad_norm": 1.3748365640640259, "learning_rate": 0.00028588318417333957, "loss": 0.3791, "step": 1100 }, { "epoch": 1.6865776528460998, "grad_norm": 1.1393488645553589, "learning_rate": 0.0002844700894959962, "loss": 0.3803, "step": 1200 }, { "epoch": 1.6865776528460998, "eval_loss": 0.3376190662384033, "eval_runtime": 56.8427, "eval_samples_per_second": 26.248, "eval_steps_per_second": 3.29, "eval_wer": 0.647720391037287, "step": 1200 }, { "epoch": 1.8271257905832747, "grad_norm": 2.872753143310547, "learning_rate": 0.0002830569948186528, "loss": 0.3644, "step": 1300 }, { "epoch": 1.9676739283204498, "grad_norm": 0.8893818855285645, "learning_rate": 0.00028164390014130946, "loss": 0.367, "step": 1400 }, { "epoch": 1.9676739283204498, "eval_loss": 0.29106405377388, "eval_runtime": 56.9876, "eval_samples_per_second": 26.181, 
"eval_steps_per_second": 3.281, "eval_wer": 0.5801539925599101, "step": 1400 }, { "epoch": 2.1082220660576247, "grad_norm": 0.9697826504707336, "learning_rate": 0.00028023080546396605, "loss": 0.3335, "step": 1500 }, { "epoch": 2.2487702037947996, "grad_norm": 1.1089781522750854, "learning_rate": 0.0002788177107866227, "loss": 0.3168, "step": 1600 }, { "epoch": 2.2487702037947996, "eval_loss": 0.31056511402130127, "eval_runtime": 57.0385, "eval_samples_per_second": 26.158, "eval_steps_per_second": 3.278, "eval_wer": 0.5724543645644087, "step": 1600 }, { "epoch": 2.3893183415319745, "grad_norm": 0.8601323962211609, "learning_rate": 0.0002774046161092793, "loss": 0.3268, "step": 1700 }, { "epoch": 2.52986647926915, "grad_norm": 0.7583444714546204, "learning_rate": 0.00027599152143193594, "loss": 0.3227, "step": 1800 }, { "epoch": 2.52986647926915, "eval_loss": 0.26542848348617554, "eval_runtime": 57.1567, "eval_samples_per_second": 26.104, "eval_steps_per_second": 3.272, "eval_wer": 0.5348213513279696, "step": 1800 }, { "epoch": 2.6704146170063248, "grad_norm": 1.2712171077728271, "learning_rate": 0.00027457842675459253, "loss": 0.3081, "step": 1900 }, { "epoch": 2.8109627547434997, "grad_norm": 0.7406817674636841, "learning_rate": 0.0002731653320772492, "loss": 0.3111, "step": 2000 }, { "epoch": 2.8109627547434997, "eval_loss": 0.26211297512054443, "eval_runtime": 56.777, "eval_samples_per_second": 26.278, "eval_steps_per_second": 3.294, "eval_wer": 0.5494419932520114, "step": 2000 }, { "epoch": 2.9515108924806746, "grad_norm": 1.053320050239563, "learning_rate": 0.0002717522373999058, "loss": 0.3181, "step": 2100 }, { "epoch": 3.0920590302178494, "grad_norm": 0.7663154006004333, "learning_rate": 0.00027033914272256237, "loss": 0.2823, "step": 2200 }, { "epoch": 3.0920590302178494, "eval_loss": 0.2665364444255829, "eval_runtime": 56.8072, "eval_samples_per_second": 26.264, "eval_steps_per_second": 3.292, "eval_wer": 0.5421749286270439, "step": 2200 }, { "epoch": 3.232607167955025, "grad_norm": 0.5670559406280518, "learning_rate": 0.000268926048045219, "loss": 0.2593, "step": 2300 }, { "epoch": 3.3731553056921997, "grad_norm": 0.800305187702179, "learning_rate": 0.0002675129533678756, "loss": 0.2603, "step": 2400 }, { "epoch": 3.3731553056921997, "eval_loss": 0.26233434677124023, "eval_runtime": 56.2939, "eval_samples_per_second": 26.504, "eval_steps_per_second": 3.322, "eval_wer": 0.5174323038325115, "step": 2400 }, { "epoch": 3.5137034434293746, "grad_norm": 0.581555187702179, "learning_rate": 0.00026609985869053225, "loss": 0.2835, "step": 2500 }, { "epoch": 3.6542515811665495, "grad_norm": 0.5515788793563843, "learning_rate": 0.00026468676401318885, "loss": 0.2735, "step": 2600 }, { "epoch": 3.6542515811665495, "eval_loss": 0.2620340585708618, "eval_runtime": 57.2283, "eval_samples_per_second": 26.071, "eval_steps_per_second": 3.268, "eval_wer": 0.5353404273726101, "step": 2600 }, { "epoch": 3.7947997189037244, "grad_norm": 0.6277577877044678, "learning_rate": 0.0002632736693358455, "loss": 0.2806, "step": 2700 }, { "epoch": 3.9353478566408997, "grad_norm": 0.5215665698051453, "learning_rate": 0.0002618605746585021, "loss": 0.2666, "step": 2800 }, { "epoch": 3.9353478566408997, "eval_loss": 0.2752685844898224, "eval_runtime": 57.0665, "eval_samples_per_second": 26.145, "eval_steps_per_second": 3.277, "eval_wer": 0.5450298468725668, "step": 2800 }, { "epoch": 4.075895994378074, "grad_norm": 0.745657205581665, "learning_rate": 0.0002604474799811587, "loss": 0.2479, "step": 2900 }, { 
"epoch": 4.2164441321152495, "grad_norm": 0.44574543833732605, "learning_rate": 0.00025903438530381533, "loss": 0.2248, "step": 3000 }, { "epoch": 4.2164441321152495, "eval_loss": 0.28806060552597046, "eval_runtime": 56.7213, "eval_samples_per_second": 26.304, "eval_steps_per_second": 3.297, "eval_wer": 0.5817977333679384, "step": 3000 }, { "epoch": 4.356992269852425, "grad_norm": 0.690984845161438, "learning_rate": 0.0002576212906264719, "loss": 0.2502, "step": 3100 }, { "epoch": 4.497540407589599, "grad_norm": 0.47614070773124695, "learning_rate": 0.00025620819594912857, "loss": 0.2408, "step": 3200 }, { "epoch": 4.497540407589599, "eval_loss": 0.2748269736766815, "eval_runtime": 57.1007, "eval_samples_per_second": 26.129, "eval_steps_per_second": 3.275, "eval_wer": 0.5323989964529804, "step": 3200 }, { "epoch": 4.497540407589599, "step": 3200, "total_flos": 9.439925733716597e+18, "train_loss": 0.3939369261264801, "train_runtime": 5287.5609, "train_samples_per_second": 64.55, "train_steps_per_second": 4.034 } ], "logging_steps": 100, "max_steps": 21330, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.439925733716597e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }