w2v-bert-bem-bembaspeech-model / trainer_state.json
csikasote's picture
End of training
189d715 verified
{
"best_metric": 0.2620340585708618,
"best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-bembaspeech-model/checkpoint-2600",
"epoch": 4.497540407589599,
"eval_steps": 200,
"global_step": 3200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14054813773717498,
"grad_norm": 3.1719706058502197,
"learning_rate": 0.00029699999999999996,
"loss": 1.5404,
"step": 100
},
{
"epoch": 0.28109627547434995,
"grad_norm": 2.3361048698425293,
"learning_rate": 0.00029860103626943,
"loss": 0.7066,
"step": 200
},
{
"epoch": 0.28109627547434995,
"eval_loss": 0.5066322684288025,
"eval_runtime": 58.7073,
"eval_samples_per_second": 25.414,
"eval_steps_per_second": 3.185,
"eval_wer": 0.7647720391037287,
"step": 200
},
{
"epoch": 0.42164441321152496,
"grad_norm": 2.280653476715088,
"learning_rate": 0.0002971879415920866,
"loss": 0.5841,
"step": 300
},
{
"epoch": 0.5621925509486999,
"grad_norm": 3.3147284984588623,
"learning_rate": 0.00029577484691474326,
"loss": 0.5538,
"step": 400
},
{
"epoch": 0.5621925509486999,
"eval_loss": 0.4313148260116577,
"eval_runtime": 57.0018,
"eval_samples_per_second": 26.175,
"eval_steps_per_second": 3.281,
"eval_wer": 0.7232459555324855,
"step": 400
},
{
"epoch": 0.7027406886858749,
"grad_norm": 2.074723243713379,
"learning_rate": 0.00029436175223739985,
"loss": 0.5065,
"step": 500
},
{
"epoch": 0.8432888264230499,
"grad_norm": 1.6516566276550293,
"learning_rate": 0.0002929486575600565,
"loss": 0.4574,
"step": 600
},
{
"epoch": 0.8432888264230499,
"eval_loss": 0.41018426418304443,
"eval_runtime": 57.3352,
"eval_samples_per_second": 26.022,
"eval_steps_per_second": 3.262,
"eval_wer": 0.6956484124924301,
"step": 600
},
{
"epoch": 0.9838369641602249,
"grad_norm": 2.077942132949829,
"learning_rate": 0.0002915355628827131,
"loss": 0.4548,
"step": 700
},
{
"epoch": 1.1243851018973998,
"grad_norm": 3.208420753479004,
"learning_rate": 0.00029012246820536974,
"loss": 0.4084,
"step": 800
},
{
"epoch": 1.1243851018973998,
"eval_loss": 0.3528524935245514,
"eval_runtime": 57.2723,
"eval_samples_per_second": 26.051,
"eval_steps_per_second": 3.265,
"eval_wer": 0.6276494506445194,
"step": 800
},
{
"epoch": 1.264933239634575,
"grad_norm": 2.2176339626312256,
"learning_rate": 0.00028870937352802633,
"loss": 0.4086,
"step": 900
},
{
"epoch": 1.4054813773717498,
"grad_norm": 1.1724199056625366,
"learning_rate": 0.000287296278850683,
"loss": 0.388,
"step": 1000
},
{
"epoch": 1.4054813773717498,
"eval_loss": 0.3004082143306732,
"eval_runtime": 56.7843,
"eval_samples_per_second": 26.275,
"eval_steps_per_second": 3.293,
"eval_wer": 0.5723678518903019,
"step": 1000
},
{
"epoch": 1.5460295151089247,
"grad_norm": 1.3748365640640259,
"learning_rate": 0.00028588318417333957,
"loss": 0.3791,
"step": 1100
},
{
"epoch": 1.6865776528460998,
"grad_norm": 1.1393488645553589,
"learning_rate": 0.0002844700894959962,
"loss": 0.3803,
"step": 1200
},
{
"epoch": 1.6865776528460998,
"eval_loss": 0.3376190662384033,
"eval_runtime": 56.8427,
"eval_samples_per_second": 26.248,
"eval_steps_per_second": 3.29,
"eval_wer": 0.647720391037287,
"step": 1200
},
{
"epoch": 1.8271257905832747,
"grad_norm": 2.872753143310547,
"learning_rate": 0.0002830569948186528,
"loss": 0.3644,
"step": 1300
},
{
"epoch": 1.9676739283204498,
"grad_norm": 0.8893818855285645,
"learning_rate": 0.00028164390014130946,
"loss": 0.367,
"step": 1400
},
{
"epoch": 1.9676739283204498,
"eval_loss": 0.29106405377388,
"eval_runtime": 56.9876,
"eval_samples_per_second": 26.181,
"eval_steps_per_second": 3.281,
"eval_wer": 0.5801539925599101,
"step": 1400
},
{
"epoch": 2.1082220660576247,
"grad_norm": 0.9697826504707336,
"learning_rate": 0.00028023080546396605,
"loss": 0.3335,
"step": 1500
},
{
"epoch": 2.2487702037947996,
"grad_norm": 1.1089781522750854,
"learning_rate": 0.0002788177107866227,
"loss": 0.3168,
"step": 1600
},
{
"epoch": 2.2487702037947996,
"eval_loss": 0.31056511402130127,
"eval_runtime": 57.0385,
"eval_samples_per_second": 26.158,
"eval_steps_per_second": 3.278,
"eval_wer": 0.5724543645644087,
"step": 1600
},
{
"epoch": 2.3893183415319745,
"grad_norm": 0.8601323962211609,
"learning_rate": 0.0002774046161092793,
"loss": 0.3268,
"step": 1700
},
{
"epoch": 2.52986647926915,
"grad_norm": 0.7583444714546204,
"learning_rate": 0.00027599152143193594,
"loss": 0.3227,
"step": 1800
},
{
"epoch": 2.52986647926915,
"eval_loss": 0.26542848348617554,
"eval_runtime": 57.1567,
"eval_samples_per_second": 26.104,
"eval_steps_per_second": 3.272,
"eval_wer": 0.5348213513279696,
"step": 1800
},
{
"epoch": 2.6704146170063248,
"grad_norm": 1.2712171077728271,
"learning_rate": 0.00027457842675459253,
"loss": 0.3081,
"step": 1900
},
{
"epoch": 2.8109627547434997,
"grad_norm": 0.7406817674636841,
"learning_rate": 0.0002731653320772492,
"loss": 0.3111,
"step": 2000
},
{
"epoch": 2.8109627547434997,
"eval_loss": 0.26211297512054443,
"eval_runtime": 56.777,
"eval_samples_per_second": 26.278,
"eval_steps_per_second": 3.294,
"eval_wer": 0.5494419932520114,
"step": 2000
},
{
"epoch": 2.9515108924806746,
"grad_norm": 1.053320050239563,
"learning_rate": 0.0002717522373999058,
"loss": 0.3181,
"step": 2100
},
{
"epoch": 3.0920590302178494,
"grad_norm": 0.7663154006004333,
"learning_rate": 0.00027033914272256237,
"loss": 0.2823,
"step": 2200
},
{
"epoch": 3.0920590302178494,
"eval_loss": 0.2665364444255829,
"eval_runtime": 56.8072,
"eval_samples_per_second": 26.264,
"eval_steps_per_second": 3.292,
"eval_wer": 0.5421749286270439,
"step": 2200
},
{
"epoch": 3.232607167955025,
"grad_norm": 0.5670559406280518,
"learning_rate": 0.000268926048045219,
"loss": 0.2593,
"step": 2300
},
{
"epoch": 3.3731553056921997,
"grad_norm": 0.800305187702179,
"learning_rate": 0.0002675129533678756,
"loss": 0.2603,
"step": 2400
},
{
"epoch": 3.3731553056921997,
"eval_loss": 0.26233434677124023,
"eval_runtime": 56.2939,
"eval_samples_per_second": 26.504,
"eval_steps_per_second": 3.322,
"eval_wer": 0.5174323038325115,
"step": 2400
},
{
"epoch": 3.5137034434293746,
"grad_norm": 0.581555187702179,
"learning_rate": 0.00026609985869053225,
"loss": 0.2835,
"step": 2500
},
{
"epoch": 3.6542515811665495,
"grad_norm": 0.5515788793563843,
"learning_rate": 0.00026468676401318885,
"loss": 0.2735,
"step": 2600
},
{
"epoch": 3.6542515811665495,
"eval_loss": 0.2620340585708618,
"eval_runtime": 57.2283,
"eval_samples_per_second": 26.071,
"eval_steps_per_second": 3.268,
"eval_wer": 0.5353404273726101,
"step": 2600
},
{
"epoch": 3.7947997189037244,
"grad_norm": 0.6277577877044678,
"learning_rate": 0.0002632736693358455,
"loss": 0.2806,
"step": 2700
},
{
"epoch": 3.9353478566408997,
"grad_norm": 0.5215665698051453,
"learning_rate": 0.0002618605746585021,
"loss": 0.2666,
"step": 2800
},
{
"epoch": 3.9353478566408997,
"eval_loss": 0.2752685844898224,
"eval_runtime": 57.0665,
"eval_samples_per_second": 26.145,
"eval_steps_per_second": 3.277,
"eval_wer": 0.5450298468725668,
"step": 2800
},
{
"epoch": 4.075895994378074,
"grad_norm": 0.745657205581665,
"learning_rate": 0.0002604474799811587,
"loss": 0.2479,
"step": 2900
},
{
"epoch": 4.2164441321152495,
"grad_norm": 0.44574543833732605,
"learning_rate": 0.00025903438530381533,
"loss": 0.2248,
"step": 3000
},
{
"epoch": 4.2164441321152495,
"eval_loss": 0.28806060552597046,
"eval_runtime": 56.7213,
"eval_samples_per_second": 26.304,
"eval_steps_per_second": 3.297,
"eval_wer": 0.5817977333679384,
"step": 3000
},
{
"epoch": 4.356992269852425,
"grad_norm": 0.690984845161438,
"learning_rate": 0.0002576212906264719,
"loss": 0.2502,
"step": 3100
},
{
"epoch": 4.497540407589599,
"grad_norm": 0.47614070773124695,
"learning_rate": 0.00025620819594912857,
"loss": 0.2408,
"step": 3200
},
{
"epoch": 4.497540407589599,
"eval_loss": 0.2748269736766815,
"eval_runtime": 57.1007,
"eval_samples_per_second": 26.129,
"eval_steps_per_second": 3.275,
"eval_wer": 0.5323989964529804,
"step": 3200
},
{
"epoch": 4.497540407589599,
"step": 3200,
"total_flos": 9.439925733716597e+18,
"train_loss": 0.3939369261264801,
"train_runtime": 5287.5609,
"train_samples_per_second": 64.55,
"train_steps_per_second": 4.034
}
],
"logging_steps": 100,
"max_steps": 21330,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.439925733716597e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}