sanchit-gandhi committed on Mar 27, 2024

Commit

d175c02

verified ·

1 Parent(s): 5d594ee

Training in progress, step 1000

Browse files

Files changed (18) hide show

.gitattributes +0 -1
all_results.json +8 -0
model.safetensors +1 -1
run.sh +1 -0
runs/Mar27_19-04-58_hf-dgx-01/events.out.tfevents.1711562712.hf-dgx-01.1894903.0 +3 -0
train_results.json +8 -0
trainer_state.json +1475 -0
training_args.bin +1 -1
wandb/debug-internal.log +0 -0
wandb/debug.log +26 -26
wandb/run-20240327_190513-7p2x8a0l/files/config.yaml +751 -0
wandb/run-20240327_190513-7p2x8a0l/files/output.log +1033 -0
wandb/run-20240327_190513-7p2x8a0l/files/requirements.txt +246 -0
wandb/run-20240327_190513-7p2x8a0l/files/wandb-metadata.json +739 -0
wandb/run-20240327_190513-7p2x8a0l/files/wandb-summary.json +1 -0
wandb/run-20240327_190513-7p2x8a0l/logs/debug-internal.log +0 -0
wandb/run-20240327_190513-7p2x8a0l/logs/debug.log +28 -0
wandb/run-20240327_190513-7p2x8a0l/run-7p2x8a0l.wandb +0 -0

.gitattributes CHANGED Viewed

@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-wandb/run-20240327_141033-golaq7b9/run-golaq7b9.wandb filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 22.52,
+    "train_loss": 0.17524469082718716,
+    "train_runtime": 15083.6622,
+    "train_samples": 7099,
+    "train_samples_per_second": 10.608,
+    "train_steps_per_second": 0.331
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:915a14d6cca1e5b43dd51645e21e653f0b7f7ef389ed27f01c02d53e9d5fbfaa
 size 3025686376

 version https://git-lfs.github.com/spec/v1
+oid sha256:82b28427eca5db81abd6c0b41b5a828e9deac65e6e8d7d071ed00be3850a7dda
 size 3025686376

run.sh CHANGED Viewed

@@ -16,6 +16,7 @@ python run_speech_recognition_seq2seq.py \
 	--eval_steps="1000" \
 	--save_strategy="steps" \
 	--save_steps="1000" \
 	--generation_max_length="225" \
 	--preprocessing_num_workers="1" \
 	--dataloader_num_workers="4" \

 	--eval_steps="1000" \
 	--save_strategy="steps" \
 	--save_steps="1000" \
+	--save_total_limit="1" \
 	--generation_max_length="225" \
 	--preprocessing_num_workers="1" \
 	--dataloader_num_workers="4" \

runs/Mar27_19-04-58_hf-dgx-01/events.out.tfevents.1711562712.hf-dgx-01.1894903.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42d2088d006f005c7eb61e37b06f361c2c53843da4442f53136b5f5981f55f50
+size 14123

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 22.52,
+    "train_loss": 0.17524469082718716,
+    "train_runtime": 15083.6622,
+    "train_samples": 7099,
+    "train_samples_per_second": 10.608,
+    "train_steps_per_second": 0.331
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1475 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 22.52252252252252,
+  "eval_steps": 1000,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.11,
+      "grad_norm": 29.428333282470703,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 11.9112,
+      "step": 25
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 12.572431564331055,
+      "learning_rate": 9e-06,
+      "loss": 5.9607,
+      "step": 50
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 6.247668743133545,
+      "learning_rate": 1.4000000000000001e-05,
+      "loss": 2.7899,
+      "step": 75
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 5.499792098999023,
+      "learning_rate": 1.9e-05,
+      "loss": 1.934,
+      "step": 100
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 10.862707138061523,
+      "learning_rate": 2.4e-05,
+      "loss": 1.1845,
+      "step": 125
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 6.8538055419921875,
+      "learning_rate": 2.9e-05,
+      "loss": 0.7883,
+      "step": 150
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 8.127602577209473,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.6147,
+      "step": 175
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 4.003240585327148,
+      "learning_rate": 3.9000000000000006e-05,
+      "loss": 0.5233,
+      "step": 200
+    },
+    {
+      "epoch": 1.01,
+      "grad_norm": 3.650707483291626,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.453,
+      "step": 225
+    },
+    {
+      "epoch": 1.13,
+      "grad_norm": 4.5928239822387695,
+      "learning_rate": 4.9e-05,
+      "loss": 0.3913,
+      "step": 250
+    },
+    {
+      "epoch": 1.24,
+      "grad_norm": 4.008325576782227,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.3729,
+      "step": 275
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 4.239988327026367,
+      "learning_rate": 5.9e-05,
+      "loss": 0.3544,
+      "step": 300
+    },
+    {
+      "epoch": 1.46,
+      "grad_norm": 3.8822410106658936,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.3229,
+      "step": 325
+    },
+    {
+      "epoch": 1.58,
+      "grad_norm": 3.0306766033172607,
+      "learning_rate": 6.9e-05,
+      "loss": 0.3357,
+      "step": 350
+    },
+    {
+      "epoch": 1.69,
+      "grad_norm": 2.7435803413391113,
+      "learning_rate": 7.4e-05,
+      "loss": 0.3148,
+      "step": 375
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 3.684567928314209,
+      "learning_rate": 7.900000000000001e-05,
+      "loss": 0.2912,
+      "step": 400
+    },
+    {
+      "epoch": 1.91,
+      "grad_norm": 2.486985206604004,
+      "learning_rate": 8.4e-05,
+      "loss": 0.3058,
+      "step": 425
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 2.5083959102630615,
+      "learning_rate": 8.900000000000001e-05,
+      "loss": 0.2651,
+      "step": 450
+    },
+    {
+      "epoch": 2.14,
+      "grad_norm": 4.557464599609375,
+      "learning_rate": 9.4e-05,
+      "loss": 0.2339,
+      "step": 475
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 3.3180325031280518,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 0.2337,
+      "step": 500
+    },
+    {
+      "epoch": 2.36,
+      "grad_norm": 2.496147632598877,
+      "learning_rate": 9.955555555555556e-05,
+      "loss": 0.2372,
+      "step": 525
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 2.2330338954925537,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 0.2219,
+      "step": 550
+    },
+    {
+      "epoch": 2.59,
+      "grad_norm": 3.0495846271514893,
+      "learning_rate": 9.844444444444444e-05,
+      "loss": 0.2323,
+      "step": 575
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 2.3662843704223633,
+      "learning_rate": 9.78888888888889e-05,
+      "loss": 0.2324,
+      "step": 600
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 1.981231451034546,
+      "learning_rate": 9.733333333333335e-05,
+      "loss": 0.2088,
+      "step": 625
+    },
+    {
+      "epoch": 2.93,
+      "grad_norm": 2.484710454940796,
+      "learning_rate": 9.677777777777778e-05,
+      "loss": 0.2195,
+      "step": 650
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 1.7488161325454712,
+      "learning_rate": 9.622222222222222e-05,
+      "loss": 0.1868,
+      "step": 675
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 2.266071081161499,
+      "learning_rate": 9.566666666666667e-05,
+      "loss": 0.1537,
+      "step": 700
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 1.6045178174972534,
+      "learning_rate": 9.511111111111112e-05,
+      "loss": 0.157,
+      "step": 725
+    },
+    {
+      "epoch": 3.38,
+      "grad_norm": 1.8283653259277344,
+      "learning_rate": 9.455555555555556e-05,
+      "loss": 0.1516,
+      "step": 750
+    },
+    {
+      "epoch": 3.49,
+      "grad_norm": 2.1718389987945557,
+      "learning_rate": 9.4e-05,
+      "loss": 0.1657,
+      "step": 775
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 2.778785467147827,
+      "learning_rate": 9.344444444444444e-05,
+      "loss": 0.1529,
+      "step": 800
+    },
+    {
+      "epoch": 3.72,
+      "grad_norm": 2.0423874855041504,
+      "learning_rate": 9.28888888888889e-05,
+      "loss": 0.153,
+      "step": 825
+    },
+    {
+      "epoch": 3.83,
+      "grad_norm": 1.7835185527801514,
+      "learning_rate": 9.233333333333333e-05,
+      "loss": 0.1514,
+      "step": 850
+    },
+    {
+      "epoch": 3.94,
+      "grad_norm": 2.091015100479126,
+      "learning_rate": 9.177777777777778e-05,
+      "loss": 0.151,
+      "step": 875
+    },
+    {
+      "epoch": 4.05,
+      "grad_norm": 1.47210693359375,
+      "learning_rate": 9.122222222222223e-05,
+      "loss": 0.1248,
+      "step": 900
+    },
+    {
+      "epoch": 4.17,
+      "grad_norm": 1.5700939893722534,
+      "learning_rate": 9.066666666666667e-05,
+      "loss": 0.0955,
+      "step": 925
+    },
+    {
+      "epoch": 4.28,
+      "grad_norm": 1.0798161029815674,
+      "learning_rate": 9.011111111111111e-05,
+      "loss": 0.0965,
+      "step": 950
+    },
+    {
+      "epoch": 4.39,
+      "grad_norm": 1.250017523765564,
+      "learning_rate": 8.955555555555556e-05,
+      "loss": 0.1029,
+      "step": 975
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 1.3333516120910645,
+      "learning_rate": 8.900000000000001e-05,
+      "loss": 0.1015,
+      "step": 1000
+    },
+    {
+      "epoch": 4.5,
+      "eval_loss": 0.3065292239189148,
+      "eval_runtime": 1302.6648,
+      "eval_samples_per_second": 2.397,
+      "eval_steps_per_second": 0.075,
+      "eval_wer": 0.3243838368229931,
+      "step": 1000
+    },
+    {
+      "epoch": 4.62,
+      "grad_norm": 2.2534544467926025,
+      "learning_rate": 8.844444444444445e-05,
+      "loss": 0.1098,
+      "step": 1025
+    },
+    {
+      "epoch": 4.73,
+      "grad_norm": 1.6706323623657227,
+      "learning_rate": 8.78888888888889e-05,
+      "loss": 0.1066,
+      "step": 1050
+    },
+    {
+      "epoch": 4.84,
+      "grad_norm": 1.9353983402252197,
+      "learning_rate": 8.733333333333333e-05,
+      "loss": 0.1033,
+      "step": 1075
+    },
+    {
+      "epoch": 4.95,
+      "grad_norm": 1.833392858505249,
+      "learning_rate": 8.677777777777778e-05,
+      "loss": 0.1041,
+      "step": 1100
+    },
+    {
+      "epoch": 5.07,
+      "grad_norm": 1.094043254852295,
+      "learning_rate": 8.622222222222222e-05,
+      "loss": 0.0782,
+      "step": 1125
+    },
+    {
+      "epoch": 5.18,
+      "grad_norm": 1.6280676126480103,
+      "learning_rate": 8.566666666666667e-05,
+      "loss": 0.0604,
+      "step": 1150
+    },
+    {
+      "epoch": 5.29,
+      "grad_norm": 1.2326525449752808,
+      "learning_rate": 8.511111111111112e-05,
+      "loss": 0.0665,
+      "step": 1175
+    },
+    {
+      "epoch": 5.41,
+      "grad_norm": 1.186036467552185,
+      "learning_rate": 8.455555555555556e-05,
+      "loss": 0.0679,
+      "step": 1200
+    },
+    {
+      "epoch": 5.52,
+      "grad_norm": 1.3472570180892944,
+      "learning_rate": 8.4e-05,
+      "loss": 0.0656,
+      "step": 1225
+    },
+    {
+      "epoch": 5.63,
+      "grad_norm": 2.1403074264526367,
+      "learning_rate": 8.344444444444445e-05,
+      "loss": 0.0674,
+      "step": 1250
+    },
+    {
+      "epoch": 5.74,
+      "grad_norm": 1.0580947399139404,
+      "learning_rate": 8.28888888888889e-05,
+      "loss": 0.0713,
+      "step": 1275
+    },
+    {
+      "epoch": 5.86,
+      "grad_norm": 1.0808650255203247,
+      "learning_rate": 8.233333333333333e-05,
+      "loss": 0.0713,
+      "step": 1300
+    },
+    {
+      "epoch": 5.97,
+      "grad_norm": 1.0721344947814941,
+      "learning_rate": 8.177777777777778e-05,
+      "loss": 0.0707,
+      "step": 1325
+    },
+    {
+      "epoch": 6.08,
+      "grad_norm": 1.7433174848556519,
+      "learning_rate": 8.122222222222222e-05,
+      "loss": 0.0492,
+      "step": 1350
+    },
+    {
+      "epoch": 6.19,
+      "grad_norm": 0.9549305438995361,
+      "learning_rate": 8.066666666666667e-05,
+      "loss": 0.0418,
+      "step": 1375
+    },
+    {
+      "epoch": 6.31,
+      "grad_norm": 1.4030609130859375,
+      "learning_rate": 8.011111111111111e-05,
+      "loss": 0.0382,
+      "step": 1400
+    },
+    {
+      "epoch": 6.42,
+      "grad_norm": 0.9085283279418945,
+      "learning_rate": 7.955555555555556e-05,
+      "loss": 0.0369,
+      "step": 1425
+    },
+    {
+      "epoch": 6.53,
+      "grad_norm": 1.0393314361572266,
+      "learning_rate": 7.900000000000001e-05,
+      "loss": 0.0403,
+      "step": 1450
+    },
+    {
+      "epoch": 6.64,
+      "grad_norm": 0.675774872303009,
+      "learning_rate": 7.844444444444446e-05,
+      "loss": 0.0414,
+      "step": 1475
+    },
+    {
+      "epoch": 6.76,
+      "grad_norm": 0.8051535487174988,
+      "learning_rate": 7.788888888888888e-05,
+      "loss": 0.0426,
+      "step": 1500
+    },
+    {
+      "epoch": 6.87,
+      "grad_norm": 1.4626388549804688,
+      "learning_rate": 7.733333333333333e-05,
+      "loss": 0.0436,
+      "step": 1525
+    },
+    {
+      "epoch": 6.98,
+      "grad_norm": 0.8418045043945312,
+      "learning_rate": 7.677777777777778e-05,
+      "loss": 0.0442,
+      "step": 1550
+    },
+    {
+      "epoch": 7.09,
+      "grad_norm": 1.3747352361679077,
+      "learning_rate": 7.622222222222223e-05,
+      "loss": 0.0281,
+      "step": 1575
+    },
+    {
+      "epoch": 7.21,
+      "grad_norm": 0.5290963649749756,
+      "learning_rate": 7.566666666666667e-05,
+      "loss": 0.0237,
+      "step": 1600
+    },
+    {
+      "epoch": 7.32,
+      "grad_norm": 1.2137552499771118,
+      "learning_rate": 7.511111111111111e-05,
+      "loss": 0.0249,
+      "step": 1625
+    },
+    {
+      "epoch": 7.43,
+      "grad_norm": 0.7687398791313171,
+      "learning_rate": 7.455555555555556e-05,
+      "loss": 0.0261,
+      "step": 1650
+    },
+    {
+      "epoch": 7.55,
+      "grad_norm": 1.1545344591140747,
+      "learning_rate": 7.4e-05,
+      "loss": 0.0249,
+      "step": 1675
+    },
+    {
+      "epoch": 7.66,
+      "grad_norm": 0.7673143148422241,
+      "learning_rate": 7.344444444444445e-05,
+      "loss": 0.0248,
+      "step": 1700
+    },
+    {
+      "epoch": 7.77,
+      "grad_norm": 0.9905190467834473,
+      "learning_rate": 7.28888888888889e-05,
+      "loss": 0.0254,
+      "step": 1725
+    },
+    {
+      "epoch": 7.88,
+      "grad_norm": 1.764397382736206,
+      "learning_rate": 7.233333333333335e-05,
+      "loss": 0.0297,
+      "step": 1750
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.9069448709487915,
+      "learning_rate": 7.177777777777777e-05,
+      "loss": 0.0275,
+      "step": 1775
+    },
+    {
+      "epoch": 8.11,
+      "grad_norm": 1.1385760307312012,
+      "learning_rate": 7.122222222222222e-05,
+      "loss": 0.0162,
+      "step": 1800
+    },
+    {
+      "epoch": 8.22,
+      "grad_norm": 0.5694571733474731,
+      "learning_rate": 7.066666666666667e-05,
+      "loss": 0.0149,
+      "step": 1825
+    },
+    {
+      "epoch": 8.33,
+      "grad_norm": 1.0839495658874512,
+      "learning_rate": 7.011111111111112e-05,
+      "loss": 0.0175,
+      "step": 1850
+    },
+    {
+      "epoch": 8.45,
+      "grad_norm": 0.7086426019668579,
+      "learning_rate": 6.955555555555556e-05,
+      "loss": 0.0189,
+      "step": 1875
+    },
+    {
+      "epoch": 8.56,
+      "grad_norm": 0.9548362493515015,
+      "learning_rate": 6.9e-05,
+      "loss": 0.0193,
+      "step": 1900
+    },
+    {
+      "epoch": 8.67,
+      "grad_norm": 0.9621508717536926,
+      "learning_rate": 6.844444444444445e-05,
+      "loss": 0.0186,
+      "step": 1925
+    },
+    {
+      "epoch": 8.78,
+      "grad_norm": 0.6629220843315125,
+      "learning_rate": 6.788888888888888e-05,
+      "loss": 0.0171,
+      "step": 1950
+    },
+    {
+      "epoch": 8.9,
+      "grad_norm": 0.7981088161468506,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 0.0175,
+      "step": 1975
+    },
+    {
+      "epoch": 9.01,
+      "grad_norm": 0.45495709776878357,
+      "learning_rate": 6.677777777777779e-05,
+      "loss": 0.0167,
+      "step": 2000
+    },
+    {
+      "epoch": 9.01,
+      "eval_loss": 0.3443203866481781,
+      "eval_runtime": 1269.8219,
+      "eval_samples_per_second": 2.459,
+      "eval_steps_per_second": 0.077,
+      "eval_wer": 0.2994668933013984,
+      "step": 2000
+    },
+    {
+      "epoch": 9.12,
+      "grad_norm": 1.0250108242034912,
+      "learning_rate": 6.622222222222224e-05,
+      "loss": 0.0124,
+      "step": 2025
+    },
+    {
+      "epoch": 9.23,
+      "grad_norm": 0.533909022808075,
+      "learning_rate": 6.566666666666666e-05,
+      "loss": 0.0128,
+      "step": 2050
+    },
+    {
+      "epoch": 9.35,
+      "grad_norm": 0.5022910237312317,
+      "learning_rate": 6.511111111111111e-05,
+      "loss": 0.0127,
+      "step": 2075
+    },
+    {
+      "epoch": 9.46,
+      "grad_norm": 1.3371328115463257,
+      "learning_rate": 6.455555555555556e-05,
+      "loss": 0.0116,
+      "step": 2100
+    },
+    {
+      "epoch": 9.57,
+      "grad_norm": 1.2396471500396729,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.0112,
+      "step": 2125
+    },
+    {
+      "epoch": 9.68,
+      "grad_norm": 1.2121708393096924,
+      "learning_rate": 6.344444444444445e-05,
+      "loss": 0.0107,
+      "step": 2150
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 1.3228121995925903,
+      "learning_rate": 6.28888888888889e-05,
+      "loss": 0.0108,
+      "step": 2175
+    },
+    {
+      "epoch": 9.91,
+      "grad_norm": 0.6204155683517456,
+      "learning_rate": 6.233333333333334e-05,
+      "loss": 0.0123,
+      "step": 2200
+    },
+    {
+      "epoch": 10.02,
+      "grad_norm": 0.4221612811088562,
+      "learning_rate": 6.177777777777779e-05,
+      "loss": 0.0117,
+      "step": 2225
+    },
+    {
+      "epoch": 10.14,
+      "grad_norm": 0.8225328922271729,
+      "learning_rate": 6.122222222222222e-05,
+      "loss": 0.008,
+      "step": 2250
+    },
+    {
+      "epoch": 10.25,
+      "grad_norm": 0.22648921608924866,
+      "learning_rate": 6.066666666666667e-05,
+      "loss": 0.0075,
+      "step": 2275
+    },
+    {
+      "epoch": 10.36,
+      "grad_norm": 1.0620574951171875,
+      "learning_rate": 6.011111111111112e-05,
+      "loss": 0.0077,
+      "step": 2300
+    },
+    {
+      "epoch": 10.47,
+      "grad_norm": 0.5009572505950928,
+      "learning_rate": 5.9555555555555554e-05,
+      "loss": 0.008,
+      "step": 2325
+    },
+    {
+      "epoch": 10.59,
+      "grad_norm": 0.6466513872146606,
+      "learning_rate": 5.9e-05,
+      "loss": 0.0098,
+      "step": 2350
+    },
+    {
+      "epoch": 10.7,
+      "grad_norm": 0.2255641371011734,
+      "learning_rate": 5.844444444444445e-05,
+      "loss": 0.0094,
+      "step": 2375
+    },
+    {
+      "epoch": 10.81,
+      "grad_norm": 0.838545560836792,
+      "learning_rate": 5.788888888888889e-05,
+      "loss": 0.0089,
+      "step": 2400
+    },
+    {
+      "epoch": 10.92,
+      "grad_norm": 0.6793853044509888,
+      "learning_rate": 5.7333333333333336e-05,
+      "loss": 0.0087,
+      "step": 2425
+    },
+    {
+      "epoch": 11.04,
+      "grad_norm": 0.548841655254364,
+      "learning_rate": 5.6777777777777786e-05,
+      "loss": 0.0069,
+      "step": 2450
+    },
+    {
+      "epoch": 11.15,
+      "grad_norm": 0.22741466760635376,
+      "learning_rate": 5.622222222222222e-05,
+      "loss": 0.0065,
+      "step": 2475
+    },
+    {
+      "epoch": 11.26,
+      "grad_norm": 0.4155316650867462,
+      "learning_rate": 5.566666666666667e-05,
+      "loss": 0.0058,
+      "step": 2500
+    },
+    {
+      "epoch": 11.37,
+      "grad_norm": 0.48344260454177856,
+      "learning_rate": 5.511111111111111e-05,
+      "loss": 0.005,
+      "step": 2525
+    },
+    {
+      "epoch": 11.49,
+      "grad_norm": 0.9006750583648682,
+      "learning_rate": 5.455555555555556e-05,
+      "loss": 0.0045,
+      "step": 2550
+    },
+    {
+      "epoch": 11.6,
+      "grad_norm": 0.9966240525245667,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.0047,
+      "step": 2575
+    },
+    {
+      "epoch": 11.71,
+      "grad_norm": 0.39858147501945496,
+      "learning_rate": 5.3444444444444455e-05,
+      "loss": 0.0053,
+      "step": 2600
+    },
+    {
+      "epoch": 11.82,
+      "grad_norm": 0.6118489503860474,
+      "learning_rate": 5.2888888888888885e-05,
+      "loss": 0.0053,
+      "step": 2625
+    },
+    {
+      "epoch": 11.94,
+      "grad_norm": 0.5074841976165771,
+      "learning_rate": 5.2333333333333336e-05,
+      "loss": 0.0057,
+      "step": 2650
+    },
+    {
+      "epoch": 12.05,
+      "grad_norm": 0.6888458728790283,
+      "learning_rate": 5.177777777777778e-05,
+      "loss": 0.0053,
+      "step": 2675
+    },
+    {
+      "epoch": 12.16,
+      "grad_norm": 0.7311161160469055,
+      "learning_rate": 5.122222222222223e-05,
+      "loss": 0.006,
+      "step": 2700
+    },
+    {
+      "epoch": 12.27,
+      "grad_norm": 0.47264620661735535,
+      "learning_rate": 5.0666666666666674e-05,
+      "loss": 0.0058,
+      "step": 2725
+    },
+    {
+      "epoch": 12.39,
+      "grad_norm": 0.6639235019683838,
+      "learning_rate": 5.011111111111111e-05,
+      "loss": 0.0052,
+      "step": 2750
+    },
+    {
+      "epoch": 12.5,
+      "grad_norm": 0.1161256805062294,
+      "learning_rate": 4.955555555555556e-05,
+      "loss": 0.0038,
+      "step": 2775
+    },
+    {
+      "epoch": 12.61,
+      "grad_norm": 0.4923400580883026,
+      "learning_rate": 4.9e-05,
+      "loss": 0.0036,
+      "step": 2800
+    },
+    {
+      "epoch": 12.73,
+      "grad_norm": 0.6149506568908691,
+      "learning_rate": 4.844444444444445e-05,
+      "loss": 0.0046,
+      "step": 2825
+    },
+    {
+      "epoch": 12.84,
+      "grad_norm": 0.16888651251792908,
+      "learning_rate": 4.7888888888888886e-05,
+      "loss": 0.0041,
+      "step": 2850
+    },
+    {
+      "epoch": 12.95,
+      "grad_norm": 1.0652014017105103,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.0041,
+      "step": 2875
+    },
+    {
+      "epoch": 13.06,
+      "grad_norm": 0.21759897470474243,
+      "learning_rate": 4.677777777777778e-05,
+      "loss": 0.003,
+      "step": 2900
+    },
+    {
+      "epoch": 13.18,
+      "grad_norm": 0.23394200205802917,
+      "learning_rate": 4.6222222222222224e-05,
+      "loss": 0.0034,
+      "step": 2925
+    },
+    {
+      "epoch": 13.29,
+      "grad_norm": 0.05768038332462311,
+      "learning_rate": 4.566666666666667e-05,
+      "loss": 0.0037,
+      "step": 2950
+    },
+    {
+      "epoch": 13.4,
+      "grad_norm": 0.08611828088760376,
+      "learning_rate": 4.511111111111112e-05,
+      "loss": 0.0034,
+      "step": 2975
+    },
+    {
+      "epoch": 13.51,
+      "grad_norm": 0.1028035581111908,
+      "learning_rate": 4.4555555555555555e-05,
+      "loss": 0.0032,
+      "step": 3000
+    },
+    {
+      "epoch": 13.51,
+      "eval_loss": 0.3575945198535919,
+      "eval_runtime": 1297.8448,
+      "eval_samples_per_second": 2.406,
+      "eval_steps_per_second": 0.076,
+      "eval_wer": 0.27779494707563934,
+      "step": 3000
+    },
+    {
+      "epoch": 13.63,
+      "grad_norm": 0.23182912170886993,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.0027,
+      "step": 3025
+    },
+    {
+      "epoch": 13.74,
+      "grad_norm": 0.100206658244133,
+      "learning_rate": 4.344444444444445e-05,
+      "loss": 0.0027,
+      "step": 3050
+    },
+    {
+      "epoch": 13.85,
+      "grad_norm": 0.9118719100952148,
+      "learning_rate": 4.2888888888888886e-05,
+      "loss": 0.003,
+      "step": 3075
+    },
+    {
+      "epoch": 13.96,
+      "grad_norm": 0.06793611496686935,
+      "learning_rate": 4.233333333333334e-05,
+      "loss": 0.003,
+      "step": 3100
+    },
+    {
+      "epoch": 14.08,
+      "grad_norm": 0.0683990940451622,
+      "learning_rate": 4.177777777777778e-05,
+      "loss": 0.0021,
+      "step": 3125
+    },
+    {
+      "epoch": 14.19,
+      "grad_norm": 0.19087089598178864,
+      "learning_rate": 4.1222222222222224e-05,
+      "loss": 0.0028,
+      "step": 3150
+    },
+    {
+      "epoch": 14.3,
+      "grad_norm": 0.14526407420635223,
+      "learning_rate": 4.066666666666667e-05,
+      "loss": 0.0025,
+      "step": 3175
+    },
+    {
+      "epoch": 14.41,
+      "grad_norm": 0.5902572870254517,
+      "learning_rate": 4.011111111111111e-05,
+      "loss": 0.0031,
+      "step": 3200
+    },
+    {
+      "epoch": 14.53,
+      "grad_norm": 0.1988796442747116,
+      "learning_rate": 3.9555555555555556e-05,
+      "loss": 0.0021,
+      "step": 3225
+    },
+    {
+      "epoch": 14.64,
+      "grad_norm": 0.178738534450531,
+      "learning_rate": 3.9000000000000006e-05,
+      "loss": 0.0031,
+      "step": 3250
+    },
+    {
+      "epoch": 14.75,
+      "grad_norm": 0.03732344135642052,
+      "learning_rate": 3.844444444444444e-05,
+      "loss": 0.0026,
+      "step": 3275
+    },
+    {
+      "epoch": 14.86,
+      "grad_norm": 0.047354888170957565,
+      "learning_rate": 3.7888888888888894e-05,
+      "loss": 0.0016,
+      "step": 3300
+    },
+    {
+      "epoch": 14.98,
+      "grad_norm": 0.058274924755096436,
+      "learning_rate": 3.733333333333334e-05,
+      "loss": 0.0019,
+      "step": 3325
+    },
+    {
+      "epoch": 15.09,
+      "grad_norm": 1.4180477857589722,
+      "learning_rate": 3.677777777777778e-05,
+      "loss": 0.0016,
+      "step": 3350
+    },
+    {
+      "epoch": 15.2,
+      "grad_norm": 0.03281378000974655,
+      "learning_rate": 3.6222222222222225e-05,
+      "loss": 0.0016,
+      "step": 3375
+    },
+    {
+      "epoch": 15.32,
+      "grad_norm": 0.2159404158592224,
+      "learning_rate": 3.566666666666667e-05,
+      "loss": 0.0026,
+      "step": 3400
+    },
+    {
+      "epoch": 15.43,
+      "grad_norm": 0.18890638649463654,
+      "learning_rate": 3.511111111111111e-05,
+      "loss": 0.0016,
+      "step": 3425
+    },
+    {
+      "epoch": 15.54,
+      "grad_norm": 0.022921651601791382,
+      "learning_rate": 3.4555555555555556e-05,
+      "loss": 0.0012,
+      "step": 3450
+    },
+    {
+      "epoch": 15.65,
+      "grad_norm": 0.02838265895843506,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.0014,
+      "step": 3475
+    },
+    {
+      "epoch": 15.77,
+      "grad_norm": 0.04957688972353935,
+      "learning_rate": 3.3444444444444443e-05,
+      "loss": 0.0012,
+      "step": 3500
+    },
+    {
+      "epoch": 15.88,
+      "grad_norm": 0.03910296410322189,
+      "learning_rate": 3.2888888888888894e-05,
+      "loss": 0.0008,
+      "step": 3525
+    },
+    {
+      "epoch": 15.99,
+      "grad_norm": 0.3031899034976959,
+      "learning_rate": 3.233333333333333e-05,
+      "loss": 0.0015,
+      "step": 3550
+    },
+    {
+      "epoch": 16.1,
+      "grad_norm": 0.026370937004685402,
+      "learning_rate": 3.177777777777778e-05,
+      "loss": 0.0009,
+      "step": 3575
+    },
+    {
+      "epoch": 16.22,
+      "grad_norm": 0.04645024240016937,
+      "learning_rate": 3.1222222222222225e-05,
+      "loss": 0.0014,
+      "step": 3600
+    },
+    {
+      "epoch": 16.33,
+      "grad_norm": 0.03346904739737511,
+      "learning_rate": 3.066666666666667e-05,
+      "loss": 0.001,
+      "step": 3625
+    },
+    {
+      "epoch": 16.44,
+      "grad_norm": 0.41791531443595886,
+      "learning_rate": 3.0111111111111113e-05,
+      "loss": 0.0019,
+      "step": 3650
+    },
+    {
+      "epoch": 16.55,
+      "grad_norm": 0.023621816188097,
+      "learning_rate": 2.955555555555556e-05,
+      "loss": 0.0009,
+      "step": 3675
+    },
+    {
+      "epoch": 16.67,
+      "grad_norm": 0.020701350644230843,
+      "learning_rate": 2.9e-05,
+      "loss": 0.0009,
+      "step": 3700
+    },
+    {
+      "epoch": 16.78,
+      "grad_norm": 0.018095409497618675,
+      "learning_rate": 2.8444444444444447e-05,
+      "loss": 0.0007,
+      "step": 3725
+    },
+    {
+      "epoch": 16.89,
+      "grad_norm": 0.03800148516893387,
+      "learning_rate": 2.788888888888889e-05,
+      "loss": 0.001,
+      "step": 3750
+    },
+    {
+      "epoch": 17.0,
+      "grad_norm": 0.0219491608440876,
+      "learning_rate": 2.733333333333333e-05,
+      "loss": 0.0012,
+      "step": 3775
+    },
+    {
+      "epoch": 17.12,
+      "grad_norm": 0.19971542060375214,
+      "learning_rate": 2.677777777777778e-05,
+      "loss": 0.001,
+      "step": 3800
+    },
+    {
+      "epoch": 17.23,
+      "grad_norm": 0.022324278950691223,
+      "learning_rate": 2.6222222222222226e-05,
+      "loss": 0.0005,
+      "step": 3825
+    },
+    {
+      "epoch": 17.34,
+      "grad_norm": 0.014598184265196323,
+      "learning_rate": 2.5666666666666666e-05,
+      "loss": 0.0007,
+      "step": 3850
+    },
+    {
+      "epoch": 17.45,
+      "grad_norm": 0.01482320111244917,
+      "learning_rate": 2.5111111111111113e-05,
+      "loss": 0.0008,
+      "step": 3875
+    },
+    {
+      "epoch": 17.57,
+      "grad_norm": 0.019341906532645226,
+      "learning_rate": 2.4555555555555557e-05,
+      "loss": 0.0005,
+      "step": 3900
+    },
+    {
+      "epoch": 17.68,
+      "grad_norm": 0.044308606535196304,
+      "learning_rate": 2.4e-05,
+      "loss": 0.0008,
+      "step": 3925
+    },
+    {
+      "epoch": 17.79,
+      "grad_norm": 0.01700867898762226,
+      "learning_rate": 2.3444444444444448e-05,
+      "loss": 0.0009,
+      "step": 3950
+    },
+    {
+      "epoch": 17.91,
+      "grad_norm": 0.01428561843931675,
+      "learning_rate": 2.288888888888889e-05,
+      "loss": 0.0004,
+      "step": 3975
+    },
+    {
+      "epoch": 18.02,
+      "grad_norm": 0.011909844353795052,
+      "learning_rate": 2.2333333333333335e-05,
+      "loss": 0.0004,
+      "step": 4000
+    },
+    {
+      "epoch": 18.02,
+      "eval_loss": 0.36695417761802673,
+      "eval_runtime": 1296.9402,
+      "eval_samples_per_second": 2.408,
+      "eval_steps_per_second": 0.076,
+      "eval_wer": 0.2677122769064359,
+      "step": 4000
+    },
+    {
+      "epoch": 18.13,
+      "grad_norm": 0.011953528970479965,
+      "learning_rate": 2.177777777777778e-05,
+      "loss": 0.0004,
+      "step": 4025
+    },
+    {
+      "epoch": 18.24,
+      "grad_norm": 0.013035556301474571,
+      "learning_rate": 2.1222222222222223e-05,
+      "loss": 0.0005,
+      "step": 4050
+    },
+    {
+      "epoch": 18.36,
+      "grad_norm": 0.011018014512956142,
+      "learning_rate": 2.0666666666666666e-05,
+      "loss": 0.0003,
+      "step": 4075
+    },
+    {
+      "epoch": 18.47,
+      "grad_norm": 0.011594709008932114,
+      "learning_rate": 2.011111111111111e-05,
+      "loss": 0.0004,
+      "step": 4100
+    },
+    {
+      "epoch": 18.58,
+      "grad_norm": 0.01165748666971922,
+      "learning_rate": 1.9555555555555557e-05,
+      "loss": 0.0003,
+      "step": 4125
+    },
+    {
+      "epoch": 18.69,
+      "grad_norm": 0.012751756235957146,
+      "learning_rate": 1.9e-05,
+      "loss": 0.0003,
+      "step": 4150
+    },
+    {
+      "epoch": 18.81,
+      "grad_norm": 0.01092427410185337,
+      "learning_rate": 1.8444444444444445e-05,
+      "loss": 0.0003,
+      "step": 4175
+    },
+    {
+      "epoch": 18.92,
+      "grad_norm": 0.010369419120252132,
+      "learning_rate": 1.788888888888889e-05,
+      "loss": 0.0007,
+      "step": 4200
+    },
+    {
+      "epoch": 19.03,
+      "grad_norm": 0.009451022371649742,
+      "learning_rate": 1.7333333333333336e-05,
+      "loss": 0.0004,
+      "step": 4225
+    },
+    {
+      "epoch": 19.14,
+      "grad_norm": 0.010264468379318714,
+      "learning_rate": 1.677777777777778e-05,
+      "loss": 0.0003,
+      "step": 4250
+    },
+    {
+      "epoch": 19.26,
+      "grad_norm": 0.009353878907859325,
+      "learning_rate": 1.6222222222222223e-05,
+      "loss": 0.0003,
+      "step": 4275
+    },
+    {
+      "epoch": 19.37,
+      "grad_norm": 0.007795905694365501,
+      "learning_rate": 1.5666666666666667e-05,
+      "loss": 0.0003,
+      "step": 4300
+    },
+    {
+      "epoch": 19.48,
+      "grad_norm": 0.009554468095302582,
+      "learning_rate": 1.5111111111111112e-05,
+      "loss": 0.0004,
+      "step": 4325
+    },
+    {
+      "epoch": 19.59,
+      "grad_norm": 0.009386077523231506,
+      "learning_rate": 1.4555555555555556e-05,
+      "loss": 0.0003,
+      "step": 4350
+    },
+    {
+      "epoch": 19.71,
+      "grad_norm": 0.007565716747194529,
+      "learning_rate": 1.4000000000000001e-05,
+      "loss": 0.0004,
+      "step": 4375
+    },
+    {
+      "epoch": 19.82,
+      "grad_norm": 0.011739292182028294,
+      "learning_rate": 1.3444444444444445e-05,
+      "loss": 0.0003,
+      "step": 4400
+    },
+    {
+      "epoch": 19.93,
+      "grad_norm": 0.011955379508435726,
+      "learning_rate": 1.2888888888888889e-05,
+      "loss": 0.0004,
+      "step": 4425
+    },
+    {
+      "epoch": 20.05,
+      "grad_norm": 0.007369581609964371,
+      "learning_rate": 1.2333333333333334e-05,
+      "loss": 0.0004,
+      "step": 4450
+    },
+    {
+      "epoch": 20.16,
+      "grad_norm": 0.010209435597062111,
+      "learning_rate": 1.1777777777777778e-05,
+      "loss": 0.0003,
+      "step": 4475
+    },
+    {
+      "epoch": 20.27,
+      "grad_norm": 0.009368482045829296,
+      "learning_rate": 1.1222222222222224e-05,
+      "loss": 0.0003,
+      "step": 4500
+    },
+    {
+      "epoch": 20.38,
+      "grad_norm": 0.008915912359952927,
+      "learning_rate": 1.0666666666666667e-05,
+      "loss": 0.0003,
+      "step": 4525
+    },
+    {
+      "epoch": 20.5,
+      "grad_norm": 0.01048735436052084,
+      "learning_rate": 1.0111111111111111e-05,
+      "loss": 0.0003,
+      "step": 4550
+    },
+    {
+      "epoch": 20.61,
+      "grad_norm": 0.010569226928055286,
+      "learning_rate": 9.555555555555556e-06,
+      "loss": 0.0003,
+      "step": 4575
+    },
+    {
+      "epoch": 20.72,
+      "grad_norm": 0.008401792496442795,
+      "learning_rate": 9e-06,
+      "loss": 0.0003,
+      "step": 4600
+    },
+    {
+      "epoch": 20.83,
+      "grad_norm": 0.01062182616442442,
+      "learning_rate": 8.444444444444446e-06,
+      "loss": 0.0003,
+      "step": 4625
+    },
+    {
+      "epoch": 20.95,
+      "grad_norm": 0.007442856673151255,
+      "learning_rate": 7.88888888888889e-06,
+      "loss": 0.0004,
+      "step": 4650
+    },
+    {
+      "epoch": 21.06,
+      "grad_norm": 0.007747430354356766,
+      "learning_rate": 7.333333333333334e-06,
+      "loss": 0.0003,
+      "step": 4675
+    },
+    {
+      "epoch": 21.17,
+      "grad_norm": 0.008953329175710678,
+      "learning_rate": 6.777777777777779e-06,
+      "loss": 0.0003,
+      "step": 4700
+    },
+    {
+      "epoch": 21.28,
+      "grad_norm": 0.0087329912930727,
+      "learning_rate": 6.222222222222222e-06,
+      "loss": 0.0003,
+      "step": 4725
+    },
+    {
+      "epoch": 21.4,
+      "grad_norm": 0.007937785238027573,
+      "learning_rate": 5.666666666666667e-06,
+      "loss": 0.0003,
+      "step": 4750
+    },
+    {
+      "epoch": 21.51,
+      "grad_norm": 0.007708992809057236,
+      "learning_rate": 5.1111111111111115e-06,
+      "loss": 0.0003,
+      "step": 4775
+    },
+    {
+      "epoch": 21.62,
+      "grad_norm": 0.011778591200709343,
+      "learning_rate": 4.555555555555556e-06,
+      "loss": 0.0003,
+      "step": 4800
+    },
+    {
+      "epoch": 21.73,
+      "grad_norm": 0.00828944519162178,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0002,
+      "step": 4825
+    },
+    {
+      "epoch": 21.85,
+      "grad_norm": 0.007438404019922018,
+      "learning_rate": 3.4444444444444444e-06,
+      "loss": 0.0003,
+      "step": 4850
+    },
+    {
+      "epoch": 21.96,
+      "grad_norm": 0.007443991024047136,
+      "learning_rate": 2.888888888888889e-06,
+      "loss": 0.0003,
+      "step": 4875
+    },
+    {
+      "epoch": 22.07,
+      "grad_norm": 0.008769960142672062,
+      "learning_rate": 2.3333333333333336e-06,
+      "loss": 0.0003,
+      "step": 4900
+    },
+    {
+      "epoch": 22.18,
+      "grad_norm": 0.008519369177520275,
+      "learning_rate": 1.777777777777778e-06,
+      "loss": 0.0003,
+      "step": 4925
+    },
+    {
+      "epoch": 22.3,
+      "grad_norm": 0.007310151122510433,
+      "learning_rate": 1.2222222222222223e-06,
+      "loss": 0.0002,
+      "step": 4950
+    },
+    {
+      "epoch": 22.41,
+      "grad_norm": 0.0072664907202124596,
+      "learning_rate": 6.666666666666667e-07,
+      "loss": 0.0002,
+      "step": 4975
+    },
+    {
+      "epoch": 22.52,
+      "grad_norm": 0.00765978591516614,
+      "learning_rate": 1.1111111111111112e-07,
+      "loss": 0.0003,
+      "step": 5000
+    },
+    {
+      "epoch": 22.52,
+      "eval_loss": 0.3728739619255066,
+      "eval_runtime": 1237.2295,
+      "eval_samples_per_second": 2.524,
+      "eval_steps_per_second": 0.079,
+      "eval_wer": 0.2660897782585181,
+      "step": 5000
+    },
+    {
+      "epoch": 22.52,
+      "step": 5000,
+      "total_flos": 2.532745423355904e+20,
+      "train_loss": 0.17524469082718716,
+      "train_runtime": 15083.6622,
+      "train_samples_per_second": 10.608,
+      "train_steps_per_second": 0.331
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 23,
+  "save_steps": 1000,
+  "total_flos": 2.532745423355904e+20,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f27ea4e0d69ad73da18d7df2aac11132046f87eda8cb3c5ff28639d1fba157c7
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad2da03ce66289217d1fa50c2801f63f453b9bfc44d54b73414b3331a94379e0
 size 5048

wandb/debug-internal.log CHANGED Viewed

The diff for this file is too large to render. See raw diff

wandb/debug.log CHANGED Viewed

@@ -1,28 +1,28 @@
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Current SDK version is 0.16.2
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Configure stats pid to 1482814
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/settings
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program_abspath': '/home/sanchit/distil-large-v3-hi-ft-frozen-encoder/run_speech_recognition_seq2seq.py', 'program': 'run_speech_recognition_seq2seq.py'}
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_init.py:_log_setup():526] Logging user logs to /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/run-20240327_141033-golaq7b9/logs/debug.log
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_init.py:_log_setup():527] Logging internal logs to /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/run-20240327_141033-golaq7b9/logs/debug-internal.log
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_init.py:init():566] calling init triggers
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
 config: {}
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_init.py:init():616] starting backend
-2024-03-27 14:10:33,211 INFO    MainThread:1482814 [wandb_init.py:init():620] setting up manager
-2024-03-27 14:10:33,212 INFO    MainThread:1482814 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2024-03-27 14:10:33,213 INFO    MainThread:1482814 [wandb_init.py:init():628] backend started and connected
-2024-03-27 14:10:33,217 INFO    MainThread:1482814 [wandb_init.py:init():720] updated telemetry
-2024-03-27 14:10:33,272 INFO    MainThread:1482814 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
-2024-03-27 14:10:33,578 INFO    MainThread:1482814 [wandb_run.py:_on_init():2254] communicating current version
-2024-03-27 14:10:33,602 INFO    MainThread:1482814 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.5 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-2024-03-27 14:10:33,602 INFO    MainThread:1482814 [wandb_init.py:init():804] starting run threads in backend
-2024-03-27 14:10:34,006 INFO    MainThread:1482814 [wandb_run.py:_console_start():2233] atexit reg
-2024-03-27 14:10:34,006 INFO    MainThread:1482814 [wandb_run.py:_redirect():2088] redirect: wrap_raw
-2024-03-27 14:10:34,006 INFO    MainThread:1482814 [wandb_run.py:_redirect():2153] Wrapping output streams.
-2024-03-27 14:10:34,007 INFO    MainThread:1482814 [wandb_run.py:_redirect():2178] Redirects installed.
-2024-03-27 14:10:34,007 INFO    MainThread:1482814 [wandb_init.py:init():847] run started, returning control to user process
-2024-03-27 14:10:34,009 INFO    MainThread:1482814 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 2, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distil-whisper/distil-large-v3', 'transformers_version': '4.40.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 5000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Mar27_14-10-22_hf-dgx-01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 4, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': 
None}

+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Current SDK version is 0.16.2
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Configure stats pid to 1894903
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/settings
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program_abspath': '/home/sanchit/distil-large-v3-hi-ft-frozen-encoder/run_speech_recognition_seq2seq.py', 'program': 'run_speech_recognition_seq2seq.py'}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:_log_setup():526] Logging user logs to /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/run-20240327_190513-7p2x8a0l/logs/debug.log
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:_log_setup():527] Logging internal logs to /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/run-20240327_190513-7p2x8a0l/logs/debug-internal.log
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():566] calling init triggers
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
 config: {}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():616] starting backend
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():620] setting up manager
+2024-03-27 19:05:13,646 INFO    MainThread:1894903 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-03-27 19:05:13,647 INFO    MainThread:1894903 [wandb_init.py:init():628] backend started and connected
+2024-03-27 19:05:13,651 INFO    MainThread:1894903 [wandb_init.py:init():720] updated telemetry
+2024-03-27 19:05:13,720 INFO    MainThread:1894903 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
+2024-03-27 19:05:14,027 INFO    MainThread:1894903 [wandb_run.py:_on_init():2254] communicating current version
+2024-03-27 19:05:14,056 INFO    MainThread:1894903 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.5 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
+2024-03-27 19:05:14,056 INFO    MainThread:1894903 [wandb_init.py:init():804] starting run threads in backend
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_console_start():2233] atexit reg
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_redirect():2088] redirect: wrap_raw
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_redirect():2153] Wrapping output streams.
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_redirect():2178] Redirects installed.
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_init.py:init():847] run started, returning control to user process
+2024-03-27 19:05:14,729 INFO    MainThread:1894903 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 2, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distil-whisper/distil-large-v3', 'transformers_version': '4.40.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 5000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Mar27_19-04-58_hf-dgx-01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 4, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': 
None}

wandb/run-20240327_190513-7p2x8a0l/files/config.yaml ADDED Viewed

	@@ -0,0 +1,751 @@

+wandb_version: 1
+_wandb:
+  desc: null
+  value:
+    python_version: 3.8.10
+    cli_version: 0.16.2
+    framework: huggingface
+    huggingface_version: 4.40.0.dev0
+    is_jupyter_run: false
+    is_kaggle_kernel: false
+    start_time: 1711562713.647808
+    t:
+      1:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 98
+      - 100
+      2:
+      - 1
+      - 2
+      - 3
+      - 5
+      - 11
+      - 12
+      - 49
+      - 51
+      - 53
+      - 55
+      - 71
+      - 98
+      - 100
+      3:
+      - 7
+      - 23
+      4: 3.8.10
+      5: 0.16.2
+      6: 4.40.0.dev0
+      8:
+      - 5
+      9:
+        1: transformers_trainer
+      13: linux-x86_64
+    m:
+    - 1: train/global_step
+      6:
+      - 3
+    - 1: train/loss
+      5: 1
+      6:
+      - 1
+    - 1: train/grad_norm
+      5: 1
+      6:
+      - 1
+    - 1: train/learning_rate
+      5: 1
+      6:
+      - 1
+    - 1: train/epoch
+      5: 1
+      6:
+      - 1
+    - 1: eval/loss
+      5: 1
+      6:
+      - 1
+    - 1: eval/wer
+      5: 1
+      6:
+      - 1
+    - 1: eval/runtime
+      5: 1
+      6:
+      - 1
+    - 1: eval/samples_per_second
+      5: 1
+      6:
+      - 1
+    - 1: eval/steps_per_second
+      5: 1
+      6:
+      - 1
+vocab_size:
+  desc: null
+  value: 51866
+num_mel_bins:
+  desc: null
+  value: 128
+d_model:
+  desc: null
+  value: 1280
+encoder_layers:
+  desc: null
+  value: 32
+encoder_attention_heads:
+  desc: null
+  value: 20
+decoder_layers:
+  desc: null
+  value: 2
+decoder_attention_heads:
+  desc: null
+  value: 20
+decoder_ffn_dim:
+  desc: null
+  value: 5120
+encoder_ffn_dim:
+  desc: null
+  value: 5120
+dropout:
+  desc: null
+  value: 0.0
+attention_dropout:
+  desc: null
+  value: 0.0
+activation_dropout:
+  desc: null
+  value: 0.0
+activation_function:
+  desc: null
+  value: gelu
+init_std:
+  desc: null
+  value: 0.02
+encoder_layerdrop:
+  desc: null
+  value: 0.0
+decoder_layerdrop:
+  desc: null
+  value: 0.0
+use_cache:
+  desc: null
+  value: true
+num_hidden_layers:
+  desc: null
+  value: 32
+scale_embedding:
+  desc: null
+  value: false
+max_source_positions:
+  desc: null
+  value: 1500
+max_target_positions:
+  desc: null
+  value: 448
+classifier_proj_size:
+  desc: null
+  value: 256
+use_weighted_layer_sum:
+  desc: null
+  value: false
+apply_spec_augment:
+  desc: null
+  value: false
+mask_time_prob:
+  desc: null
+  value: 0.05
+mask_time_length:
+  desc: null
+  value: 10
+mask_time_min_masks:
+  desc: null
+  value: 2
+mask_feature_prob:
+  desc: null
+  value: 0.0
+mask_feature_length:
+  desc: null
+  value: 10
+mask_feature_min_masks:
+  desc: null
+  value: 0
+median_filter_width:
+  desc: null
+  value: 7
+return_dict:
+  desc: null
+  value: true
+output_hidden_states:
+  desc: null
+  value: false
+output_attentions:
+  desc: null
+  value: false
+torchscript:
+  desc: null
+  value: false
+torch_dtype:
+  desc: null
+  value: float16
+use_bfloat16:
+  desc: null
+  value: false
+tf_legacy_loss:
+  desc: null
+  value: false
+pruned_heads:
+  desc: null
+  value: {}
+tie_word_embeddings:
+  desc: null
+  value: true
+chunk_size_feed_forward:
+  desc: null
+  value: 0
+is_encoder_decoder:
+  desc: null
+  value: true
+is_decoder:
+  desc: null
+  value: false
+cross_attention_hidden_size:
+  desc: null
+  value: null
+add_cross_attention:
+  desc: null
+  value: false
+tie_encoder_decoder:
+  desc: null
+  value: false
+max_length:
+  desc: null
+  value: 448
+min_length:
+  desc: null
+  value: 0
+do_sample:
+  desc: null
+  value: false
+early_stopping:
+  desc: null
+  value: false
+num_beams:
+  desc: null
+  value: 1
+num_beam_groups:
+  desc: null
+  value: 1
+diversity_penalty:
+  desc: null
+  value: 0.0
+temperature:
+  desc: null
+  value: 1.0
+top_k:
+  desc: null
+  value: 50
+top_p:
+  desc: null
+  value: 1.0
+typical_p:
+  desc: null
+  value: 1.0
+repetition_penalty:
+  desc: null
+  value: 1.0
+length_penalty:
+  desc: null
+  value: 1.0
+no_repeat_ngram_size:
+  desc: null
+  value: 0
+encoder_no_repeat_ngram_size:
+  desc: null
+  value: 0
+bad_words_ids:
+  desc: null
+  value: null
+num_return_sequences:
+  desc: null
+  value: 1
+output_scores:
+  desc: null
+  value: false
+return_dict_in_generate:
+  desc: null
+  value: false
+forced_bos_token_id:
+  desc: null
+  value: null
+forced_eos_token_id:
+  desc: null
+  value: null
+remove_invalid_values:
+  desc: null
+  value: false
+exponential_decay_length_penalty:
+  desc: null
+  value: null
+suppress_tokens:
+  desc: null
+  value: null
+begin_suppress_tokens:
+  desc: null
+  value:
+  - 220
+  - 50257
+architectures:
+  desc: null
+  value:
+  - WhisperForConditionalGeneration
+finetuning_task:
+  desc: null
+  value: null
+id2label:
+  desc: null
+  value:
+    '0': LABEL_0
+    '1': LABEL_1
+label2id:
+  desc: null
+  value:
+    LABEL_0: 0
+    LABEL_1: 1
+tokenizer_class:
+  desc: null
+  value: null
+prefix:
+  desc: null
+  value: null
+bos_token_id:
+  desc: null
+  value: 50257
+pad_token_id:
+  desc: null
+  value: 50256
+eos_token_id:
+  desc: null
+  value: 50257
+sep_token_id:
+  desc: null
+  value: null
+decoder_start_token_id:
+  desc: null
+  value: 50258
+task_specific_params:
+  desc: null
+  value: null
+problem_type:
+  desc: null
+  value: null
+_name_or_path:
+  desc: null
+  value: distil-whisper/distil-large-v3
+transformers_version:
+  desc: null
+  value: 4.40.0.dev0
+model_type:
+  desc: null
+  value: whisper
+forced_decoder_ids:
+  desc: null
+  value: null
+output_dir:
+  desc: null
+  value: ./
+overwrite_output_dir:
+  desc: null
+  value: true
+do_train:
+  desc: null
+  value: true
+do_eval:
+  desc: null
+  value: true
+do_predict:
+  desc: null
+  value: false
+evaluation_strategy:
+  desc: null
+  value: steps
+prediction_loss_only:
+  desc: null
+  value: false
+per_device_train_batch_size:
+  desc: null
+  value: 32
+per_device_eval_batch_size:
+  desc: null
+  value: 32
+per_gpu_train_batch_size:
+  desc: null
+  value: null
+per_gpu_eval_batch_size:
+  desc: null
+  value: null
+gradient_accumulation_steps:
+  desc: null
+  value: 1
+eval_accumulation_steps:
+  desc: null
+  value: null
+eval_delay:
+  desc: null
+  value: 0
+learning_rate:
+  desc: null
+  value: 0.0001
+weight_decay:
+  desc: null
+  value: 0.0
+adam_beta1:
+  desc: null
+  value: 0.9
+adam_beta2:
+  desc: null
+  value: 0.999
+adam_epsilon:
+  desc: null
+  value: 1.0e-08
+max_grad_norm:
+  desc: null
+  value: 1.0
+num_train_epochs:
+  desc: null
+  value: 3.0
+max_steps:
+  desc: null
+  value: 5000
+lr_scheduler_type:
+  desc: null
+  value: linear
+lr_scheduler_kwargs:
+  desc: null
+  value: {}
+warmup_ratio:
+  desc: null
+  value: 0.0
+warmup_steps:
+  desc: null
+  value: 500
+log_level:
+  desc: null
+  value: passive
+log_level_replica:
+  desc: null
+  value: warning
+log_on_each_node:
+  desc: null
+  value: true
+logging_dir:
+  desc: null
+  value: ./runs/Mar27_19-04-58_hf-dgx-01
+logging_strategy:
+  desc: null
+  value: steps
+logging_first_step:
+  desc: null
+  value: false
+logging_steps:
+  desc: null
+  value: 25
+logging_nan_inf_filter:
+  desc: null
+  value: true
+save_strategy:
+  desc: null
+  value: steps
+save_steps:
+  desc: null
+  value: 1000
+save_total_limit:
+  desc: null
+  value: 1
+save_safetensors:
+  desc: null
+  value: true
+save_on_each_node:
+  desc: null
+  value: false
+save_only_model:
+  desc: null
+  value: false
+no_cuda:
+  desc: null
+  value: false
+use_cpu:
+  desc: null
+  value: false
+use_mps_device:
+  desc: null
+  value: false
+seed:
+  desc: null
+  value: 42
+data_seed:
+  desc: null
+  value: null
+jit_mode_eval:
+  desc: null
+  value: false
+use_ipex:
+  desc: null
+  value: false
+bf16:
+  desc: null
+  value: false
+fp16:
+  desc: null
+  value: true
+fp16_opt_level:
+  desc: null
+  value: O1
+half_precision_backend:
+  desc: null
+  value: auto
+bf16_full_eval:
+  desc: null
+  value: false
+fp16_full_eval:
+  desc: null
+  value: false
+tf32:
+  desc: null
+  value: null
+local_rank:
+  desc: null
+  value: 0
+ddp_backend:
+  desc: null
+  value: null
+tpu_num_cores:
+  desc: null
+  value: null
+tpu_metrics_debug:
+  desc: null
+  value: false
+debug:
+  desc: null
+  value: []
+dataloader_drop_last:
+  desc: null
+  value: false
+eval_steps:
+  desc: null
+  value: 1000
+dataloader_num_workers:
+  desc: null
+  value: 4
+dataloader_prefetch_factor:
+  desc: null
+  value: null
+past_index:
+  desc: null
+  value: -1
+run_name:
+  desc: null
+  value: ./
+disable_tqdm:
+  desc: null
+  value: false
+remove_unused_columns:
+  desc: null
+  value: true
+label_names:
+  desc: null
+  value: null
+load_best_model_at_end:
+  desc: null
+  value: false
+metric_for_best_model:
+  desc: null
+  value: null
+greater_is_better:
+  desc: null
+  value: null
+ignore_data_skip:
+  desc: null
+  value: false
+fsdp:
+  desc: null
+  value: []
+fsdp_min_num_params:
+  desc: null
+  value: 0
+fsdp_config:
+  desc: null
+  value:
+    min_num_params: 0
+    xla: false
+    xla_fsdp_v2: false
+    xla_fsdp_grad_ckpt: false
+fsdp_transformer_layer_cls_to_wrap:
+  desc: null
+  value: null
+accelerator_config:
+  desc: null
+  value:
+    split_batches: false
+    dispatch_batches: null
+    even_batches: true
+    use_seedable_sampler: true
+deepspeed:
+  desc: null
+  value: null
+label_smoothing_factor:
+  desc: null
+  value: 0.0
+optim:
+  desc: null
+  value: adamw_torch
+optim_args:
+  desc: null
+  value: null
+adafactor:
+  desc: null
+  value: false
+group_by_length:
+  desc: null
+  value: false
+length_column_name:
+  desc: null
+  value: input_length
+report_to:
+  desc: null
+  value:
+  - tensorboard
+  - wandb
+ddp_find_unused_parameters:
+  desc: null
+  value: null
+ddp_bucket_cap_mb:
+  desc: null
+  value: null
+ddp_broadcast_buffers:
+  desc: null
+  value: null
+dataloader_pin_memory:
+  desc: null
+  value: true
+dataloader_persistent_workers:
+  desc: null
+  value: false
+skip_memory_metrics:
+  desc: null
+  value: true
+use_legacy_prediction_loop:
+  desc: null
+  value: false
+push_to_hub:
+  desc: null
+  value: true
+resume_from_checkpoint:
+  desc: null
+  value: null
+hub_model_id:
+  desc: null
+  value: null
+hub_strategy:
+  desc: null
+  value: every_save
+hub_token:
+  desc: null
+  value: <HUB_TOKEN>
+hub_private_repo:
+  desc: null
+  value: false
+hub_always_push:
+  desc: null
+  value: false
+gradient_checkpointing:
+  desc: null
+  value: true
+gradient_checkpointing_kwargs:
+  desc: null
+  value: null
+include_inputs_for_metrics:
+  desc: null
+  value: false
+fp16_backend:
+  desc: null
+  value: auto
+push_to_hub_model_id:
+  desc: null
+  value: null
+push_to_hub_organization:
+  desc: null
+  value: null
+push_to_hub_token:
+  desc: null
+  value: <PUSH_TO_HUB_TOKEN>
+mp_parameters:
+  desc: null
+  value: ''
+auto_find_batch_size:
+  desc: null
+  value: false
+full_determinism:
+  desc: null
+  value: false
+torchdynamo:
+  desc: null
+  value: null
+ray_scope:
+  desc: null
+  value: last
+ddp_timeout:
+  desc: null
+  value: 1800
+torch_compile:
+  desc: null
+  value: false
+torch_compile_backend:
+  desc: null
+  value: null
+torch_compile_mode:
+  desc: null
+  value: null
+dispatch_batches:
+  desc: null
+  value: null
+split_batches:
+  desc: null
+  value: null
+include_tokens_per_second:
+  desc: null
+  value: false
+include_num_input_tokens_seen:
+  desc: null
+  value: false
+neftune_noise_alpha:
+  desc: null
+  value: null
+optim_target_modules:
+  desc: null
+  value: null
+sortish_sampler:
+  desc: null
+  value: false
+predict_with_generate:
+  desc: null
+  value: true
+generation_max_length:
+  desc: null
+  value: 225
+generation_num_beams:
+  desc: null
+  value: null
+generation_config:
+  desc: null
+  value: null

wandb/run-20240327_190513-7p2x8a0l/files/output.log ADDED Viewed

	@@ -0,0 +1,1033 @@

+  0%|                                                                                          | 0/5000 [00:00<?, ?it/s]/home/sanchit/hf/lib/python3.8/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+  warnings.warn(
+/home/sanchit/hf/lib/python3.8/site-packages/torch/utils/checkpoint.py:90: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
+  warnings.warn(
+[WARNING|logging.py:329] 2024-03-27 19:05:28,423 >> `use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`...
+  0%|▍                                                                              | 25/5000 [00:55<2:17:36,  1.66s/it]
+  1%|▊                                                                              | 49/5000 [01:35<2:16:42,  1.66s/it]
+  2%|█▏                                                                             | 75/5000 [02:18<2:16:00,  1.66s/it]
+  2%|█▌                                                                            | 100/5000 [02:59<2:12:59,  1.63s/it]
+  2%|█▉                                                                            | 124/5000 [03:39<2:14:58,  1.66s/it]
+  3%|██▎                                                                           | 149/5000 [04:21<2:14:36,  1.66s/it]
+  4%|██▋                                                                           | 175/5000 [05:04<2:09:58,  1.62s/it]
+  4%|███                                                                           | 200/5000 [05:45<2:12:57,  1.66s/it]
+  4%|███▌                                                                          | 225/5000 [06:40<4:28:32,  3.37s/it]
+  5%|███▉                                                                          | 250/5000 [07:21<2:11:07,  1.66s/it]
+  6%|████▎                                                                         | 275/5000 [08:03<2:10:40,  1.66s/it]
+  6%|████▋                                                                         | 300/5000 [08:44<2:09:22,  1.65s/it]
+  6%|█████                                                                         | 325/5000 [09:26<2:09:34,  1.66s/it]
+  7%|█████▍                                                                        | 350/5000 [10:08<2:09:00,  1.66s/it]
+  8%|█████▊                                                                        | 375/5000 [10:49<2:08:16,  1.66s/it]
+  8%|██████▏                                                                       | 399/5000 [11:29<2:07:37,  1.66s/it]
+  8%|██████▋                                                                       | 425/5000 [12:12<2:07:02,  1.67s/it]
+  9%|███████                                                                       | 450/5000 [13:07<2:57:10,  2.34s/it]
+ 10%|███████▍                                                                      | 475/5000 [13:48<2:05:43,  1.67s/it]
+ 10%|███████▊                                                                      | 500/5000 [14:30<2:05:03,  1.67s/it]
+ 10%|████████▏                                                                     | 524/5000 [15:10<2:04:10,  1.66s/it]
+ 11%|████████▌                                                                     | 549/5000 [15:51<2:03:08,  1.66s/it]
+ 12%|████████▉                                                                     | 575/5000 [16:35<2:02:47,  1.66s/it]
+ 12%|█████████▎                                                                    | 600/5000 [17:16<2:02:00,  1.66s/it]
+ 12%|█████████▊                                                                    | 625/5000 [17:58<2:01:53,  1.67s/it]
+ 13%|██████████                                                                    | 649/5000 [18:38<2:01:13,  1.67s/it]
+ 14%|██████████▌                                                                   | 675/5000 [19:33<2:15:32,  1.88s/it]
+ 14%|██████████▉                                                                   | 700/5000 [20:15<2:52:11,  2.40s/it]
+ 14%|███████████▎                                                                  | 725/5000 [21:00<1:59:02,  1.67s/it]
+ 15%|███████████▋                                                                  | 749/5000 [21:40<1:58:00,  1.67s/it]
+ 16%|████████████                                                                  | 775/5000 [22:23<1:57:00,  1.66s/it]
+ 16%|████████████▍                                                                 | 800/5000 [23:05<1:56:25,  1.66s/it]
+ 16%|████████████▊                                                                 | 824/5000 [23:45<1:55:40,  1.66s/it]
+ 17%|█████████████▏                                                                | 849/5000 [24:26<1:55:06,  1.66s/it]
+ 18%|█████████████▋                                                                | 875/5000 [25:10<1:54:31,  1.67s/it]
+ 18%|██████████████                                                                | 900/5000 [26:03<1:59:10,  1.74s/it]
+ 18%|██████████████▍                                                               | 924/5000 [26:43<1:52:44,  1.66s/it]
+ 19%|██████████████▊                                                               | 950/5000 [27:26<1:52:20,  1.66s/it]
+ 20%|███████████████▏                                                              | 975/5000 [28:08<1:51:49,  1.67s/it]
+ 20%|███████████████▍                                                             | 1000/5000 [28:49<1:50:58,  1.66s/it][INFO|trainer.py:768] 2024-03-27 19:34:04,504 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[INFO|trainer.py:3544] 2024-03-27 19:34:04,506 >> ***** Running Evaluation *****
+[INFO|trainer.py:3546] 2024-03-27 19:34:04,507 >>   Num examples = 3123
+[INFO|trainer.py:3549] 2024-03-27 19:34:04,507 >>   Batch size = 32
+{'loss': 0.1035, 'grad_norm': 1.2479132413864136, 'learning_rate': 8.900000000000001e-05, 'epoch': 4.5}
+[INFO|generation_whisper.py:1111] 2024-03-27 19:34:16,924 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+                                                                                                                        [INFO|generation_whisper.py:1111] 2024-03-27 19:34:32,221 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  0%|                                                                                            | 0/98 [00:00<?, ?it/s][INFO|generation_whisper.py:1111] 2024-03-27 19:34:46,567 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  2%|█▋                                                                                  | 2/98 [00:14<11:28,  7.17s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:35:00,902 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  3%|██▌                                                                                 | 3/98 [00:28<16:04, 10.16s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:35:15,181 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  4%|███▍                                                                                | 4/98 [00:42<18:19, 11.69s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:35:29,903 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  5%|████▎                                                                               | 5/98 [00:57<19:45, 12.75s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:35:43,657 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  6%|█████▏                                                                              | 6/98 [01:11<20:03, 13.08s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:35:57,833 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  7%|██████                                                                              | 7/98 [01:25<20:22, 13.43s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:36:12,133 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  8%|██████▊                                                                             | 8/98 [01:39<20:33, 13.71s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:36:26,263 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+  9%|███████▋                                                                            | 9/98 [01:54<20:31, 13.84s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:36:40,303 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 10%|████████▍                                                                          | 10/98 [02:08<20:23, 13.90s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:36:54,524 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 11%|█████████▎                                                                         | 11/98 [02:22<20:17, 14.00s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:37:08,502 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 12%|██████████▏                                                                        | 12/98 [02:36<20:03, 13.99s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:37:22,202 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 13%|███████████                                                                        | 13/98 [02:49<19:41, 13.90s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:37:36,108 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 14%|███████████▊                                                                       | 14/98 [03:03<19:27, 13.90s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:37:50,111 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 15%|████████████▋                                                                      | 15/98 [03:17<19:16, 13.93s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:38:03,743 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 16%|█████████████▌                                                                     | 16/98 [03:31<18:55, 13.84s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:38:17,662 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 17%|██████████████▍                                                                    | 17/98 [03:45<18:43, 13.87s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:38:31,417 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 18%|███████████████▏                                                                   | 18/98 [03:59<18:26, 13.83s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:38:45,386 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 19%|████████████████                                                                   | 19/98 [04:13<18:15, 13.87s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:38:59,266 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 20%|████████████████▉                                                                  | 20/98 [04:27<18:02, 13.88s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:39:13,359 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 21%|█████████████████▊                                                                 | 21/98 [04:41<17:53, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:39:27,370 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 22%|██████████████████▋                                                                | 22/98 [04:55<17:41, 13.96s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:39:41,500 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 23%|███████████████████▍                                                               | 23/98 [05:09<17:30, 14.01s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:39:55,547 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 24%|████████████████████▎                                                              | 24/98 [05:23<17:17, 14.02s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:40:09,901 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 26%|█████████████████████▏                                                             | 25/98 [05:37<17:10, 14.12s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:40:24,028 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 27%|██████████████████████                                                             | 26/98 [05:51<16:56, 14.12s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:40:38,068 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 28%|██████████████████████▊                                                            | 27/98 [06:05<16:41, 14.10s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:40:52,103 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 29%|███████████████████████▋                                                           | 28/98 [06:19<16:25, 14.08s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:41:06,018 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 30%|████████████████████████▌                                                          | 29/98 [06:33<16:08, 14.03s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:41:20,035 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 31%|█████████████████████████▍                                                         | 30/98 [06:47<15:53, 14.03s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:41:33,986 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 32%|██████████████████████████▎                                                        | 31/98 [07:01<15:38, 14.00s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:41:47,777 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 33%|███████████████████████████                                                        | 32/98 [07:15<15:20, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:42:01,578 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 34%|███████████████████████████▉                                                       | 33/98 [07:29<15:03, 13.90s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:42:15,086 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 35%|████████████████████████████▊                                                      | 34/98 [07:42<14:41, 13.78s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:42:29,489 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 36%|█████████████████████████████▋                                                     | 35/98 [07:57<14:40, 13.97s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:42:43,167 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 37%|██████████████████████████████▍                                                    | 36/98 [08:10<14:20, 13.88s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:42:57,438 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 38%|███████████████████████████████▎                                                   | 37/98 [08:25<14:13, 14.00s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:43:11,313 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 39%|████████████████████████████████▏                                                  | 38/98 [08:39<13:57, 13.96s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:43:25,386 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 40%|█████████████████████████████████                                                  | 39/98 [08:53<13:45, 13.99s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:43:39,191 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 41%|█████████████████████████████████▉                                                 | 40/98 [09:06<13:28, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:43:53,137 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 42%|██████████████████████████████████▋                                                | 41/98 [09:20<13:14, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:44:06,953 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 43%|███████████████████████████████████▌                                               | 42/98 [09:34<12:58, 13.90s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:44:20,954 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 44%|████████████████████████████████████▍                                              | 43/98 [09:48<12:46, 13.93s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:44:34,912 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 45%|█████████████████████████████████████▎                                             | 44/98 [10:02<12:32, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:44:48,785 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 46%|██████████████████████████████████████                                             | 45/98 [10:16<12:17, 13.92s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:45:02,804 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 47%|██████████████████████████████████████▉                                            | 46/98 [10:30<12:05, 13.95s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:45:17,142 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 48%|███████████████████████████████████████▊                                           | 47/98 [10:44<11:57, 14.07s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:45:31,387 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 49%|████████████████████████████████████████▋                                          | 48/98 [10:59<11:45, 14.12s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:45:45,556 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 50%|█████████████████████████████████████████▌                                         | 49/98 [11:13<11:32, 14.13s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:45:57,797 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 51%|██████████████████████████████████████████▎                                        | 50/98 [11:25<10:51, 13.57s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:46:03,379 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 52%|███████████████████████████████████████████▏                                       | 51/98 [11:31<08:45, 11.17s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:46:09,863 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 53%|████████████████████████████████████████████                                       | 52/98 [11:37<07:29,  9.76s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:46:24,315 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 54%|████████████████████████████████████████████▉                                      | 53/98 [11:52<08:22, 11.17s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:46:38,546 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 55%|█████████████████████████████████████████████▋                                     | 54/98 [12:06<08:51, 12.09s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:46:52,468 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 56%|██████████████████████████████████████████████▌                                    | 55/98 [12:20<09:03, 12.64s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:47:06,484 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 57%|███████████████████████████████████████████████▍                                   | 56/98 [12:34<09:08, 13.05s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:47:20,369 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 58%|████████████████████████████████████████████████▎                                  | 57/98 [12:48<09:05, 13.30s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:47:34,273 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 59%|█████████████████████████████████████████████████                                  | 58/98 [13:02<08:59, 13.48s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:47:48,252 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 60%|█████████████████████████████████████████████████▉                                 | 59/98 [13:16<08:51, 13.63s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:48:02,183 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 61%|██████████████████████████████████████████████████▊                                | 60/98 [13:29<08:41, 13.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:48:15,984 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 62%|███████████████████████████████████████████████████▋                               | 61/98 [13:43<08:28, 13.75s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:48:29,807 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 63%|████████████████████████████████████████████████████▌                              | 62/98 [13:57<08:15, 13.77s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:48:43,858 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 64%|█████████████████████████████████████████████████████▎                             | 63/98 [14:11<08:04, 13.85s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:48:58,084 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 65%|██████████████████████████████████████████████████████▏                            | 64/98 [14:25<07:54, 13.97s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:49:11,939 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 66%|███████████████████████████████████████████████████████                            | 65/98 [14:39<07:39, 13.93s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:49:26,109 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 67%|███████████████████████████████████████████████████████▉                           | 66/98 [14:53<07:28, 14.00s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:49:40,079 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 68%|████████████████████████████████████████████████████████▋                          | 67/98 [15:07<07:13, 13.99s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:49:53,707 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 69%|█████████████████████████████████████████████████████████▌                         | 68/98 [15:21<06:56, 13.88s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:50:07,852 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 70%|██████████████████████████████████████████████████████████▍                        | 69/98 [15:35<06:44, 13.96s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:50:21,891 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 71%|███████████████████████████████████████████████████████████▎                       | 70/98 [15:49<06:31, 13.99s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:50:35,042 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 72%|████████████████████████████████████████████████████████████▏                      | 71/98 [16:02<06:10, 13.73s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:50:49,446 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 73%|████████████████████████████████████████████████████████████▉                      | 72/98 [16:17<06:02, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:51:03,382 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 74%|█████████████████████████████████████████████████████████████���                     | 73/98 [16:31<05:48, 13.94s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:51:17,823 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 76%|██████████████████████████████████████████████████████████████▋                    | 74/98 [16:45<05:38, 14.09s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:51:31,733 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 77%|███████████████████████████████████████████████████████████████▌                   | 75/98 [16:59<05:22, 14.03s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:51:45,415 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 78%|████████████████████████████████████████████████████████████████▎                  | 76/98 [17:13<05:06, 13.93s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:51:59,435 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 79%|█████████████████████████████████████████████████████████████████▏                 | 77/98 [17:27<04:53, 13.96s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:52:12,864 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 80%|██████████████████████████████████████████████████████████████████                 | 78/98 [17:40<04:35, 13.80s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:52:27,026 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 81%|██████████████████████████████████████████████████████████████████▉                | 79/98 [17:54<04:24, 13.91s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:52:40,949 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 82%|███████████████████████████████████████████████████████████████████▊               | 80/98 [18:08<04:10, 13.91s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:52:54,527 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 83%|████████████████████████████████████████████████████████████████████▌              | 81/98 [18:22<03:54, 13.81s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:53:08,717 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 84%|█████████████████████████████████████████████████████████████████████▍             | 82/98 [18:36<03:42, 13.93s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:53:22,833 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 85%|██████████████████████████████████████████████████████████████████████▎            | 83/98 [18:50<03:29, 13.98s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:53:36,626 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 86%|███████████████████████████████████████████████████████████████████████▏           | 84/98 [19:04<03:14, 13.93s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:53:50,526 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 87%|███████████████████████████████████████████████████████████████████████▉           | 85/98 [19:18<03:00, 13.92s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:54:03,498 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 88%|████████████████████████████████████████████████████████████████████████▊          | 86/98 [19:31<02:43, 13.63s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:54:17,511 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 89%|█████████████████████████████████████████████████████████████████████████▋         | 87/98 [19:45<02:31, 13.75s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:54:31,136 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 90%|██████████████████████████████████████████████████████████████████████████▌        | 88/98 [19:58<02:17, 13.71s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:54:44,966 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 91%|███████████████████████████████████████████████████████████████████████████▍       | 89/98 [20:12<02:03, 13.75s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:54:58,947 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 93%|█████████████████████████████████████████████████████████████████████████████      | 91/98 [20:40<01:36, 13.71s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:55:12,423 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 94%|█████████████████████████████████████████████████████████████████████████████▉     | 92/98 [20:53<01:22, 13.73s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:55:26,178 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 95%|██████████████████████████████████████████████████████████████████████████████▊    | 93/98 [21:07<01:08, 13.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:55:39,889 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 96%|███████████████████████████████████████████████████████████████████████████████▌   | 94/98 [21:21<00:54, 13.67s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:55:53,445 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 97%|████████████████████████████████████████████████████████████████████████████████▍  | 95/98 [21:35<00:41, 13.72s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:56:07,282 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 98%|█████████████████████████████████████████████████████████████████████████████████▎ | 96/98 [21:49<00:27, 13.80s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:56:21,266 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+ 99%|██████████████████████████████████████████████████████████████████████████████████▏| 97/98 [22:03<00:13, 13.87s/it][INFO|generation_whisper.py:1111] 2024-03-27 19:56:35,449 >> You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
+[WARNING|configuration_utils.py:447] 2024-03-27 19:56:44,906 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.of task=transcribe.
+[WARNING|configuration_utils.py:447] 2024-03-27 19:56:44,906 >> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41.of task=transcribe.
+Non-default generation parameters: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}
+[INFO|configuration_utils.py:471] 2024-03-27 19:56:44,906 >> Configuration saved in ./checkpoint-1000/config.json
+[INFO|configuration_utils.py:697] 2024-03-27 19:56:44,907 >> Configuration saved in ./checkpoint-1000/generation_config.json
+{'eval_loss': 0.30151915550231934, 'eval_wer': 0.3249633006258209, 'eval_runtime': 1360.3971, 'eval_samples_per_second': 2.296, 'eval_steps_per_second': 0.072, 'epoch': 4.5}
+[INFO|modeling_utils.py:2475] 2024-03-27 19:56:52,939 >> Model weights saved in ./checkpoint-1000/model.safetensors
+[INFO|feature_extraction_utils.py:424] 2024-03-27 19:56:52,940 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json
+[INFO|feature_extraction_utils.py:424] 2024-03-27 19:57:03,422 >> Feature extractor saved in ./preprocessor_config.json
+/home/sanchit/hf/lib/python3.8/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
+  warnings.warn(
+/home/sanchit/hf/lib/python3.8/site-packages/torch/utils/checkpoint.py:90: UserWarning: None of the inputs have requires_grad=True. Gradients will be None
+  warnings.warn(
+ 20%|██████████████▊                                                           | 1002/5000 [51:52<323:26:46, 291.25s/it]
+ 20%|██████████████▊                                                           | 1003/5000 [51:53<226:54:23, 204.37s/it]
+ 20%|██████████████▊                                                           | 1004/5000 [51:55<159:21:05, 143.56s/it]
+ 20%|██████████████▊                                                           | 1005/5000 [51:57<112:04:19, 100.99s/it]
+ 20%|███████████████▎                                                            | 1007/5000 [52:00<55:49:31, 50.33s/it]
+ 20%|███████████████▎                                                            | 1008/5000 [52:02<39:37:18, 35.73s/it]
+ 20%|███████████████▎                                                            | 1009/5000 [52:03<28:16:59, 25.51s/it]
+ 20%|███████████████▎                                                            | 1010/5000 [52:05<20:20:35, 18.35s/it]
+ 20%|███████████████▎                                                            | 1011/5000 [52:07<14:47:29, 13.35s/it]
+ 20%|███████████████▌                                                             | 1013/5000 [52:10<8:11:11,  7.39s/it]
+ 20%|███████████████▌                                                             | 1014/5000 [52:12<6:16:40,  5.67s/it]
+ 20%|███████████████▋                                                             | 1015/5000 [52:13<4:56:55,  4.47s/it]
+ 20%|███████████████▋                                                             | 1016/5000 [52:15<4:00:59,  3.63s/it]
+ 20%|███████████████▋                                                             | 1017/5000 [52:17<3:21:51,  3.04s/it]
+ 20%|███████████████▋                                                             | 1019/5000 [52:20<2:35:03,  2.34s/it]
+ 20%|███████████████▋                                                             | 1020/5000 [52:21<2:21:35,  2.13s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+{'loss': 0.1114, 'grad_norm': 1.8175023794174194, 'learning_rate': 8.844444444444445e-05, 'epoch': 4.62}
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|█████████���█████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+ 20%|███████████████▋                                                             | 1021/5000 [52:23<2:11:25,  1.98s/it]
+{'loss': 0.1059, 'grad_norm': 1.7170511484146118, 'learning_rate': 8.78888888888889e-05, 'epoch': 4.73}

wandb/run-20240327_190513-7p2x8a0l/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,246 @@

+absl-py==2.1.0
+accelerate==0.27.2
+aiohttp==3.9.3
+aiosignal==1.3.1
+anyio==4.2.0
+appdirs==1.4.4
+argon2-cffi-bindings==21.2.0
+argon2-cffi==23.1.0
+arrow==1.3.0
+asttokens==2.4.1
+astunparse==1.6.3
+async-lru==2.0.4
+async-timeout==4.0.3
+attrs==23.2.0
+audioread==3.0.1
+av==11.0.0
+babel==2.14.0
+backcall==0.2.0
+beautifulsoup4==4.12.3
+bitsandbytes==0.42.0
+bleach==6.1.0
+cached-property==1.5.2
+cachetools==5.3.2
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+chex==0.1.7
+click==8.1.7
+coloredlogs==15.0.1
+comm==0.2.1
+contourpy==1.1.1
+ctranslate2==4.1.0
+cycler==0.12.1
+datasets==2.18.0
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.7
+dm-tree==0.1.8
+docker-pycreds==0.4.0
+docstring-parser==0.15
+einops==0.7.0
+etils==1.3.0
+evaluate==0.4.1
+exceptiongroup==1.2.0
+executing==2.0.1
+fastjsonschema==2.19.1
+filelock==3.13.1
+flash-attn==2.5.3
+flatbuffers==23.5.26
+flax==0.7.2
+fonttools==4.48.1
+fqdn==1.5.1
+frozenlist==1.4.1
+fsspec==2024.2.0
+gast==0.4.0
+gitdb==4.0.11
+gitpython==3.1.41
+google-auth-oauthlib==1.0.0
+google-auth==2.27.0
+google-pasta==0.2.0
+grpcio==1.60.1
+h11==0.14.0
+h5py==3.10.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.21.4
+humanfriendly==10.0
+idna==3.6
+importlib-metadata==7.0.1
+importlib-resources==6.1.1
+iniconfig==2.0.0
+ipdb==0.13.13
+ipykernel==6.29.2
+ipython==8.12.3
+isoduration==20.11.0
+jax==0.4.13
+jaxlib==0.4.13
+jedi==0.19.1
+jinja2==3.1.2
+jiwer==3.0.3
+joblib==1.3.2
+json5==0.9.14
+jsonpointer==2.4
+jsonschema-specifications==2023.12.1
+jsonschema==4.21.1
+jupyter-client==8.6.0
+jupyter-core==5.7.1
+jupyter-events==0.9.0
+jupyter-lsp==2.2.2
+jupyter-server-terminals==0.5.2
+jupyter-server==2.12.5
+jupyterlab-pygments==0.3.0
+jupyterlab-server==2.25.2
+jupyterlab==4.1.0
+keras==2.13.1
+kiwisolver==1.4.5
+lazy-loader==0.3
+libclang==16.0.6
+librosa==0.10.1
+llvmlite==0.41.1
+markdown-it-py==3.0.0
+markdown==3.5.2
+markupsafe==2.1.3
+matplotlib-inline==0.1.6
+matplotlib==3.7.4
+mdurl==0.1.2
+mistune==3.0.2
+ml-dtypes==0.2.0
+more-itertools==10.2.0
+mpmath==1.2.1
+msclap==1.3.3
+msgpack==1.0.7
+multidict==6.0.5
+multiprocess==0.70.15
+nbclient==0.9.0
+nbconvert==7.16.0
+nbformat==5.9.2
+nest-asyncio==1.6.0
+networkx==3.0rc1
+ninja==1.11.1.1
+notebook-shim==0.2.3
+numba==0.58.1
+numpy==1.24.3
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+oauthlib==3.2.2
+onnxruntime==1.17.1
+openai-whisper==20231117
+opt-einsum==3.3.0
+optax==0.1.8
+orbax-checkpoint==0.2.3
+overrides==7.7.0
+packaging==23.2
+pandas==2.0.3
+pandocfilters==1.5.1
+parameterized==0.9.0
+parso==0.8.3
+peft==0.8.2
+pexpect==4.9.0
+pickleshare==0.7.5
+pillow==9.3.0
+pip==24.0
+pkg-resources==0.0.0
+pkgutil-resolve-name==1.3.10
+platformdirs==4.2.0
+pluggy==1.4.0
+pooch==1.8.0
+prometheus-client==0.19.0
+prompt-toolkit==3.0.43
+protobuf==4.25.2
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow-hotfix==0.6
+pyarrow==15.0.0
+pyasn1-modules==0.3.0
+pyasn1==0.5.1
+pycparser==2.21
+pygments==2.17.2
+pyparsing==3.1.1
+pytest==7.4.4
+python-dateutil==2.8.2
+python-json-logger==2.0.7
+pytorch-triton==3.0.0+901819d2b6
+pytz==2024.1
+pyyaml==6.0.1
+pyzmq==25.1.2
+rapidfuzz==3.6.1
+referencing==0.33.0
+regex==2023.12.25
+requests-oauthlib==1.3.1
+requests==2.31.0
+responses==0.18.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.0
+rpds-py==0.17.1
+rsa==4.9
+safetensors==0.4.2
+scikit-learn==1.3.2
+scipy==1.10.1
+send2trash==1.8.2
+sentry-sdk==1.40.0
+setproctitle==1.3.3
+setuptools==44.0.0
+shtab==1.7.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+soundfile==0.12.1
+soupsieve==2.5
+soxr==0.3.7
+stack-data==0.6.3
+sympy==1.11.1
+tensorboard-data-server==0.7.2
+tensorboard==2.13.0
+tensorflow-cpu==2.13.1
+tensorflow-estimator==2.13.0
+tensorflow-io-gcs-filesystem==0.34.0
+tensorstore==0.1.45
+termcolor==2.4.0
+terminado==0.18.0
+threadpoolctl==3.2.0
+tiktoken==0.6.0
+tinycss2==1.2.1
+tokenizers==0.15.1
+tomli==2.0.1
+toolz==0.12.1
+torch==2.2.1
+torchaudio==2.2.1
+torchlibrosa==0.1.0
+torchvision==0.17.1
+tornado==6.4
+tqdm==4.66.1
+traitlets==5.14.1
+transformers==4.39.0.dev0
+triton==2.2.0
+trl==0.7.11
+types-python-dateutil==2.8.19.20240106
+typing-extensions==4.9.0
+tyro==0.7.3
+tzdata==2023.4
+uri-template==1.3.0
+urllib3==2.2.0
+wandb==0.16.2
+wcwidth==0.2.13
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+werkzeug==3.0.1
+wheel==0.42.0
+wrapt==1.16.0
+xxhash==3.4.1
+yarl==1.9.4
+zipp==3.17.0

wandb/run-20240327_190513-7p2x8a0l/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,739 @@

+{
+    "os": "Linux-5.4.0-166-generic-x86_64-with-glibc2.29",
+    "python": "3.8.10",
+    "heartbeatAt": "2024-03-27T18:05:14.699269",
+    "startedAt": "2024-03-27T18:05:13.643873",
+    "docker": null,
+    "cuda": null,
+    "args": [
+        "--model_name_or_path=distil-whisper/distil-large-v3",
+        "--dataset_name=mozilla-foundation/common_voice_16_1",
+        "--dataset_config_name=hi",
+        "--language=hindi",
+        "--train_split_name=train+validation",
+        "--eval_split_name=test",
+        "--max_steps=5000",
+        "--output_dir=./",
+        "--per_device_train_batch_size=32",
+        "--per_device_eval_batch_size=32",
+        "--logging_steps=25",
+        "--learning_rate=1e-4",
+        "--warmup_steps=500",
+        "--evaluation_strategy=steps",
+        "--eval_steps=1000",
+        "--save_strategy=steps",
+        "--save_steps=1000",
+        "--save_total_limit=1",
+        "--generation_max_length=225",
+        "--preprocessing_num_workers=1",
+        "--dataloader_num_workers=4",
+        "--length_column_name=input_length",
+        "--max_duration_in_seconds=30",
+        "--text_column_name=sentence",
+        "--freeze_feature_encoder=False",
+        "--freeze_encoder",
+        "--gradient_checkpointing",
+        "--fp16",
+        "--overwrite_output_dir",
+        "--do_train",
+        "--do_eval",
+        "--predict_with_generate",
+        "--use_auth_token",
+        "--push_to_hub"
+    ],
+    "state": "running",
+    "program": "run_speech_recognition_seq2seq.py",
+    "codePathLocal": "run_speech_recognition_seq2seq.py",
+    "codePath": "run_speech_recognition_seq2seq.py",
+    "git": {
+        "remote": "https://huggingface.co/sanchit-gandhi/distil-large-v3-hi-ft-frozen-encoder",
+        "commit": "e7946df277d73ac75c34c2017b01c6d39e0275cd"
+    },
+    "email": "sanchit@huggingface.co",
+    "root": "/home/sanchit/distil-large-v3-hi-ft-frozen-encoder",
+    "host": "hf-dgx-01",
+    "username": "sanchit",
+    "executable": "/home/sanchit/hf/bin/python",
+    "cpu_count": 64,
+    "cpu_count_logical": 128,
+    "cpu_freq": {
+        "current": 2055.302406250001,
+        "min": 1500.0,
+        "max": 2250.0
+    },
+    "cpu_freq_per_core": [
+        {
+            "current": 1785.051,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1714.027,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1716.844,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2279.976,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3328.296,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2778.772,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1670.514,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1679.17,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.11,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3342.154,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1667.105,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1667.65,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1665.492,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3344.267,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1991.129,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1669.832,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1681.287,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1667.975,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1665.691,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3343.923,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1714.613,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1711.757,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2605.829,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1715.095,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1868.607,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1761.225,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1722.608,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2446.611,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1795.419,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1794.485,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1796.608,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1774.005,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3318.879,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3356.822,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1674.163,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1676.528,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3346.209,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1679.409,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1673.8,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1675.44,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1722.367,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1721.361,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1719.425,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2130.252,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1792.838,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1794.535,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1793.69,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1792.257,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1794.417,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1794.058,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1795.79,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1795.254,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1665.759,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3285.802,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3327.549,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.454,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2291.123,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1845.287,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1737.335,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1736.798,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1665.761,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2172.943,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2109.384,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.332,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2173.511,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2187.364,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2152.119,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3315.314,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3333.811,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1966.133,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1730.353,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2183.755,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1836.471,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3319.659,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.1,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3156.296,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1661.519,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3259.492,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2267.628,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.441,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.329,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1661.974,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1663.269,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3315.801,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1663.948,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1665.171,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3147.217,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1663.184,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2145.9,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2302.183,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1664.105,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1812.149,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1739.416,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1735.942,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1735.725,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1846.358,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3324.686,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3313.397,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1664.908,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1666.332,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3357.779,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2196.764,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1629.818,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1548.482,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1550.408,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1551.062,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1733.314,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2797.062,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1556.743,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2040.048,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1557.951,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1548.578,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2204.931,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2191.05,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2206.083,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2179.071,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2137.078,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2908.115,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3329.978,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2146.791,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1835.104,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3279.409,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2340.495,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 2350.105,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1860.834,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1928.589,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 3299.491,
+            "min": 1500.0,
+            "max": 2250.0
+        },
+        {
+            "current": 1897.289,
+            "min": 1500.0,
+            "max": 2250.0
+        }
+    ],
+    "disk": {
+        "/": {
+            "total": 1757.8785285949707,
+            "used": 1610.3510513305664
+        }
+    },
+    "gpu": "NVIDIA A100-SXM4-80GB",
+    "gpu_count": 5,
+    "gpu_devices": [
+        {
+            "name": "NVIDIA A100-SXM4-80GB",
+            "memory_total": 85899345920
+        },
+        {
+            "name": "NVIDIA A100-SXM4-80GB",
+            "memory_total": 85899345920
+        },
+        {
+            "name": "NVIDIA A100-SXM4-80GB",
+            "memory_total": 85899345920
+        },
+        {
+            "name": "NVIDIA DGX Display",
+            "memory_total": 4294967296
+        },
+        {
+            "name": "NVIDIA A100-SXM4-80GB",
+            "memory_total": 85899345920
+        }
+    ],
+    "memory": {
+        "total": 503.5396919250488
+    }
+}

wandb/run-20240327_190513-7p2x8a0l/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"train/loss": 0.1059, "train/grad_norm": 1.7170511484146118, "train/learning_rate": 8.78888888888889e-05, "train/epoch": 4.73, "train/global_step": 1050, "_timestamp": 1711565906.6150107, "_runtime": 3192.9672026634216, "_step": 42, "eval/loss": 0.30151915550231934, "eval/wer": 0.3249633006258209, "eval/runtime": 1360.3971, "eval/samples_per_second": 2.296, "eval/steps_per_second": 0.072}

wandb/run-20240327_190513-7p2x8a0l/logs/debug-internal.log ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20240327_190513-7p2x8a0l/logs/debug.log ADDED Viewed

	@@ -0,0 +1,28 @@

+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Current SDK version is 0.16.2
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Configure stats pid to 1894903
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/.config/wandb/settings
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Loading settings from /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/settings
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq.py', 'program_abspath': '/home/sanchit/distil-large-v3-hi-ft-frozen-encoder/run_speech_recognition_seq2seq.py', 'program': 'run_speech_recognition_seq2seq.py'}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:_log_setup():526] Logging user logs to /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/run-20240327_190513-7p2x8a0l/logs/debug.log
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:_log_setup():527] Logging internal logs to /home/sanchit/distil-large-v3-hi-ft-frozen-encoder/wandb/run-20240327_190513-7p2x8a0l/logs/debug-internal.log
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():566] calling init triggers
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():573] wandb.init called with sweep_config: {}
+config: {}
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():616] starting backend
+2024-03-27 19:05:13,645 INFO    MainThread:1894903 [wandb_init.py:init():620] setting up manager
+2024-03-27 19:05:13,646 INFO    MainThread:1894903 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-03-27 19:05:13,647 INFO    MainThread:1894903 [wandb_init.py:init():628] backend started and connected
+2024-03-27 19:05:13,651 INFO    MainThread:1894903 [wandb_init.py:init():720] updated telemetry
+2024-03-27 19:05:13,720 INFO    MainThread:1894903 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout
+2024-03-27 19:05:14,027 INFO    MainThread:1894903 [wandb_run.py:_on_init():2254] communicating current version
+2024-03-27 19:05:14,056 INFO    MainThread:1894903 [wandb_run.py:_on_init():2263] got version response upgrade_message: "wandb version 0.16.5 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
+2024-03-27 19:05:14,056 INFO    MainThread:1894903 [wandb_init.py:init():804] starting run threads in backend
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_console_start():2233] atexit reg
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_redirect():2088] redirect: wrap_raw
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_redirect():2153] Wrapping output streams.
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_run.py:_redirect():2178] Redirects installed.
+2024-03-27 19:05:14,727 INFO    MainThread:1894903 [wandb_init.py:init():847] run started, returning control to user process
+2024-03-27 19:05:14,729 INFO    MainThread:1894903 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 2, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distil-whisper/distil-large-v3', 'transformers_version': '4.40.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 0.0001, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 5000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Mar27_19-04-58_hf-dgx-01', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 4, 
'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': 
None}

wandb/run-20240327_190513-7p2x8a0l/run-7p2x8a0l.wandb ADDED Viewed

Binary file (865 kB). View file