Saving train state of step 50

Files changed (5) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "openai/whisper-large-v3",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,

 {
+  "_name_or_path": "./distil-large-v3-init",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,

distil-whisper/events.out.tfevents.1713339458.mycena-3090.108763.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:90f93557093b773784b3994c6fdde2d1b34296f695318bb742b29bfb082c0a48
+size 88

distil-whisper/events.out.tfevents.1713339486.mycena-3090.109177.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0438dc8d20b30444cd5f1848ec0cf9c3536e9e50be3f530c956772bab72e203
+size 696

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a81674d4973bf8cbf2e1b696063c781f3cf2b595e3a93bf160dbb2ee4e3289f
+size 3025686376

run_distillation.py CHANGED Viewed

@@ -1297,7 +1297,10 @@ def main():
         # we do not want to group tokens when computing the metrics
         label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
-        # wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
         wer_ortho = 100 * chinese_wer(pred_str, label_str)
         # normalize everything and re-compute the WER

         # we do not want to group tokens when computing the metrics
         label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
+        print("pred_str: ", pred_str)
+        print("label_str: ", label_str)
+        print("!!!!!!!!!!!!!!!!!!!!!!!!!")
+        wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
         wer_ortho = 100 * chinese_wer(pred_str, label_str)
         # normalize everything and re-compute the WER