patrickvonplaten commited on
Commit
81a9d24
1 Parent(s): 4487a27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -20
app.py CHANGED
@@ -13,10 +13,10 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
13
  use_flash_attention_2 = is_flash_attn_2_available()
14
 
15
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
16
- "openai/whisper-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2
17
  )
18
  distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
19
- "distil-whisper/distil-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2
20
  )
21
 
22
  if not use_flash_attention_2:
@@ -24,7 +24,7 @@ if not use_flash_attention_2:
24
  model = model.to_bettertransformer()
25
  distilled_model = distilled_model.to_bettertransformer()
26
 
27
- processor = AutoProcessor.from_pretrained("openai/whisper-large-v2")
28
 
29
  model.to(device)
30
  distilled_model.to(device)
@@ -38,7 +38,7 @@ pipe = pipeline(
38
  chunk_length_s=30,
39
  torch_dtype=torch_dtype,
40
  device=device,
41
- generate_kwargs={"language": "en", "task": "transcribe"},
42
  return_timestamps=True
43
  )
44
  pipe_forward = pipe._forward
@@ -52,7 +52,7 @@ distil_pipe = pipeline(
52
  chunk_length_s=15,
53
  torch_dtype=torch_dtype,
54
  device=device,
55
- generate_kwargs={"language": "en", "task": "transcribe"},
56
  )
57
  distil_pipe_forward = distil_pipe._forward
58
 
@@ -110,7 +110,7 @@ if __name__ == "__main__":
110
  "
111
  >
112
  <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
113
- Whisper vs Distil-Whisper: Speed Comparison
114
  </h1>
115
  </div>
116
  </div>
@@ -133,22 +133,11 @@ if __name__ == "__main__":
133
  audio = gr.components.Audio(type="filepath", label="Audio input")
134
  button = gr.Button("Transcribe")
135
  with gr.Row():
136
- distil_runtime = gr.components.Textbox(label="Distil-Whisper Transcription Time (s)")
137
- runtime = gr.components.Textbox(label="Whisper Transcription Time (s)")
138
- with gr.Row():
139
- distil_transcription = gr.components.Textbox(label="Distil-Whisper Transcription", show_copy_button=True)
140
- transcription = gr.components.Textbox(label="Whisper Transcription", show_copy_button=True)
141
  button.click(
142
  fn=transcribe,
143
  inputs=audio,
144
- outputs=[distil_transcription, distil_runtime, transcription, runtime],
145
- )
146
- gr.Markdown("## Examples")
147
- gr.Examples(
148
- [["./assets/example_1.wav"], ["./assets/example_2.wav"]],
149
- audio,
150
- outputs=[distil_transcription, distil_runtime, transcription, runtime],
151
- fn=transcribe,
152
- cache_examples=False,
153
  )
154
  demo.queue(max_size=10).launch()
 
13
  use_flash_attention_2 = is_flash_attn_2_available()
14
 
15
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
16
+ "openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2
17
  )
18
  distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
19
+ "primeline/whisper-large-v3-german", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=False, use_flash_attention_2=use_flash_attention_2
20
  )
21
 
22
  if not use_flash_attention_2:
 
24
  model = model.to_bettertransformer()
25
  distilled_model = distilled_model.to_bettertransformer()
26
 
27
+ processor = AutoProcessor.from_pretrained("openai/whisper-large-v3")
28
 
29
  model.to(device)
30
  distilled_model.to(device)
 
38
  chunk_length_s=30,
39
  torch_dtype=torch_dtype,
40
  device=device,
41
+ generate_kwargs={"language": "de", "task": "transcribe"},
42
  return_timestamps=True
43
  )
44
  pipe_forward = pipe._forward
 
52
  chunk_length_s=15,
53
  torch_dtype=torch_dtype,
54
  device=device,
55
+ generate_kwargs={"language": "de", "task": "transcribe"},
56
  )
57
  distil_pipe_forward = distil_pipe._forward
58
 
 
110
  "
111
  >
112
  <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
113
+ Whisper-v3 vs Whisper-v3-German
114
  </h1>
115
  </div>
116
  </div>
 
133
  audio = gr.components.Audio(type="filepath", label="Audio input")
134
  button = gr.Button("Transcribe")
135
  with gr.Row():
136
+ distil_transcription = gr.components.Textbox(label="Whisper-v3-German Transcription", show_copy_button=True)
137
+ transcription = gr.components.Textbox(label="Whisper-v3 Transcription", show_copy_button=True)
 
 
 
138
  button.click(
139
  fn=transcribe,
140
  inputs=audio,
141
+ outputs=[distil_transcription, transcription],
 
 
 
 
 
 
 
 
142
  )
143
  demo.queue(max_size=10).launch()