Spaces:

RachAmm
/

Wav2vec-vs-Whisper

Runtime error

App Files Files Community

Rachid Ammari committed on Oct 3, 2022

Commit

5621130

•

1 Parent(s): 99a5348

Added Spanish model and example

Browse files

Files changed (2) hide show

app.py +9 -2
momiasartesecretodelantiguoegipto-nationalgeographicespana-ivoox73191074.mp3000.mp3 +0 -0

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import whisper
 wav2vec_en_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 wav2vec_fr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french")
 whisper_model = whisper.load_model("base")
 def transcribe_audio(language=None, mic=None, file=None):
@@ -23,6 +24,8 @@ def load_models(lang):
         return wav2vec_en_model
     elif lang == 'fr':
         return wav2vec_fr_model
     else:
 			# default english
         return wav2vec_en_model
@@ -33,12 +36,16 @@ This Space allows easy comparisons for transcribed texts between Facebook's Wav2
 (Even if Whisper includes a language detection and even an automatic translation, here we have decided to select the language to speed up the transcription and to focus only on the quality of the transcriptions. The default language is english)
 """
 article = "Check out [the OpenAI Whisper model](https://github.com/openai/whisper) and [the Facebook Wav2vec model](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/) that this demo is based off of."
-examples = [["en", None, "english_sentence.flac"], ["en", None, "6_Steps_To_Hit_ANY_Goal.mp3000.mp3"], ["fr", None, "2022-a-Droite-un-fauteuil-pour-trois-3034044.mp3000.mp3"], ["fr", None, "podcast-bdl-episode-5-mix-v2.mp3000.mp3"]]
 gr.Interface(
     fn=transcribe_audio,
     inputs=[
-        gr.Radio(label="Language", choices=["en", "fr"], value="en"),
         gr.Audio(source="microphone", type="filepath", optional=True),
         gr.Audio(source="upload", type="filepath", optional=True),
     ],

 wav2vec_en_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 wav2vec_fr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french")
+wav2vec_es_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
 whisper_model = whisper.load_model("base")
 def transcribe_audio(language=None, mic=None, file=None):
         return wav2vec_en_model
     elif lang == 'fr':
         return wav2vec_fr_model
+    elif lang == 'es':
+        return wav2vec_es_model
     else:
 			# default english
         return wav2vec_en_model
 (Even if Whisper includes a language detection and even an automatic translation, here we have decided to select the language to speed up the transcription and to focus only on the quality of the transcriptions. The default language is english)
 """
 article = "Check out [the OpenAI Whisper model](https://github.com/openai/whisper) and [the Facebook Wav2vec model](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/) that this demo is based off of."
+examples = [["en", None, "english_sentence.flac"],
+            ["en", None, "6_Steps_To_Hit_ANY_Goal.mp3000.mp3"],
+            ["fr", None, "2022-a-Droite-un-fauteuil-pour-trois-3034044.mp3000.mp3"],
+            ["fr", None, "podcast-bdl-episode-5-mix-v2.mp3000.mp3"],
+            ["es", None, "momiasartesecretodelantiguoegipto-nationalgeographicespana-ivoox73191074.mp3000.mp3"]]
 gr.Interface(
     fn=transcribe_audio,
     inputs=[
+        gr.Radio(label="Language", choices=["en", "fr", "es"], value="en"),
         gr.Audio(source="microphone", type="filepath", optional=True),
         gr.Audio(source="upload", type="filepath", optional=True),
     ],

momiasartesecretodelantiguoegipto-nationalgeographicespana-ivoox73191074.mp3000.mp3 ADDED Viewed

Binary file (480 kB). View file