Rachid Ammari committed on
Commit
84024ab
1 Parent(s): f9b0a05

refactored wav2vec models loading

Browse files
Files changed (1) hide show
  1. app.py +7 -15
app.py CHANGED
@@ -2,9 +2,12 @@ from transformers import pipeline
2
  import gradio as gr
3
  import whisper
4
 
5
- wav2vec_en_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
6
- wav2vec_fr_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french")
7
- wav2vec_es_model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
 
 
 
8
  whisper_model = whisper.load_model("base")
9
 
10
  def transcribe_audio(language=None, mic=None, file=None):
@@ -14,22 +17,11 @@ def transcribe_audio(language=None, mic=None, file=None):
14
  audio = file
15
  else:
16
  return "You must either provide a mic recording or a file"
17
- wav2vec_model = load_models(language)
18
  transcription = wav2vec_model(audio)["text"]
19
  transcription2 = whisper_model.transcribe(audio, language=language)["text"]
20
  return transcription, transcription2
21
 
22
- def load_models(lang):
23
- if lang == 'en':
24
- return wav2vec_en_model
25
- elif lang == 'fr':
26
- return wav2vec_fr_model
27
- elif lang == 'es':
28
- return wav2vec_es_model
29
- else:
30
- # default english
31
- return wav2vec_en_model
32
-
33
  title = "Speech2text comparison (Wav2vec vs Whisper)"
34
  description = """
35
  This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
 
2
  import gradio as gr
3
  import whisper
4
 
5
+
6
+ wav2vec_models = {
7
+ "en" : pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h"),
8
+ "fr" : pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french"),
9
+ "es" : pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
10
+ }
11
  whisper_model = whisper.load_model("base")
12
 
13
  def transcribe_audio(language=None, mic=None, file=None):
 
17
  audio = file
18
  else:
19
  return "You must either provide a mic recording or a file"
20
+ wav2vec_model = wav2vec_models[language]
21
  transcription = wav2vec_model(audio)["text"]
22
  transcription2 = whisper_model.transcribe(audio, language=language)["text"]
23
  return transcription, transcription2
24
 
 
 
 
 
 
 
 
 
 
 
 
25
  title = "Speech2text comparison (Wav2vec vs Whisper)"
26
  description = """
27
  This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n