Spaces:
Runtime error
Runtime error
Rachid Ammari
committed on
Commit
•
84024ab
1
Parent(s):
f9b0a05
refactored wav2vec models loading
Browse files
app.py
CHANGED
@@ -2,9 +2,12 @@ from transformers import pipeline
|
|
2 |
import gradio as gr
|
3 |
import whisper
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
8 |
whisper_model = whisper.load_model("base")
|
9 |
|
10 |
def transcribe_audio(language=None, mic=None, file=None):
|
@@ -14,22 +17,11 @@ def transcribe_audio(language=None, mic=None, file=None):
|
|
14 |
audio = file
|
15 |
else:
|
16 |
return "You must either provide a mic recording or a file"
|
17 |
-
wav2vec_model =
|
18 |
transcription = wav2vec_model(audio)["text"]
|
19 |
transcription2 = whisper_model.transcribe(audio, language=language)["text"]
|
20 |
return transcription, transcription2
|
21 |
|
22 |
-
def load_models(lang):
|
23 |
-
if lang == 'en':
|
24 |
-
return wav2vec_en_model
|
25 |
-
elif lang == 'fr':
|
26 |
-
return wav2vec_fr_model
|
27 |
-
elif lang == 'es':
|
28 |
-
return wav2vec_es_model
|
29 |
-
else:
|
30 |
-
# default english
|
31 |
-
return wav2vec_en_model
|
32 |
-
|
33 |
title = "Speech2text comparison (Wav2vec vs Whisper)"
|
34 |
description = """
|
35 |
This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
|
|
|
2 |
import gradio as gr
|
3 |
import whisper
|
4 |
|
5 |
+
|
6 |
+
wav2vec_models = {
|
7 |
+
"en" : pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h"),
|
8 |
+
"fr" : pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-french"),
|
9 |
+
"es" : pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53-spanish")
|
10 |
+
}
|
11 |
whisper_model = whisper.load_model("base")
|
12 |
|
13 |
def transcribe_audio(language=None, mic=None, file=None):
|
|
|
17 |
audio = file
|
18 |
else:
|
19 |
return "You must either provide a mic recording or a file"
|
20 |
+
wav2vec_model = wav2vec_models[language]
|
21 |
transcription = wav2vec_model(audio)["text"]
|
22 |
transcription2 = whisper_model.transcribe(audio, language=language)["text"]
|
23 |
return transcription, transcription2
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
title = "Speech2text comparison (Wav2vec vs Whisper)"
|
26 |
description = """
|
27 |
This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
|