github-actions[bot] committed
Commit bdc2933 · Parent: b399068

Sync with https://github.com/mozilla-ai/speech-to-text-finetune

Files changed (1): app.py (+50 −74)
app.py CHANGED
@@ -1,28 +1,22 @@
 import os
-from pathlib import Path
-from typing import Tuple
 import gradio as gr
+import spaces
 from transformers import pipeline, Pipeline
-from huggingface_hub import repo_exists
-
-
-from speech_to_text_finetune.config import LANGUAGES_NAME_TO_ID
 
 is_hf_space = os.getenv("IS_HF_SPACE")
-languages = LANGUAGES_NAME_TO_ID.keys()
 model_ids = [
     "",
-    "openai/whisper-tiny",
-    "openai/whisper-small",
-    "openai/whisper-medium",
-    "openai/whisper-large-v3",
-    "openai/whisper-large-v3-turbo",
+    "mozilla-ai/whisper-small-gl (Galician)",
+    "mozilla-ai/whisper-small-el (Greek)",
+    "openai/whisper-tiny (Multilingual)",
+    "openai/whisper-small (Multilingual)",
+    "openai/whisper-medium (Multilingual)",
+    "openai/whisper-large-v3 (Multilingual)",
+    "openai/whisper-large-v3-turbo (Multilingual)",
 ]
 
 
-def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, str]:
-    if not Path(model_dir).is_dir():
-        return None, f"⚠️ Couldn't find local model directory: {model_dir}"
+def _load_local_model(model_dir: str) -> Pipeline:
     from transformers import (
         WhisperProcessor,
         WhisperTokenizer,
@@ -31,56 +25,53 @@ def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, str]:
     )
 
     processor = WhisperProcessor.from_pretrained(model_dir)
-    tokenizer = WhisperTokenizer.from_pretrained(
-        model_dir, language=language, task="transcribe"
-    )
+    tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
     feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
     model = WhisperForConditionalGeneration.from_pretrained(model_dir)
 
-    return pipeline(
-        task="automatic-speech-recognition",
-        model=model,
-        processor=processor,
-        tokenizer=tokenizer,
-        feature_extractor=feature_extractor,
-    ), f"✅ Local model has been loaded from {model_dir}."
+    try:
+        return pipeline(
+            task="automatic-speech-recognition",
+            model=model,
+            processor=processor,
+            tokenizer=tokenizer,
+            feature_extractor=feature_extractor,
+        )
+    except Exception as e:
+        return str(e)
 
 
-def _load_hf_model(model_repo_id: str, language: str) -> Tuple[Pipeline | None, str]:
-    if not repo_exists(model_repo_id):
-        return (
-            None,
-            f"⚠️ Couldn't find {model_repo_id} on Hugging Face. If its a private repo, make sure you are logged in locally.",
+def _load_hf_model(model_repo_id: str) -> Pipeline:
+    try:
+        return pipeline(
+            "automatic-speech-recognition",
+            model=model_repo_id,
         )
-    return pipeline(
-        "automatic-speech-recognition",
-        model=model_repo_id,
-        generate_kwargs={"language": language},
-    ), f"✅ HF Model {model_repo_id} has been loaded."
+    except Exception as e:
+        return str(e)
 
 
-def load_model(
-    language: str, dropdown_model_id: str, hf_model_id: str, local_model_id: str
-) -> Tuple[Pipeline, str]:
+@spaces.GPU(duration=30)
+def transcribe(
+    dropdown_model_id: str,
+    hf_model_id: str,
+    local_model_id: str,
+    audio: gr.Audio,
+) -> str:
     if dropdown_model_id and not hf_model_id and not local_model_id:
-        yield None, f"Loading {dropdown_model_id}..."
-        yield _load_hf_model(dropdown_model_id, language)
+        dropdown_model_id = dropdown_model_id.split(" (")[0]
+        pipe = _load_hf_model(dropdown_model_id)
     elif hf_model_id and not local_model_id and not dropdown_model_id:
-        yield None, f"Loading {hf_model_id}..."
-        yield _load_hf_model(hf_model_id, language)
+        pipe = _load_hf_model(hf_model_id)
     elif local_model_id and not hf_model_id and not dropdown_model_id:
-        yield None, f"Loading {local_model_id}..."
-        yield _load_local_model(local_model_id, language)
+        pipe = _load_local_model(local_model_id)
     else:
-        yield (
-            None,
-            "⚠️ Please select or fill at least and only one of the options above",
+        return (
+            "⚠️ Error: Please select or fill at least and only one of the options above"
         )
-    if not language:
-        yield None, "⚠️ Please select a language from the dropdown"
-
-
-def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
+    if isinstance(pipe, str):
+        # Exception raised when loading
+        return f"⚠️ Error: {pipe}"
     text = pipe(audio)["text"]
     return text
 
@@ -89,18 +80,12 @@ def setup_gradio_demo():
     with gr.Blocks() as demo:
         gr.Markdown(
             """ # 🗣️ Speech-to-Text Transcription
-            ### 1. Select a language from the dropdown menu.
-            ### 2. Select which model to load from one of the options below.
-            ### 3. Load the model by clicking the Load model button.
-            ### 4. Record a message or upload an audio file.
-            ### 5. Click Transcribe to see the transcription generated by the model.
+            ### 1. Select which model to use from one of the options below.
+            ### 2. Record a message or upload an audio file.
+            ### 3. Click Transcribe to see the transcription generated by the model.
             """
         )
-        ### Language & Model selection ###
-
-        selected_lang = gr.Dropdown(
-            choices=list(languages), value=None, label="Select a language"
-        )
+        ### Model selection ###
 
         with gr.Row():
             with gr.Column():
@@ -118,9 +103,6 @@ def setup_gradio_demo():
                 placeholder="artifacts/my-whisper-tiny",
             )
 
-        load_model_button = gr.Button("Load model")
-        model_loaded = gr.Markdown()
-
        ### Transcription ###
         audio_input = gr.Audio(
             sources=["microphone", "upload"],
@@ -132,16 +114,10 @@ def setup_gradio_demo():
         transcribe_button = gr.Button("Transcribe")
         transcribe_output = gr.Text(label="Output")
 
-        ### Event listeners ###
-        model = gr.State()
-        load_model_button.click(
-            fn=load_model,
-            inputs=[selected_lang, dropdown_model, user_model, local_model],
-            outputs=[model, model_loaded],
-        )
-
         transcribe_button.click(
-            fn=transcribe, inputs=[model, audio_input], outputs=transcribe_output
+            fn=transcribe,
+            inputs=[dropdown_model, user_model, local_model, audio_input],
+            outputs=transcribe_output,
         )
 
     demo.launch()
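Two short notes on the new code paths. First, the dropdown entries now pair each repo id with a display-only language tag, so transcribe has to strip the suffix before using the value as a Hub repo id. A minimal sketch of that parsing, mirroring the split(" (")[0] call in the diff:

# Dropdown labels pair a Hub repo id with a display-only language tag.
label = "mozilla-ai/whisper-small-gl (Galician)"

# Same parsing as in transcribe(): keep everything before " (".
repo_id = label.split(" (")[0]
assert repo_id == "mozilla-ai/whisper-small-gl"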
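Second, model loading now happens inside transcribe itself, under the spaces.GPU(duration=30) decorator used by ZeroGPU Spaces, instead of behind a separate Load model button. A minimal sketch of the underlying pipeline call pattern, assuming a hypothetical local file sample.wav (the gr.Audio component's exact type isn't visible in these hunks):

from transformers import pipeline

# Same call as _load_hf_model: repo id plus the ASR task; the language is no
# longer forced through generate_kwargs, so Whisper falls back to auto-detection.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

# transcribe() does pipe(audio)["text"]; a plain file path works the same way.
print(pipe("sample.wav")["text"])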