bofenghuang committed on
Commit
4503426
·
1 Parent(s): 79c3205
Files changed (1) hide show
  1. run_demo_multi_models.py +16 -9
run_demo_multi_models.py CHANGED
@@ -31,6 +31,7 @@ logger = logging.getLogger(__name__)
31
  logger.setLevel(logging.DEBUG)
32
 
33
  device = 0 if torch.cuda.is_available() else "cpu"
 
34
 
35
  cached_models = {}
36
 
@@ -38,6 +39,7 @@ def maybe_load_cached_pipeline(model_name):
38
  pipe = cached_models.get(model_name)
39
  if pipe is None:
40
  # load pipeline
 
41
  pipe = pipeline(
42
  task="automatic-speech-recognition",
43
  model=model_name,
@@ -105,11 +107,12 @@ demo = gr.Blocks()
105
  mf_transcribe = gr.Interface(
106
  fn=transcribe,
107
  inputs=[
108
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
109
- gr.inputs.Audio(source="upload", type="filepath", optional=True),
110
- gr.inputs.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
111
  ],
112
- outputs="text",
 
113
  layout="horizontal",
114
  theme="huggingface",
115
  title="Whisper Demo: Transcribe Audio",
@@ -120,10 +123,14 @@ mf_transcribe = gr.Interface(
120
  yt_transcribe = gr.Interface(
121
  fn=yt_transcribe,
122
  inputs=[
123
- gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
124
- gr.inputs.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
 
 
 
 
 
125
  ],
126
- outputs=["html", "text"],
127
  layout="horizontal",
128
  theme="huggingface",
129
  title="Whisper Demo: Transcribe YouTube",
@@ -134,5 +141,5 @@ yt_transcribe = gr.Interface(
134
  with demo:
135
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
136
 
137
- # demo.launch(server_name="0.0.0.0", debug=True, share=True)
138
- demo.launch(enable_queue=True)
 
31
  logger.setLevel(logging.DEBUG)
32
 
33
  device = 0 if torch.cuda.is_available() else "cpu"
34
+ logger.info(f"Model will be loaded on device {device}")
35
 
36
  cached_models = {}
37
 
 
39
  pipe = cached_models.get(model_name)
40
  if pipe is None:
41
  # load pipeline
42
+ # todo: set decoding option for pipeline
43
  pipe = pipeline(
44
  task="automatic-speech-recognition",
45
  model=model_name,
 
107
  mf_transcribe = gr.Interface(
108
  fn=transcribe,
109
  inputs=[
110
+ gr.Audio(source="microphone", type="filepath", optional=True, label="Record"),
111
+ gr.Audio(source="upload", type="filepath", optional=True, label="Upload File"),
112
+ gr.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
113
  ],
114
+ # outputs="text",
115
+ outputs=gr.Textbox(label="Transcription"),
116
  layout="horizontal",
117
  theme="huggingface",
118
  title="Whisper Demo: Transcribe Audio",
 
123
  yt_transcribe = gr.Interface(
124
  fn=yt_transcribe,
125
  inputs=[
126
+ gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
127
+ gr.Dropdown(choices=MODEL_NAMES, default=DEFAULT_MODEL_NAME, label="Whisper Model"),
128
+ ],
129
+ # outputs=["html", "text"],
130
+ outputs=[
131
+ gr.HTML(label="YouTube Page"),
132
+ gr.Textbox(label="Transcription"),
133
  ],
 
134
  layout="horizontal",
135
  theme="huggingface",
136
  title="Whisper Demo: Transcribe YouTube",
 
141
  with demo:
142
  gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
143
 
144
+ demo.launch(server_name="0.0.0.0", debug=True, share=True)
145
+ # demo.launch(enable_queue=True)