pablocst committed on
Commit
598487f
1 Parent(s): 86a86db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -17,8 +17,8 @@ def record_opt(msg):
17
 
18
 
19
  def speech_recognize(audio, model_name, hf_token, opt):
20
- opt += record_opt("转录开始 ...")
21
- yield "转录中,请稍等...", opt
22
  start = time.monotonic()
23
 
24
  with open(audio, "rb") as f:
@@ -32,10 +32,10 @@ def speech_recognize(audio, model_name, hf_token, opt):
32
  print(f">>> text is {text}")
33
  text = text['text']
34
  except:
35
- text = f"转录失败:\n{traceback.format_exc()}"
36
 
37
  cost = time.monotonic() - start
38
- opt += record_opt(f"转录结束,耗时{cost:.3f}s")
39
  yield text, opt
40
 
41
  import gradio as gr
@@ -44,18 +44,18 @@ with gr.Blocks() as demo:
44
  gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
45
  with gr.Row():
46
  gr.Markdown(
47
- """🤗 调用 huggingface API,使用 OpenAI Whisper 模型进行语音识别,也可以称为语音转文本(Speech to Text, STT)
48
 
49
- 👉 目的是练习使用 Gradio Audio 组件和探索使用 Huggingface Inference API
50
 
51
- > 💡提示:需要填写 Huggingface token 来调用 Huggingface Inference API
52
  """
53
  )
54
  with gr.Row():
55
  with gr.Column():
56
  audio = gr.Audio(source="microphone", type="filepath")
57
  model_name = gr.Dropdown(
58
- label="选择模型",
59
  choices=[
60
  "openai/whisper-large-v3",
61
  "openai/whisper-large-v2",
@@ -69,26 +69,26 @@ with gr.Blocks() as demo:
69
  )
70
  hf_token = gr.Textbox(label="Huggingface token")
71
  with gr.Column():
72
- output = gr.Textbox(label="转录结果")
73
- operation = gr.Textbox(label="组件操作历史")
74
  audio.start_recording(
75
- lambda x: x + record_opt("开始录音 ..."),
76
  inputs=operation, outputs=operation
77
  )
78
  audio.play(
79
- lambda x: x + record_opt("播放录音"),
80
  inputs=operation, outputs=operation
81
  )
82
  audio.pause(
83
- lambda x: x + record_opt("暂停播放"),
84
  inputs=operation, outputs=operation
85
  )
86
  audio.stop(
87
- lambda x: x + record_opt("停止播放"),
88
  inputs=operation, outputs=operation
89
  )
90
  audio.end(
91
- lambda x: x + record_opt("播放完毕"),
92
  inputs=operation, outputs=operation
93
  )
94
  audio.stop_recording(speech_recognize, inputs=[audio, model_name, hf_token, operation], outputs=[output, operation])
 
17
 
18
 
19
  def speech_recognize(audio, model_name, hf_token, opt):
20
+ opt += record_opt("Transcription starts ...")
21
+ yield "Transcribing, please wait..", opt
22
  start = time.monotonic()
23
 
24
  with open(audio, "rb") as f:
 
32
  print(f">>> text is {text}")
33
  text = text['text']
34
  except:
35
+ text = f"Transcription failed:\n{traceback.format_exc()}"
36
 
37
  cost = time.monotonic() - start
38
+ opt += record_opt(f"Transcription ends, time consuming{cost:.3f}s")
39
  yield text, opt
40
 
41
  import gradio as gr
 
44
  gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
45
  with gr.Row():
46
  gr.Markdown(
47
+ """🤗 Call the huggingface API and use the OpenAI Whisper model for speech recognition, which can also be called speech to text(Speech to Text, STT)
48
 
49
+ 👉 The purpose is to practice using the Gradio Audio component and explore using the Huggingface Inference API
50
 
51
+ > 💡Tip: You need to fill in the Huggingface token to call the Huggingface Inference API
52
  """
53
  )
54
  with gr.Row():
55
  with gr.Column():
56
  audio = gr.Audio(source="microphone", type="filepath")
57
  model_name = gr.Dropdown(
58
+ label="Select model",
59
  choices=[
60
  "openai/whisper-large-v3",
61
  "openai/whisper-large-v2",
 
69
  )
70
  hf_token = gr.Textbox(label="Huggingface token")
71
  with gr.Column():
72
+ output = gr.Textbox(label="Transcription results")
73
+ operation = gr.Textbox(label="Component operation history")
74
  audio.start_recording(
75
+ lambda x: x + record_opt("Start recording ..."),
76
  inputs=operation, outputs=operation
77
  )
78
  audio.play(
79
+ lambda x: x + record_opt("Play recording"),
80
  inputs=operation, outputs=operation
81
  )
82
  audio.pause(
83
+ lambda x: x + record_opt("Pause playback"),
84
  inputs=operation, outputs=operation
85
  )
86
  audio.stop(
87
+ lambda x: x + record_opt("Stop play"),
88
  inputs=operation, outputs=operation
89
  )
90
  audio.end(
91
+ lambda x: x + record_opt("Finished playing"),
92
  inputs=operation, outputs=operation
93
  )
94
  audio.stop_recording(speech_recognize, inputs=[audio, model_name, hf_token, operation], outputs=[output, operation])