jiuuee commited on
Commit
046c2b1
1 Parent(s): 71c5789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -2
app.py CHANGED
@@ -1,3 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  '''
2
  import gradio as gr
3
  from transformers import pipeline
@@ -5,7 +26,7 @@ from transformers import pipeline
5
  asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b", device=0)
6
  qa_pipeline = pipeline("question-answering", model="LLAMA/llama3-base-qa", tokenizer="LLAMA/llama3-base-qa")
7
  tts_pipeline = pipeline("text-to-speech", model="patrickvonplaten/vits-large", device=0)
8
- '''
9
 
10
  import gradio as gr
11
  import json
@@ -187,7 +208,7 @@ with gr.Blocks(
187
  demo.queue()
188
  demo.launch()
189
 
190
- '''
191
 
192
 
193
  # Function to capture audio using Canary ASR
 
1
# --- Model setup -------------------------------------------------------------
import gradio as gr
from nemo.collections.asr.models import ASRModel

# Fetch the pretrained Canary checkpoint and put the model in inference
# mode (disables dropout / batch-norm updates).
model = ASRModel.from_pretrained("nvidia/canary-1b")
model.eval()
8
def transcribe(audio):
    """Transcribe a single audio input with the NeMo Canary ASR model.

    Parameters
    ----------
    audio :
        Value produced by the Gradio ``Audio`` component (a filepath when
        audio was provided), or ``None`` when the user submitted nothing.

    Returns
    -------
    The first transcription result returned by ``model.transcribe``.

    Raises
    ------
    gr.Error
        If no audio input was provided.
    """
    if audio is None:
        # BUG FIX: the original raised gr.InterfaceError, which does not
        # exist in the Gradio API (that line would itself fail with an
        # AttributeError). gr.Error is the exception Gradio surfaces to the
        # user in the UI.
        raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")

    # model.transcribe takes a batch (list) of inputs and returns a list of
    # results; we pass a single-item batch and unwrap it.
    transcription = model.transcribe([audio])

    return transcription[0]
16
+
17
# --- Gradio UI ---------------------------------------------------------------
# Audio input component: accepts an uploaded file or microphone capture.
audio_input = gr.components.Audio()

# Wire the transcribe() callback into a simple audio-in / text-out app.
iface = gr.Interface(
    transcribe,
    audio_input,
    "text",
    title="ASR with NeMo Canary Model",
)
iface.launch()
21
+
22
  '''
23
  import gradio as gr
24
  from transformers import pipeline
 
26
  asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b", device=0)
27
  qa_pipeline = pipeline("question-answering", model="LLAMA/llama3-base-qa", tokenizer="LLAMA/llama3-base-qa")
28
  tts_pipeline = pipeline("text-to-speech", model="patrickvonplaten/vits-large", device=0)
29
+
30
 
31
  import gradio as gr
32
  import json
 
208
  demo.queue()
209
  demo.launch()
210
 
211
+
212
 
213
 
214
  # Function to capture audio using Canary ASR