BenDaouda committed on
Commit
5f34871
1 Parent(s): 314bad3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -9
app.py CHANGED
@@ -1,15 +1,22 @@
 
 
1
  import gradio as gr
2
 
3
- title = "wolof_ASR"
4
- description = "Wolof ASR"
5
 
6
- iface = gr.Interface.load(
7
- "BenDaouda/wav2vec2-large-xls-r-300m-wolof-test-coloab",
8
- inputs=gr.Audio(source="microphone", type="filepath"),
9
- outputs="text",
10
- title=title,
11
- description=description,
12
- enable_queue=True
 
 
 
 
 
13
  )
14
 
15
  iface.launch()
 
1
+ from transformers import AutoModelForCTC, Wav2Vec2Tokenizer
2
+ import torch
3
  import gradio as gr
4
 
5
+ model = Wav2Vec2ForCTC.from_pretrained("BenDaouda/wav2vec2-large-xls-r-300m-wolof-test-coloab")
6
+ processor = Wav2Vec2Processor.from_pretrained("BenDaouda/wav2vec2-large-xls-r-300m-wolof-test-coloab")
7
 
8
+ def transcribe(audio):
9
+ input_values = tokenizer(audio, return_tensors="pt").input_values
10
+ with torch.no_grad():
11
+ logits = model(input_values).logits
12
+ predicted_ids = torch.argmax(logits, dim=-1)
13
+ transcription = tokenizer.batch_decode(predicted_ids)[0]
14
+ return transcription
15
+
16
+ iface = gr.Interface(
17
+ fn=transcribe,
18
+ inputs=gr.inputs.Audio(source="microphone", type="file", resample_to=16000),
19
+ outputs="text"
20
  )
21
 
22
  iface.launch()