|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
|
|
# Pre-load one ai4bharat wav2vec ASR pipeline per supported language at import
# time, so the (slow) model download/initialisation happens once at startup
# rather than on every transcription request.
transcriber_hindi = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec-hindi")

transcriber_bang = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec_v1_bengali")

transcriber_odia = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec-odia")
|
|
|
|
|
def transcribe(audio, lang="hindi"):
    """Transcribe recorded audio with the ASR pipeline for *lang*.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        ``(sampling_rate, samples)`` pair as produced by ``gr.Audio``.
        Samples may be mono (1-D) or multi-channel (2-D, samples x channels).
    lang : str
        One of ``"hindi"``, ``"bangali"`` or ``"odia"`` (spellings match the
        Radio choices wired up in the Gradio interface).

    Returns
    -------
    str
        The recognized text.

    Raises
    ------
    ValueError
        If *lang* is not one of the supported languages (the original code
        silently returned ``None`` here).
    """
    sr, y = audio
    y = y.astype(np.float32)

    # Multi-channel recordings arrive as (samples, channels); average down to
    # mono so the 1-D "raw" input the pipeline expects is well-formed.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalize to [-1, 1]. Guard against silent or empty input: the
    # original `y /= np.max(np.abs(y))` divided by zero on silence (yielding
    # NaNs) and raised ValueError on an empty array.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y = y / peak

    # Dispatch before touching the model so an unsupported language fails
    # fast with a clear message instead of falling through to None.
    if lang == "hindi":
        transcriber = transcriber_hindi
    elif lang == "bangali":
        transcriber = transcriber_bang
    elif lang == "odia":
        transcriber = transcriber_odia
    else:
        raise ValueError(f"Unsupported language: {lang!r}")

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
|
|
|
|
# Gradio UI: microphone recording plus a language selector feeding transcribe().
# The Radio choice strings must match the `lang` values transcribe() dispatches
# on (including the "bangali" spelling).
# NOTE(review): `source=` is the Gradio 3.x Audio parameter; Gradio 4.x renamed
# it to `sources=["microphone"]` — confirm against the installed version.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(source="microphone"), gr.Radio(["hindi", "bangali", "odia"])],
    outputs="text",
)

# Launch only when executed as a script, so importing this module (e.g. from
# tests or another app) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()