import gradio as gr
import numpy as np
from transformers import pipeline

# Load one ASR pipeline per supported language.
transcriber_hindi = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec-hindi")
transcriber_bengali = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec_v1_bengali")
transcriber_odia = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec-odia")

def transcribe(audio, lang="hindi"):
    sr, y = audio
    # Convert to float32 and normalize to [-1, 1] before passing to the model.
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    if lang == "hindi":
        return transcriber_hindi({"sampling_rate": sr, "raw": y})["text"]
    if lang == "bengali":
        return transcriber_bengali({"sampling_rate": sr, "raw": y})["text"]
    if lang == "odia":
        return transcriber_odia({"sampling_rate": sr, "raw": y})["text"]

demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(source="microphone"), gr.Radio(["hindi", "bengali", "odia"])],
    outputs="text",
)

demo.launch()