import gradio as gr
from transformers import pipeline

# Load the ASR and text-translation pipelines
asr_pipeline = pipeline(
    "automatic-speech-recognition", model="distil-whisper/distil-small.en"
)
translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M")

# The dropdown choices below are the language codes the translation model expects.
# They are listed under "Languages in FLORES-200":
# https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200
TARGET_LANGUAGES = ["ben_Beng", "hin_Deva", "fra_Latn", "spa_Latn", "deu_Latn"]


def transcribe_speech(filepath, target_language):
    """Transcribe the audio, then feed the transcription to the translator pipeline."""
    if not filepath:
        return "No audio found, please retry.", ""

    # Transcribe the speech to English text
    output = asr_pipeline(filepath)
    transcription = output["text"]

    # Translate the transcription into the selected target language
    text_translated = translator(
        transcription, src_lang="eng_Latn", tgt_lang=target_language
    )
    return transcription, text_translated[0]["translation_text"]


# Gradio interfaces for microphone input and file upload
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        gr.Dropdown(
            label="Target Language", choices=TARGET_LANGUAGES, value="ben_Beng"
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.Textbox(label="Translation", lines=5),
    ],
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=[
        gr.Audio(sources=["upload"], type="filepath"),
        gr.Dropdown(
            label="Target Language", choices=TARGET_LANGUAGES, value="ben_Beng"
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.Textbox(label="Translation", lines=5),
    ],
    allow_flagging="never",
)

# Create the demo with tabbed interfaces
demo = gr.Blocks()
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch the Gradio demo
if __name__ == "__main__":
    demo.launch()
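
# A minimal sanity-check sketch: transcribe_speech can also be called directly,
# without launching the Gradio UI. The file name "sample.wav" below is a
# hypothetical placeholder; substitute any local English-language audio clip:
#
#   transcription, translation = transcribe_speech("sample.wav", "fra_Latn")
#   print(transcription)  # English transcription from the ASR pipeline
#   print(translation)    # French translation from the NLLB pipeline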