"""Minimal Gradio demo that transcribes microphone audio with a Whisper model.

Configuration is read from the environment (optionally via a ``.env`` file):

* ``TASK_ASR`` -- pipeline task name passed to ``transformers.pipeline``.
* ``MODEL_WHISPER1`` -- Hugging Face model/repo id.
* ``MODEL_WHISPER1_FILE`` -- file pattern used by ``get_model_file``.
* ``GRADIO_SERVER_IP`` / ``GRADIO_SERVER_PORT`` -- bind address and port.
"""

import os

from dotenv import load_dotenv
from huggingface_hub import snapshot_download
import numpy as np
import gradio as gr
from transformers import pipeline

load_dotenv()

task_asr = os.getenv('TASK_ASR')
model_id = os.getenv('MODEL_WHISPER1')
model_file = os.getenv('MODEL_WHISPER1_FILE')


def get_model_file():
    """Download the files matching ``model_file`` from the ``model_id`` repo.

    Returns:
        Whatever ``snapshot_download`` returns for the request (per the
        huggingface_hub docs this is the local path of the snapshot folder).
    """
    cached_file = snapshot_download(
        repo_id=model_id,
        allow_patterns=model_file,
        local_dir=None,
    )
    return cached_file


# Built once at import time so every request reuses the same loaded model.
asr = pipeline(task_asr, model=model_id)


def transcribe(audio):
    """Transcribe one recording and return the recognized text.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. ``samples`` is a NumPy array; a 2-D array is treated as
            ``(n_samples, n_channels)`` -- presumably the layout ``gr.Audio``
            delivers for multi-channel input; confirm against the Gradio docs.

    Returns:
        The ``"text"`` field of the pipeline output, or ``""`` when there is
        no audio to transcribe.
    """
    # The button can be clicked before anything has been recorded.
    if audio is None:
        return ""

    sr, y = audio
    if y is None or np.size(y) == 0:
        return ""

    # Down-mix multi-channel recordings to mono before handing them over.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalize, but skip all-silent input: dividing by a zero peak
    # would fill the buffer with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return asr({"sampling_rate": sr, "raw": y})["text"]


if __name__ == "__main__":
    with gr.Blocks() as demo:
        myspeech = gr.Audio(sources=["microphone"])
        b1 = gr.Button("Click to Transcribe")
        mytranscription = gr.Textbox(
            label="speech transcription",
            autoscroll=True,
            max_lines=5,
        )
        b1.click(
            fn=transcribe,
            inputs=myspeech,
            outputs=mytranscription,
        )

    # int(None) raises TypeError; with the variable unset, pass None so that
    # Gradio falls back to its own default port selection.
    port_setting = os.getenv('GRADIO_SERVER_PORT')
    server_port = int(port_setting) if port_setting else None

    demo.queue()
    demo.launch(
        share=False,
        server_name=os.getenv('GRADIO_SERVER_IP'),
        server_port=server_port,
    )