import os
from dotenv import load_dotenv
from huggingface_hub import snapshot_download 
import numpy as np
import gradio as gr
from transformers import pipeline

# Load variables from a .env file (if one is found) into the process
# environment before any of the settings below are read.
load_dotenv()

# ASR settings, all taken from the environment. Each is None when the
# corresponding variable is not set.
task_asr = os.environ.get('TASK_ASR')
model_id = os.environ.get('MODEL_WHISPER1')
model_file = os.environ.get('MODEL_WHISPER1_FILE')

def get_model_file():
    """Download the configured model file(s) and return the local path.

    Fetches from the Hub repository named by ``model_id``, restricted to
    files matching ``model_file``. ``local_dir`` is left as ``None``, so
    the download location is whatever ``snapshot_download`` uses by default.

    Returns:
        The value returned by ``snapshot_download`` (per the
        huggingface_hub docs, the path of the local snapshot folder).
    """
    download_options = {
        "repo_id": model_id,
        "allow_patterns": model_file,
        "local_dir": None,
    }
    return snapshot_download(**download_options)


# Build the speech-recognition pipeline once at import time so every
# transcription request reuses the same loaded model.
asr = pipeline(task=task_asr, model=model_id)

def transcribe(audio):
    """Transcribe a recording captured by the Gradio audio component.

    Args:
        audio: ``(sampling_rate, samples)`` tuple as delivered by
            ``gr.Audio``, or ``None`` when nothing has been recorded.
            ``samples`` is a NumPy array; multi-channel input is assumed
            to be laid out as ``(samples, channels)``.

    Returns:
        The recognised text, or an empty string when there is no audio
        to transcribe.
    """
    # The button can be clicked before anything is recorded; in that case
    # there is no tuple to unpack.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)

    # np.max raises ValueError on an empty array, and there is nothing to
    # transcribe anyway.
    if y.size == 0:
        return ""

    # The pipeline expects a 1-D waveform; down-mix multi-channel input by
    # averaging across the channel axis.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Skip for all-zero (silent) input, where
    # dividing by the peak would fill the buffer with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return asr({"sampling_rate": sr, "raw": y})["text"]

if __name__ == "__main__":
    # Lay out the UI: a microphone input, a trigger button and a text box
    # that receives the transcription.
    with gr.Blocks() as demo:
        myspeech = gr.Audio(sources=["microphone"])
        b1 = gr.Button("Click to Transcribe")
        mytranscription = gr.Textbox(
            label="speech transcription",
            autoscroll=True,
            max_lines=5,
        )

        b1.click(
            fn=transcribe,
            inputs=myspeech,
            outputs=mytranscription,
        )

    # int(None) raises TypeError when the variable is unset, so only convert
    # when a value is present; None lets Gradio pick its default port.
    port = os.getenv('GRADIO_SERVER_PORT')

    demo.queue()
    demo.launch(
        share=False,
        server_name=os.getenv('GRADIO_SERVER_IP'),
        server_port=int(port) if port else None,
    )