import gradio as gr
import pyewts
import torch
from transformers import pipeline

# pyewts converter; its toUnicode() turns the model's EWTS (Wylie) output
# into Tibetan Unicode in `transcribe`.
converter = pyewts.pyewts()

# Use the GPU when one is available, otherwise fall back to the CPU so the
# demo still starts on machines without CUDA (hard-coding "cuda" fails there).
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline backed by the fine-tuned Whisper checkpoint.
# Replace the model id with your own Hub repository to serve another model.
pipe = pipeline(model="spsither/whisper-small-r2-70k-2ep", device=_DEVICE)


def transcribe(microphone, upload):
    """Transcribe an audio recording and return it as Tibetan Unicode text.

    Args:
        microphone: File path of the microphone recording, or a falsy value
            (``None`` / empty) when nothing was recorded.
        upload: File path of the uploaded audio file. Used only when
            ``microphone`` is falsy.

    Returns:
        The transcription after ``remove_repeated_words`` and
        ``converter.toUnicode`` have been applied, or an empty string when
        neither input was provided.
    """
    # The microphone recording takes precedence over the uploaded file.
    audio = microphone or upload
    if not audio:
        # Nothing to transcribe: return early instead of handing None to the
        # pipeline, which would raise.
        return ""

    text = pipe(audio)["text"]
    # NOTE(review): remove_repeated_words is not defined in the visible part
    # of this file -- confirm it is defined or imported before this runs.
    text = remove_repeated_words(text)
    return converter.toUnicode(text)


# Gradio UI: two audio inputs (microphone recording and file upload), both
# passed to `transcribe` as file paths, and a plain-text output.
# NOTE(review): `gr.Audio(source=...)` looks like the Gradio 3.x signature;
# newer releases use `sources=[...]` -- confirm against the pinned version.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs="text",
    title="Whisper Small Tibetan",
    # The description now names the "small" model, matching the title and the
    # checkpoint loaded above (it previously said "medium").
    description="Realtime demo for Tibetan speech recognition using a fine-tuned Whisper small model.",
)

# Start the Gradio app as soon as the module is executed.
iface.launch()