import os
import gradio as gr
import spaces
from transformers import pipeline, Pipeline

is_hf_space = os.getenv("IS_HF_SPACE")
model_ids = [
    "",
    "mozilla-ai/whisper-small-gl (Galician)",
    "mozilla-ai/whisper-small-el (Greek)",
    "openai/whisper-tiny (Multilingual)",
    "openai/whisper-small (Multilingual)",
    "openai/whisper-medium (Multilingual)",
    "openai/whisper-large-v3 (Multilingual)",
    "openai/whisper-large-v3-turbo (Multilingual)",
]


def _load_local_model(model_dir: str) -> Pipeline:
    from transformers import (
        WhisperProcessor,
        WhisperTokenizer,
        WhisperFeatureExtractor,
        WhisperForConditionalGeneration,
    )

    processor = WhisperProcessor.from_pretrained(model_dir)
    tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
    feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
    model = WhisperForConditionalGeneration.from_pretrained(model_dir)

    try:
        return pipeline(
            task="automatic-speech-recognition",
            model=model,
            processor=processor,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
        )
    except Exception as e:
        return str(e)


def _load_hf_model(model_repo_id: str) -> Pipeline:
    try:
        return pipeline(
            "automatic-speech-recognition",
            model=model_repo_id,
        )
    except Exception as e:
        return str(e)


@spaces.GPU(duration=30)
def transcribe(
    dropdown_model_id: str,
    hf_model_id: str,
    local_model_id: str,
    audio: gr.Audio,
) -> str:
    if dropdown_model_id and not hf_model_id and not local_model_id:
        dropdown_model_id = dropdown_model_id.split(" (")[0]
        pipe = _load_hf_model(dropdown_model_id)
    elif hf_model_id and not local_model_id and not dropdown_model_id:
        pipe = _load_hf_model(hf_model_id)
    elif local_model_id and not hf_model_id and not dropdown_model_id:
        pipe = _load_local_model(local_model_id)
    else:
        return (
            "⚠️ Error: Please select or fill at least and only one of the options above"
        )
    if isinstance(pipe, str):
        # Exception raised when loading
        return f"⚠️ Error: {pipe}"
    text = pipe(audio)["text"]
    return text


def setup_gradio_demo():
    with gr.Blocks() as demo:
        gr.Markdown(
            """ # 🗣️ Speech-to-Text Transcription
            ### 1. Select which model to use from one of the options below.
            ### 2. Record a message or upload an audio file.
            ### 3. Click Transcribe to see the transcription generated by the model.
            """
        )
        ### Model selection ###

        with gr.Row():
            with gr.Column():
                dropdown_model = gr.Dropdown(
                    choices=model_ids, label="Option 1: Select a model"
                )
            with gr.Column():
                user_model = gr.Textbox(
                    label="Option 2: Paste HF model id",
                    placeholder="my-username/my-whisper-tiny",
                )
            with gr.Column(visible=not is_hf_space):
                local_model = gr.Textbox(
                    label="Option 3: Paste local path to model directory",
                    placeholder="artifacts/my-whisper-tiny",
                )

        ### Transcription ###
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Record a message / Upload audio file",
            show_download_button=True,
            max_length=30,
        )
        transcribe_button = gr.Button("Transcribe")
        transcribe_output = gr.Text(label="Output")

        transcribe_button.click(
            fn=transcribe,
            inputs=[dropdown_model, user_model, local_model, audio_input],
            outputs=transcribe_output,
        )

    demo.launch(ssr_mode=False)


if __name__ == "__main__":
    setup_gradio_demo()