import gradio as gr
from transformers import pipeline
import numpy as np
import pandas as pd
import re
from collections import Counter
from functools import reduce

# Whisper ASR pipeline, loaded once at module import so every streamed
# chunk reuses the same model instance.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    return_timestamps=True,
)


def transcribe_live(state, words_list, new_chunk):
    """Accumulate streamed audio, re-transcribe it, and count filler words.

    Args:
        state: dict carried across stream events. Keys used: "stream"
            (accumulated mono float32 audio or None), "full_transcription"
            (str), "counts_of_words" (dict word -> int), and optionally
            "highlighted_transcription".
        words_list: comma-separated filler words from the textbox.
        new_chunk: (sample_rate, samples) tuple from gr.Audio streaming,
            or None before recording starts.

    Returns:
        4-tuple of (new_state, counts_of_words, transcription_text,
        highlighted_transcription) matching the stream event's outputs.
    """
    try:
        # Drop blank entries so "like, , so" does not yield an empty word,
        # which would corrupt the regex below.
        words_to_check_for = [
            word.strip().lower() for word in words_list.split(",") if word.strip()
        ]
    except AttributeError:
        # words_list was not a string (e.g. None).
        gr.Warning("Please enter a valid list of words to check for")
        words_to_check_for = []

    stream = state.get("stream", None)
    previous_transcription = state.get("full_transcription", "")
    previous_counts_of_words = state.get(
        "counts_of_words", {word: 0 for word in words_to_check_for}
    )
    previous_highlighted = state.get(
        "highlighted_transcription",
        {"text": previous_transcription, "entities": []},
    )

    if new_chunk is None:
        gr.Info("You can start transcribing by clicking on the Record button")
        print("new chunk is None")
        # Always return 4 values so Gradio can map them onto `outputs`.
        return (
            state,
            previous_counts_of_words,
            previous_transcription,
            previous_highlighted,
        )

    sr, y = new_chunk

    # Convert to mono if stereo.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    # Normalize to [-1, 1]; guard against an all-silence chunk, where
    # dividing by a zero peak would fill the buffer with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # Append the new chunk to the running audio buffer.
    stream = y if stream is None else np.concatenate([stream, y])

    try:
        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
    except Exception as e:
        gr.Error(f"Transcription failed. Error: {e}")
        print(f"Transcription failed. Error: {e}")
        return (
            state,
            previous_counts_of_words,
            previous_transcription,
            previous_highlighted,
        )

    full_transcription_text = new_transcription["text"]
    full_transcription_text_lower = full_transcription_text.lower()

    if words_to_check_for:
        # re.escape guards against filler words containing regex
        # metacharacters; \b anchors keep matches on word boundaries.
        pattern = (
            r"\b("
            + "|".join(re.escape(word) for word in words_to_check_for)
            + r")\b"
        )
        matches: list[re.Match] = list(
            re.finditer(pattern, full_transcription_text_lower)
        )
    else:
        # No words to look for: an empty alternation would match
        # everywhere, so skip matching entirely.
        matches = []

    counter = Counter(match.group(0) for match in matches)
    new_counts_of_words = {word: counter.get(word, 0) for word in words_to_check_for}

    # Entity spans index into the original-cased text; offsets are valid
    # because lowercasing does not change string length here.
    new_highlighted_transcription = {
        "text": full_transcription_text,
        "entities": [
            {"entity": "FILLER", "start": match.start(), "end": match.end()}
            for match in matches
        ],
    }

    new_state = {
        "stream": stream,
        "full_transcription": full_transcription_text,
        "counts_of_words": new_counts_of_words,
        "highlighted_transcription": new_highlighted_transcription,
    }

    return (
        new_state,
        new_counts_of_words,
        full_transcription_text,
        new_highlighted_transcription,
    )


with gr.Blocks() as demo:
    state = gr.State(
        value={
            "stream": None,
            "full_transcription": "",
            "counts_of_words": {},
        }
    )
    filler_words = gr.Textbox(label="List of filler words", value="like, so, you know")
    recording = gr.Audio(streaming=True, label="Recording")
    word_counts = gr.JSON(label="Filler words count", value={})
    transcription = gr.Textbox(label="Transcription", value="", visible=False)
    highlighted_transcription = gr.HighlightedText(
        label="Transcription",
        value={
            "text": "",
            "entities": [],
        },
        color_map={"FILLER": "red"},
    )

    recording.stream(
        transcribe_live,
        inputs=[state, filler_words, recording],
        outputs=[state, word_counts, transcription, highlighted_transcription],
        stream_every=5,
        time_limit=-1,
    )

demo.launch(show_error=True)