import os
import time

import gradio as gr
import requests
from pytube import YouTube
from transformers import pipeline

# Hugging Face Hub id of the fine-tuned Whisper small checkpoint used by every tab.
MODEL_ID = "dussen/whisper-small-nl-hc"

# Built once at import time so all request handlers share the same pipeline object.
pipe = pipeline(model=MODEL_ID)
print(pipe)
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: URL of the YouTube video.
        output_path: Directory in which the temporary audio file is stored.

    Returns:
        The transcription text produced by the module-level ``pipe``.

    Raises:
        gr.Error: If ``url`` is empty, the video offers no audio-only MP4
            stream, or downloading/transcribing fails. Raising ``gr.Error``
            lets Gradio show the message in the UI instead of an empty
            result box.
    """
    if not url or not url.strip():
        raise gr.Error("Please provide a YouTube URL.")

    # The file keeps its historical ".mp3" name although the stream is
    # requested with file_extension='mp4'.
    # NOTE(review): presumably the pipeline's decoder detects the real
    # container from the file content - confirm.
    audio_path = os.path.join(output_path, "video.mp3")
    downloaded_path = None
    try:
        yt = YouTube(url.strip())

        # First audio-only stream in an MP4 container; this is the first
        # match of the filter, not necessarily the highest bitrate.
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            raise gr.Error("No audio-only MP4 stream is available for this video.")

        print("Downloading audio...")
        downloaded_path = audio_stream.download(output_path)
        print(f"Downloaded audio to {output_path}")

        # os.replace overwrites a leftover file from an earlier failed run on
        # every platform, whereas os.rename fails on Windows if the target
        # already exists.
        os.replace(downloaded_path, audio_path)

        print("Transcribing audio...")
        text = pipe(audio_path)["text"]
        print(f"Transcribed audio: {text}")
        return text
    except gr.Error:
        # Already user-facing; do not wrap it a second time.
        raise
    except Exception as e:
        print(f"Error: {e}")
        raise gr.Error(f"Could not transcribe {url}: {e}") from e
    finally:
        # Remove temporary files on success and on failure alike.
        for leftover in (downloaded_path, audio_path):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)

def audio_to_text(audio):
    """Transcribe a recorded audio clip.

    Args:
        audio: Path to the recorded audio file (the audio input component in
            this file is configured with ``type="filepath"``). Empty or
            ``None`` when the form is submitted without a recording.

    Returns:
        The transcription text produced by the module-level ``pipe``.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque pipeline failure.
    """
    if not audio:
        raise gr.Error("No audio received. Please record something first.")

    text = pipe(audio)["text"]
    print(text)
    return text

def radio_to_text(radio_url, max_seconds=30, timeout=10):
    """Record a short sample from an audio stream URL and transcribe it.

    Args:
        radio_url: HTTP(S) URL of the radio stream.
        max_seconds: Maximum recording time in seconds. A live stream
            typically never ends on its own, so without this bound the
            download loop would never return.
        timeout: Seconds to wait for the server to connect and to deliver
            each chunk of data before giving up.

    Returns:
        The transcription text produced by the module-level ``pipe``.

    Raises:
        gr.Error: If ``radio_url`` is empty, the stream cannot be read, or
            it delivers no data.
    """
    if not radio_url or not radio_url.strip():
        raise gr.Error("Please provide a radio stream URL.")

    stream_path = "stream.mp3"
    bytes_written = 0
    try:
        # The context manager closes the connection even when we stop early.
        with requests.get(radio_url.strip(), stream=True, timeout=timeout) as r:
            r.raise_for_status()
            deadline = time.monotonic() + max_seconds
            with open(stream_path, "wb") as f:
                try:
                    for block in r.iter_content(1024):
                        f.write(block)
                        bytes_written += len(block)
                        if time.monotonic() >= deadline:
                            break
                except KeyboardInterrupt:
                    # Ctrl+C still ends the recording early when the function
                    # is called from a console session.
                    pass

        if bytes_written == 0:
            raise gr.Error("The stream did not deliver any audio data.")

        text = pipe(stream_path)["text"]
    except requests.RequestException as e:
        raise gr.Error(f"Could not read the radio stream: {e}") from e
    finally:
        # Do not leave the recorded sample behind in the working directory.
        if os.path.exists(stream_path):
            os.remove(stream_path)

    print(text)
    return text

# Description text shared by the two URL-based tabs.
URL_DEMO_DESCRIPTION = "Demo for dutch speech recognition using a fine-tuned Whisper small model."

# Tab: transcribe the audio of a YouTube video given its URL.
iface_video_url = gr.Interface(
    title="Whisper Small Dutch - Use a YouTube URL",
    description=URL_DEMO_DESCRIPTION,
    fn=download_audio,
    inputs="text",
    outputs="text",
)

# Tab: transcribe a microphone recording; the handler receives a file path.
microphone_input = gr.Audio(sources=["microphone"], type="filepath")
iface_audio = gr.Interface(
    title="Whisper Small Dutch - Use your microphone",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=audio_to_text,
    inputs=microphone_input,
    outputs="text",
)

# Tab: transcribe audio fetched from a radio stream URL.
iface_radio = gr.Interface(
    title="Whisper Small Dutch - Use a radio URL",
    description=URL_DEMO_DESCRIPTION,
    fn=radio_to_text,
    inputs="text",
    outputs="text",
)

# Tab label paired with its interface, in display order.
tabs = [
    ("Audio to text", iface_audio),
    ("Video to text", iface_video_url),
    ("Radio to text", iface_radio),
]
app = gr.TabbedInterface(
    [interface for _, interface in tabs],
    [label for label, _ in tabs],
)

if __name__ == "__main__":
    app.launch()