# Whisper_dutch / app.py
# SevenhuijsenM
# Attempt for radio
# 167c051
# raw
# history blame
# 2.77 kB
import os
import time
import urllib.request
from urllib.parse import urlsplit

import gradio as gr
from pytube import YouTube
from transformers import pipeline
# Build the pipeline once at import time from the "dussen/whisper-small-nl-hc"
# checkpoint; the handler functions in this file all call this shared `pipe`.
pipe = pipeline(model="dussen/whisper-small-nl-hc")
# Print the pipeline object at startup (presumably a debugging aid).
print(pipe)
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: URL of the YouTube video.
        output_path: Directory the audio file is downloaded into.

    Returns:
        The transcribed text. If anything fails (bad URL, no audio stream,
        download or transcription error) the failure is logged and a string
        starting with "Error: " is returned so the UI shows what went wrong
        instead of an empty result.
    """
    try:
        yt = YouTube(url)
        # First audio-only MP4 stream that pytube lists (not necessarily the
        # highest bitrate one).
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            raise ValueError("no audio-only MP4 stream is available for this video")

        print("Downloading video...")
        audio_stream.download(output_path)
        print(f"Downloaded audio to {output_path}")

        # Move the download to a fixed name. The file is only renamed, not
        # transcoded, so it is still MP4 audio despite the .mp3 suffix.
        mp4_path = os.path.join(output_path, audio_stream.default_filename)
        mp3_path = os.path.join(output_path, "video.mp3")
        # os.replace overwrites a leftover video.mp3 on every platform,
        # whereas os.rename fails on Windows when the target exists.
        os.replace(mp4_path, mp3_path)
        print("Downloaded video")

        try:
            print("Transcribing audio...")
            text = pipe(mp3_path)["text"]
            print(f"Transcribed audio: {text}")
        finally:
            # Always clean up, even when transcription raises.
            if os.path.exists(mp3_path):
                os.remove(mp3_path)
        return text
    except Exception as e:
        # Top-level boundary for the UI handler: log and report, never crash.
        print(f"Error: {e}")
        return f"Error: {e}"
def audio_to_text(audio):
    """Transcribe an audio clip with the shared pipeline.

    Args:
        audio: Input accepted by the pipeline (the microphone tab passes a
            file path), or None when nothing was recorded.

    Returns:
        The transcribed text, or an empty string when ``audio`` is None.
    """
    if audio is None:
        # Submitting without a recording yields None; there is nothing to
        # transcribe, so skip the pipeline instead of letting it raise.
        return ""
    text = pipe(audio)["text"]
    print(text)
    return text
def radio_to_text(radio_url, max_seconds=30):
    """Record a short clip from a radio stream and transcribe it.

    Args:
        radio_url: http(s) URL of the audio stream.
        max_seconds: Maximum time to spend recording, in seconds. A live
            stream never ends on its own, so the capture must be bounded.

    Returns:
        The transcribed text of the recorded clip.

    Raises:
        ValueError: If ``radio_url`` is empty or not an http/https URL.
        OSError: If the stream cannot be opened or read (includes
            ``urllib.error.URLError`` and socket timeouts).
    """
    stream_url = (radio_url or "").strip()
    # Only fetch over HTTP(S); this also keeps user input from reaching
    # other URL handlers such as file://.
    if urlsplit(stream_url).scheme not in ("http", "https"):
        raise ValueError(
            f"Unsupported radio URL (expected http:// or https://): {radio_url!r}"
        )

    deadline = time.monotonic() + max_seconds
    # The timeout guards against a server that accepts the connection but
    # never sends data; the context managers close the socket and the file.
    with urllib.request.urlopen(stream_url, timeout=10) as response, \
            open('stream.mp3', 'wb') as f:
        while time.monotonic() < deadline:
            block = response.read(1024)
            if not block:
                # The server closed the stream before the deadline.
                break
            f.write(block)

    text = pipe("stream.mp3")["text"]
    print(text)
    return text
# YouTube tab: one text input (the video URL) is passed to download_audio and
# the string it returns is shown as text.
iface_video_url = gr.Interface(
    fn=download_audio,
    inputs="text",
    outputs="text",
    title="Whisper Small Dutch - Use a YouTube URL",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
)
# Microphone tab: the recording is handed to audio_to_text as a file path
# (type="filepath") and the returned transcript is shown as text.
iface_audio = gr.Interface(
    fn=audio_to_text,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Small Dutch - Use your microphone",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
)
# Radio tab: one text input (the stream URL) is passed to radio_to_text and
# the returned transcript is shown as text.
iface_radio = gr.Interface(
    fn=radio_to_text,
    inputs="text",
    outputs="text",
    title="Whisper Small Dutch - Use a radio URL",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
)
# Bundle the three demos into a single tabbed app; the two lists are kept in
# the same order so each interface lines up with its tab label.
_tab_interfaces = [iface_audio, iface_video_url, iface_radio]
_tab_labels = ["Audio to text", "Video to text", "Radio to text"]
app = gr.TabbedInterface(_tab_interfaces, _tab_labels)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()