Spaces:
Running
Running
File size: 2,770 Bytes
e96b0df 3aff486 e96b0df b932632 3aff486 c1e7ebb 3aff486 c1e7ebb 3aff486 c1e7ebb 3aff486 c1e7ebb b932632 3aff486 c1e7ebb 3aff486 e96b0df b932632 e96b0df 167c051 3aff486 167c051 3aff486 932a9e1 3aff486 167c051 e96b0df 932a9e1 e96b0df 932a9e1 167c051 932a9e1 167c051 932a9e1 167c051 3aff486 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
from transformers import pipeline
import gradio as gr
from pytube import YouTube
import os
# Model identifier of the fine-tuned Whisper checkpoint used by every tab.
_MODEL_ID = "dussen/whisper-small-nl-hc"

# Build the pipeline once at import time; the handler functions below share it.
pipe = pipeline(model=_MODEL_ID)
print(pipe)
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: URL of the YouTube video.
        output_path: Directory the audio file is downloaded into.

    Returns:
        The transcribed text, or None when any step fails. Failures are
        printed instead of raised, so callers never see an exception.
    """
    try:
        # Create a YouTube object
        yt = YouTube(url)
        # Take the first audio-only MP4 stream that pytube lists. Note that
        # .first() does not sort by bitrate, so this is not guaranteed to be
        # the highest-quality stream.
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            raise ValueError("No audio-only MP4 stream is available for this video")

        print("Downloading video...")
        # download() returns the saved path; fall back to the default
        # filename in case a pytube version returns nothing.
        downloaded_path = audio_stream.download(output_path) or os.path.join(
            output_path, audio_stream.default_filename
        )
        print(f"Downloaded audio to {output_path}")

        # Move the file to a fixed name. os.replace overwrites a stale
        # video.mp3 left by an earlier run on every platform, which the old
        # "remove video.mp4" check never did.
        mp3_path = os.path.join(output_path, "video.mp3")
        os.replace(downloaded_path, mp3_path)
        print("Downloaded video")

        try:
            print("Transcribing audio...")
            # Use the model to transcribe the audio
            text = pipe(mp3_path)["text"]
        finally:
            # Delete the audio file even when transcription fails
            if os.path.exists(mp3_path):
                os.remove(mp3_path)

        print(f"Transcribed audio: {text}")
        return text
    except Exception as e:
        # Boundary handler: keep the original best-effort contract
        # (print the error, return None) so existing callers are unaffected.
        print(f"Error: {e}")
        return None
def audio_to_text(audio):
    """Transcribe a recorded audio clip.

    Args:
        audio: Audio input for the pipeline. The microphone tab is configured
            with type="filepath", so this is expected to be a file path; it
            may be None or empty when nothing was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was supplied.
    """
    # Guard against a submit without a recording; passing None or an empty
    # path to the pipeline would fail instead of producing a transcript.
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return ""
    text = pipe(audio)["text"]
    print(text)
    return text
def radio_to_text(radio_url, max_seconds=30, max_bytes=5 * 1024 * 1024):
    """Capture a bounded excerpt of an HTTP(S) radio stream and transcribe it.

    A live stream never ends, so the capture stops as soon as either limit
    is reached or the server closes the connection.

    Args:
        radio_url: URL of the radio stream (http or https only).
        max_seconds: Maximum wall-clock time spent reading from the stream.
        max_bytes: Maximum number of bytes written to the temporary file.

    Returns:
        The transcribed text, or an empty string when no URL was given.

    Raises:
        ValueError: If the URL scheme is not http or https.
    """
    # Function-scope stdlib imports: the original relied on `requests`,
    # which this file never imports, so every call raised NameError.
    import tempfile
    import time
    import urllib.parse
    import urllib.request

    stream_url = (radio_url or "").strip()
    if not stream_url:
        return ""

    # The URL comes from a user-facing text box; refuse schemes such as
    # file:// that would make urlopen read local resources.
    scheme = urllib.parse.urlsplit(stream_url).scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(f"Unsupported URL scheme for radio stream: {scheme!r}")

    deadline = time.monotonic() + max_seconds
    # A unique temp file avoids concurrent calls overwriting a shared
    # 'stream.mp3'; delete=False lets the pipeline reopen it by name.
    recording = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    try:
        with recording, urllib.request.urlopen(stream_url, timeout=10) as response:
            captured = 0
            try:
                while captured < max_bytes and time.monotonic() < deadline:
                    block = response.read(1024)
                    if not block:
                        break
                    recording.write(block)
                    captured += len(block)
            except KeyboardInterrupt:
                # Preserve the original behavior: Ctrl+C ends the capture
                # early and whatever was recorded is still transcribed.
                pass
        text = pipe(recording.name)["text"]
    finally:
        # Always remove the temporary recording, even on failure.
        os.remove(recording.name)
    print(text)
    return text
# Description shared by the two URL-driven tabs.
_URL_DEMO_DESCRIPTION = "Demo for dutch speech recognition using a fine-tuned Whisper small model."

# YouTube tab: takes a URL as text and shows the transcript as text.
iface_video_url = gr.Interface(
    download_audio,
    "text",
    "text",
    title="Whisper Small Dutch - Use a YouTube URL",
    description=_URL_DEMO_DESCRIPTION,
)

# Microphone tab: the recording is handed to the handler as a file path.
_microphone_input = gr.Audio(sources=["microphone"], type="filepath")
iface_audio = gr.Interface(
    audio_to_text,
    _microphone_input,
    "text",
    title="Whisper Small Dutch - Use your microphone",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
)

# Radio tab: takes a stream URL as text and shows the transcript as text.
iface_radio = gr.Interface(
    radio_to_text,
    "text",
    "text",
    title="Whisper Small Dutch - Use a radio URL",
    description=_URL_DEMO_DESCRIPTION,
)

# (tab label, interface) pairs in display order.
_TABS = (
    ("Audio to text", iface_audio),
    ("Video to text", iface_video_url),
    ("Radio to text", iface_radio),
)
app = gr.TabbedInterface(
    [interface for _, interface in _TABS],
    [label for label, _ in _TABS],
)

# Only start the web server when the file is run as a script.
if __name__ == "__main__":
    app.launch()
|