Spaces:

dussen
/

Whisper_dutch

Sleeping

Whisper_dutch / app.py

SevenhuijsenM

Attempt for radio

167c051 12 months ago

2.77 kB

	from transformers import pipeline
	import gradio as gr
	from pytube import YouTube
	import os

	# Hub identifier of the checkpoint served by this demo.
	MODEL_ID = "dussen/whisper-small-nl-hc"

	# Build the pipeline once at import time so every handler shares the same
	# instance, then echo it to stdout as a start-up sanity check.
	pipe = pipeline(model=MODEL_ID)
	print(pipe)
	def download_audio(url, output_path='downloads'):
	    """Download the audio track of a YouTube video and transcribe it.

	    Args:
	        url: Address of the YouTube video.
	        output_path: Directory the audio stream is downloaded into.

	    Returns:
	        The transcribed text, or ``None`` when ``url`` is empty, no mp4
	        audio stream is available, or any step fails. Failures are printed
	        rather than raised.
	    """
	    if not url or not url.strip():
	        print("Error: no URL provided")
	        return None

	    # NOTE(review): the working file name is fixed, so two overlapping
	    # requests would share it -- confirm whether the app serves
	    # concurrent users.
	    mp3_path = os.path.join(output_path, "video.mp3")
	    mp4_path = None
	    try:
	        # Create a YouTube object
	        yt = YouTube(url)

	        # Pick the first audio-only stream delivered in an mp4 container
	        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
	        if audio_stream is None:
	            print("Error: no mp4 audio stream found for this video")
	            return None

	        # Remember the target before downloading so that a partial file
	        # is cleaned up as well.
	        mp4_path = os.path.join(output_path, audio_stream.default_filename)

	        print("Downloading video...")
	        audio_stream.download(output_path)
	        print(f"Downloaded audio to {output_path}")

	        # os.replace overwrites a stale video.mp3 on every platform,
	        # whereas os.rename fails on Windows when the target exists.
	        os.replace(mp4_path, mp3_path)
	        print("Downloaded video")

	        print("Transcribing audio...")
	        # Use the model to transcribe the audio
	        text = pipe(mp3_path)["text"]
	        print(f"Transcribed audio: {text}")
	        return text
	    except Exception as e:
	        # UI handler boundary: report the problem instead of crashing.
	        print(f"Error: {e}")
	        return None
	    finally:
	        # Delete the working files whether or not transcription succeeded
	        for leftover in (mp4_path, mp3_path):
	            if leftover is not None and os.path.exists(leftover):
	                os.remove(leftover)

	def audio_to_text(audio):
	    """Transcribe a recorded audio clip with the shared pipeline.

	    Args:
	        audio: Input forwarded to ``pipe``; the microphone interface in
	            this file supplies a file path. May be ``None`` or empty when
	            the form is submitted without a recording.

	    Returns:
	        The transcribed text, or an empty string when there is no audio.
	    """
	    # Nothing was recorded: return an empty transcript instead of letting
	    # the pipeline raise on a missing input.
	    if audio is None or (isinstance(audio, str) and not audio.strip()):
	        return ""

	    text = pipe(audio)["text"]
	    print(text)
	    return text

	def radio_to_text(radio_url, *, max_seconds=30.0, max_bytes=5 * 1024 * 1024):
	    """Record a bounded excerpt of an HTTP(S) audio stream and transcribe it.

	    A live radio stream never ends, so the capture stops as soon as either
	    ``max_seconds`` have elapsed or ``max_bytes`` have been written.

	    Args:
	        radio_url: Address of the stream.
	        max_seconds: Upper bound on the time spent capturing.
	        max_bytes: Upper bound on the amount of data captured.

	    Returns:
	        The transcribed text, or an empty string when ``radio_url`` is
	        empty or the stream delivered no data.

	    Raises:
	        ValueError: If ``radio_url`` is not an ``http``/``https`` URL.
	    """
	    # Function-scope stdlib imports: the file never imported ``requests``,
	    # so the previous implementation failed with a NameError.
	    import tempfile
	    import time
	    import urllib.parse
	    import urllib.request

	    stream_url = (radio_url or "").strip()
	    if not stream_url:
	        return ""

	    # The URL is user supplied; refuse schemes such as file:// that
	    # urllib would otherwise happily open.
	    scheme = urllib.parse.urlsplit(stream_url).scheme.lower()
	    if scheme not in ("http", "https"):
	        raise ValueError(f"Unsupported stream URL: {stream_url!r}")

	    deadline = time.monotonic() + max_seconds
	    captured = 0
	    # A unique temporary file avoids clashes between overlapping requests.
	    recording = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
	    try:
	        # Both the file and the HTTP response are closed before transcribing.
	        with recording, urllib.request.urlopen(stream_url, timeout=10) as response:
	            while captured < max_bytes and time.monotonic() < deadline:
	                block = response.read(min(8192, max_bytes - captured))
	                if not block:
	                    break
	                recording.write(block)
	                captured += len(block)

	        if captured == 0:
	            return ""

	        text = pipe(recording.name)["text"]
	    finally:
	        os.remove(recording.name)

	    print(text)
	    return text

	# The two URL-driven tabs share the same description text.
	_URL_DEMO_DESCRIPTION = "Demo for dutch speech recognition using a fine-tuned Whisper small model."

	# Tab fed by a YouTube URL; handled by download_audio.
	iface_video_url = gr.Interface(
	    title="Whisper Small Dutch - Use a YouTube URL",
	    description=_URL_DEMO_DESCRIPTION,
	    fn=download_audio,
	    inputs="text",
	    outputs="text",
	)

	# Tab fed by a microphone recording, handed to audio_to_text as a file path.
	iface_audio = gr.Interface(
	    title="Whisper Small Dutch - Use your microphone",
	    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
	    fn=audio_to_text,
	    inputs=gr.Audio(sources=["microphone"], type="filepath"),
	    outputs="text",
	)

	# Tab fed by a radio stream URL; handled by radio_to_text.
	iface_radio = gr.Interface(
	    title="Whisper Small Dutch - Use a radio URL",
	    description=_URL_DEMO_DESCRIPTION,
	    fn=radio_to_text,
	    inputs="text",
	    outputs="text",
	)

	# Keep each tab label next to the interface it belongs to.
	_TABS = [
	    ("Audio to text", iface_audio),
	    ("Video to text", iface_video_url),
	    ("Radio to text", iface_radio),
	]

	app = gr.TabbedInterface(
	    [interface for _, interface in _TABS],
	    [label for label, _ in _TABS],
	)

	# Only start the server when executed as a script, not on import.
	if __name__ == "__main__":
	    app.launch()