# Whisper_dutch / app.py
from transformers import pipeline
import gradio as gr
from pytube import YouTube
import os
import requests
import time
from openai import OpenAI
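
# The OpenAI client reads its API key from the OPENAI_API_KEY environment variable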
client = OpenAI()
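
# Dutch speech-recognition pipeline: a Whisper-small checkpoint fine-tuned for Dutch.
# It accepts an audio file path and returns a dict with the transcription under "text".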
pipe = pipeline(model="dussen/whisper-small-nl-hc")
print(pipe)

def download_audio(url, output_path='downloads'):
    try:
        # Create a YouTube object
        yt = YouTube(url)

        # Get an audio-only MP4 stream and download it
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        audio_stream.download(output_path)

        # If a leftover video.mp4 file already exists, delete it
        if os.path.exists(f"{output_path}/video.mp4"):
            os.remove(f"{output_path}/video.mp4")

        # Rename the downloaded file to video.mp3
        default_filename = audio_stream.default_filename
        mp4_path = f"{output_path}/{default_filename}"
        mp3_path = f"{output_path}/video.mp3"
        os.rename(mp4_path, mp3_path)

        # Use the model to transcribe the audio
        text = pipe(mp3_path)["text"]

        # Delete the audio file
        os.remove(mp3_path)
        return text
    except Exception as e:
        print(f"Error: {e}")
        return f"Error: {e}"

def audio_to_text(audio):
    text = pipe(audio)["text"]
    print(text)
    return text

def radio_to_text(radio_url):
    r = requests.get(radio_url, stream=True)

    # Record roughly 10 seconds of the stream, then close the connection
    with open('stream.mp3', 'wb') as f:
        # Get the stopping time as a UNIX timestamp
        stop_after = time.time() + 10
        try:
            for block in r.iter_content(1024):
                f.write(block)
                if time.time() > stop_after:
                    break
        except KeyboardInterrupt:
            pass

    # Transcribe the recorded snippet
    text = pipe("stream.mp3")["text"]
    print(text)

    # Use ChatGPT to classify the fragment (news, music, ad or other) and summarise it
    prompt = (
        "Dit stuk komt uit een radio-uitzending en is getranscribeerd door AI. "
        "Er kunnen fouten in zitten. Kan je eerst de categorie van de tekst geven "
        "(`nieuws`, `muziek`, `advertentie` of `rest`), en dan in maximaal drie zinnen "
        f"samenvatten wat er gezegd is? {text}"
    )

    # Truncate the prompt (by characters, not tokens) to stay within the model's context window
    prompt = prompt[:3584]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=512,
        top_p=1
    )

    text = (
        f"Door AI getranscribeerde tekst: {text}\n\n---\n\n"
        f"Samenvatting door AI:\n\n{response.choices[0].message.content}"
    )
    return text
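
# Gradio interfaces: one tab per input source (microphone, YouTube URL, radio stream URL)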

iface_video_url = gr.Interface(
    fn=download_audio,
    inputs="text",
    outputs="text",
    title="Whisper Small Dutch - Use a YouTube URL",
    description="Demo for Dutch speech recognition using a fine-tuned Whisper small model.",
)

iface_audio = gr.Interface(
    fn=audio_to_text,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Small Dutch - Use your microphone",
    description="Realtime demo for Dutch speech recognition using a fine-tuned Whisper small model.",
)

iface_radio = gr.Interface(
    fn=radio_to_text,
    inputs="text",
    outputs="text",
    title="Whisper Small Dutch - Use a radio URL",
    description="Demo for Dutch speech recognition using a fine-tuned Whisper small model. It records a short snippet from the given radio stream URL, transcribes it and then summarises it using ChatGPT.",
)

app = gr.TabbedInterface(
    [iface_audio, iface_video_url, iface_radio],
    ["Audio to text", "Video to text", "Radio to text"],
)

if __name__ == "__main__":
    app.launch()
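
# launch() starts a local web server for the app; share=True can be passed to also get a temporary public link.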