Spaces:

dussen
/

Whisper_dutch

Sleeping

Whisper_dutch / app.py

SevenhuijsenM

Implementation of AI

f47d361 about 1 year ago

3.6 kB

	from transformers import pipeline
	import gradio as gr
	from pytube import YouTube
	import os
	import requests
	import time
	from openai import OpenAI
	# OpenAI API client; radio_to_text uses it to request a summary of the transcript.
	# NOTE(review): no API key is passed here, so credentials presumably come from
	# the environment — confirm the deployment provides them.
	client = OpenAI()

	# Speech-to-text pipeline backed by the "dussen/whisper-small-nl-hc" checkpoint.
	# Built once at import time and shared by every transcription function in this file.
	pipe = pipeline(model="dussen/whisper-small-nl-hc")

	# Print the pipeline object once at startup.
	print(pipe)
	def download_audio(url, output_path='downloads'):
	    """Download the audio track of a YouTube video and transcribe it.

	    The first audio-only MP4 stream is downloaded into ``output_path``,
	    renamed to ``video.mp3`` and fed to the module-level ``pipe``.
	    The downloaded file is removed afterwards, whether or not the
	    transcription succeeded.

	    Args:
	        url: URL of the YouTube video.
	        output_path: Scratch directory for the downloaded audio.

	    Returns:
	        The transcribed text, or ``None`` when anything went wrong (the
	        error is printed, matching the original best-effort contract).
	    """
	    mp3_path = os.path.join(output_path, "video.mp3")
	    mp4_path = None
	    try:
	        # Create a YouTube object
	        yt = YouTube(url)

	        # Take the first audio-only MP4 stream (not necessarily the best bitrate).
	        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
	        if audio_stream is None:
	            # Fail with a clear message instead of an AttributeError on None.
	            raise ValueError("no audio-only mp4 stream available for this video")

	        mp4_path = os.path.join(output_path, audio_stream.default_filename)
	        audio_stream.download(output_path)

	        # os.replace overwrites a leftover video.mp3 on every platform, so no
	        # separate "delete the stale file first" step is needed.
	        os.replace(mp4_path, mp3_path)

	        # Use the model to transcribe the audio
	        return pipe(mp3_path)["text"]
	    except Exception as e:
	        print(f"Error: {e}")
	        return None
	    finally:
	        # Always clean up, including when the download or transcription failed.
	        for leftover in (mp4_path, mp3_path):
	            if leftover is None:
	                continue
	            try:
	                os.remove(leftover)
	            except OSError:
	                # Already renamed/removed or never created; nothing to clean.
	                pass

	def audio_to_text(audio):
	    """Transcribe a recorded audio clip with the module-level ``pipe``.

	    Args:
	        audio: Path to the recorded audio file as passed in by the Gradio
	            audio input, or ``None`` when nothing was recorded.

	    Returns:
	        The transcribed text; an empty string when no audio was supplied.
	    """
	    # The UI submits None when the user presses submit without recording;
	    # feeding that to the pipeline would raise.
	    if audio is None:
	        return ""

	    text = pipe(audio)["text"]
	    print(text)
	    return text

	def radio_to_text(radio_url):
	    """Record ~10 seconds of a radio stream, transcribe it and summarise it.

	    The stream is written to ``stream.mp3`` in the working directory,
	    transcribed with the module-level ``pipe`` and then sent to the OpenAI
	    chat API, which is asked to categorise and summarise the fragment.

	    Args:
	        radio_url: URL of the audio stream to sample.

	    Returns:
	        A string containing the raw transcript followed by the summary.

	    Raises:
	        requests.RequestException: If the stream cannot be fetched, stalls
	            longer than the timeout, or answers with an HTTP error status.
	    """
	    # The timeout bounds the connect and every read, so a stalled stream
	    # cannot hang the handler; the context manager closes the connection.
	    with requests.get(radio_url, stream=True, timeout=10) as r:
	        # Do not transcribe an HTML error page as if it were audio.
	        r.raise_for_status()

	        with open('stream.mp3', 'wb') as f:
	            # Monotonic deadline: immune to wall-clock adjustments.
	            stop_after = time.monotonic() + 10

	            try:
	                for block in r.iter_content(1024):
	                    f.write(block)
	                    if time.monotonic() > stop_after:
	                        break
	            except KeyboardInterrupt:
	                # Kept from the original: a manual interrupt just ends the
	                # recording early and continues with what was captured.
	                pass

	    text = pipe("stream.mp3")["text"]
	    print(text)

	    # Ask the chat model to label the fragment (news, music, ad or other)
	    # and summarise it in at most three sentences.
	    prompt = f"Dit stuk komt uit een radio uitzending en is getranscribeerd door AI. Er kunnen fouten in zitten. Kan je eerst het categorie text geven uit `nieuws`, `muziek`, `advertentie` of rest`, en dan in max drie zinnen wat er gezegd is?{text}"

	    # Crude length cap: this truncates by characters, not by tokens.
	    prompt = prompt[:3584]

	    response = client.chat.completions.create(
	        model="gpt-3.5-turbo",
	        messages=[{"role": "user", "content": prompt}],
	        temperature=0.7,
	        max_tokens=512,
	        top_p=1
	    )

	    # Show only the generated message, not the repr of the whole response object.
	    summary = response.choices[0].message.content
	    text = f"Tekst van de AI die is getranscribeerd: {text}\n\n---\n\nSamenvatting door AI:\n\n{summary}"

	    return text

	# ---- Gradio front-end: one Interface per input source ----

	# YouTube URL in, transcript out.
	iface_video_url = gr.Interface(
	    title="Whisper Small Dutch - Use a YouTube URL",
	    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
	    fn=download_audio,
	    inputs="text",
	    outputs="text",
	)

	# Microphone recording (handed over as a file path) in, transcript out.
	iface_audio = gr.Interface(
	    title="Whisper Small Dutch - Use your microphone",
	    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
	    fn=audio_to_text,
	    inputs=gr.Audio(sources=["microphone"], type="filepath"),
	    outputs="text",
	)

	# Radio stream URL in, transcript plus summary out.
	iface_radio = gr.Interface(
	    title="Whisper Small Dutch - Use a radio URL",
	    description=(
	        "Demo for dutch speech recognition using a fine-tuned Whisper small model. "
	        "It gets information on what is playing on the given radio URL. "
	        "It transcribes it and then summarises it using chatGPT."
	    ),
	    fn=radio_to_text,
	    inputs="text",
	    outputs="text",
	)

	# Combine the three interfaces into a single tabbed application.
	app = gr.TabbedInterface(
	    interface_list=[iface_audio, iface_video_url, iface_radio],
	    tab_names=["Audio to text", "Video to text", "Radio to text"],
	)

	# Only start the server when executed as a script.
	if __name__ == "__main__":
	    app.launch()