# speech-to-text / app.py
import gradio as gr
import time
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Selecting the device and dtype once so the model and the pipeline stay consistent.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Instantiating the model object.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    pretrained_model_name_or_path="openai/whisper-large-v3",
    torch_dtype=torch_dtype,
    use_safetensors=True,
)
model = model.to(device)

# Instantiating the processor object.
processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path="openai/whisper-large-v3")

# Instantiating the transformers pipeline object, reusing the model loaded above.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
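# Illustration only (not executed): calling the pipeline directly on an audio file
# path such as "sample.wav" (a hypothetical file) returns a dict with the full
# transcription under "text" and, because return_timestamps=True, a "chunks" list
# of {"timestamp": (start, end), "text": ...} segments:
#   result = pipe("sample.wav")
#   print(result["text"])
#   print(result["chunks"])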
# Defining the speech-to-text function.
def convert(audio, state=""):
    """
    Performs speech-to-text conversion; used as the callback in Gradio's Interface.

    Parameters:
    - audio: path to the recorded audio file (the Audio component uses type="filepath").
    - state: a string holding the accumulated text from previous conversions.
    """
    # Wait briefly before transcribing the latest recording.
    time.sleep(3)
    try:
        result = pipe(audio)
        transcribed_text = result["text"]
        state += transcribed_text + " "
    except Exception:
        return "Error processing audio: Please start recording!", state
    return state, state
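# Illustration only (file names and transcripts are made up): each call appends the
# new transcription to the running state, e.g.
#   text, state = convert("clip1.wav", "")       # state == "Hello there. "
#   text, state = convert("clip2.wav", state)    # state == "Hello there. How are you? "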
# Instantiating the Gradio Interface.
gr_interface = gr.Interface(
    fn=convert,
    title="Automatic Speech-to-Text",
    description="### Record your speech and watch it get converted to text!",
    inputs=[
        gr.Audio(
            label="Please Record Your Speech Here!",
            sources="microphone",
            type="filepath",
        ),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    theme="dark",
    live=True,
)
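# With live=True, Gradio re-runs convert whenever the recorded audio changes, and the
# paired "state" input/output keeps the accumulated transcript across those calls
# within a single session.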
# Launching the app (pass share=True to launch() to get a public link).
gr_interface.launch()