Spaces:

archit11
/

shuka_demo

Running on Zero

shuka_demo / app.py

Update app.py

d649fba verified about 1 month ago

1.78 kB

	import transformers
	import gradio as gr
	import librosa
	import torch
	import spaces

	@spaces.GPU(duration=120)
	def transcribe_and_respond(audio_file):
	    """Generate a model response for a recorded audio clip.

	    Args:
	        audio_file: Path of the audio file to process. May be None/empty
	            when the UI invokes the callback without a recording.

	    Returns:
	        The raw output of the 'sarvamai/shuka_v1' pipeline on success,
	        otherwise a string of the form "Error: <message>".
	    """
	    # Bail out before the expensive model load when there is nothing to
	    # process (otherwise librosa would fail on the missing path anyway).
	    if not audio_file:
	        return "Error: no audio received."

	    try:
	        # NOTE: the pipeline is rebuilt on every call, inside the GPU-decorated
	        # function; this is slow but keeps all device work within this call.
	        pipe = transformers.pipeline(
	            model='sarvamai/shuka_v1',
	            trust_remote_code=True,
	            device=0,
	            torch_dtype=torch.bfloat16,
	        )

	        # Decode the recording, resampled to 16 kHz.
	        audio, sr = librosa.load(audio_file, sr=16000)
	        print(f"Audio file path: {audio_file}")

	        # The user turn carries only the audio placeholder token. It must be
	        # the literal '<|audio|>' -- no backslashes -- so that the pipeline can
	        # substitute the audio for it.
	        turns = [
	            {'role': 'system', 'content': 'Respond naturally and informatively.'},
	            {'role': 'user', 'content': '<|audio|>'},
	        ]
	        print(f"Constructed prompt: {turns}")

	        # Run the pipeline on the audio together with the conversation turns.
	        output = pipe(
	            {'audio': audio, 'turns': turns, 'sampling_rate': sr},
	            max_new_tokens=512,
	        )
	        print(f"Model output: {output}")

	        return output

	    except Exception as e:
	        # UI boundary: report the failure as text instead of crashing the app.
	        return f"Error: {str(e)}"

	# Build the Gradio UI: microphone recording in, plain text out.
	iface = gr.Interface(
	    title="Live Transcription and Response",
	    description="Speak into your microphone, and the model will respond naturally and informatively.",
	    # The callback receives the recording as a file path.
	    fn=transcribe_and_respond,
	    inputs=gr.Audio(type="filepath", sources="microphone"),
	    outputs="text",
	    # Live mode: re-run the callback when the input changes, no submit button.
	    live=True,
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()