Spaces:

jiuuee
/

my-alexa

Runtime error

App Files Files Community

my-alexa / app.py

jiuuee

Update app.py

beebab3 verified 8 months ago

raw

history blame

2.3 kB

	import gradio as gr
	import torch
	from transformers import pipeline

	# Create pipelines for ASR, QA, and TTS.
	# Use the first CUDA GPU when one is available and fall back to the CPU
	# otherwise (-1 is the pipeline API's CPU index). Hard-coding device=0
	# makes start-up fail on hosts that have no GPU.
	# NOTE(review): the model ids below are kept unchanged and were not
	# verified here -- confirm that each one exists on the Hugging Face Hub
	# and matches the task it is loaded for.
	DEVICE = 0 if torch.cuda.is_available() else -1

	asr_pipeline = pipeline(
	    "automatic-speech-recognition",
	    model="canary/asr-small-librispeech",
	    device=DEVICE,
	)
	# The QA pipeline is left on the library's default device, as before.
	qa_pipeline = pipeline(
	    "question-answering",
	    model="LLAMA/llama3-base-qa",
	    tokenizer="LLAMA/llama3-base-qa",
	)
	tts_pipeline = pipeline(
	    "text-to-speech",
	    model="patrickvonplaten/vits-large",
	    device=DEVICE,
	)

	# Function to capture audio using Canary ASR
	def capture_audio():
	    """Block until the wake phrase "hey alex" is heard on the microphone.

	    Audio is read from the microphone in fixed-length chunks through
	    ``ffmpeg_microphone_live`` (which needs the ``ffmpeg`` executable), and
	    every chunk is transcribed with ``asr_pipeline``.  The ASR pipeline only
	    transcribes audio it is given -- it cannot record by being called with
	    ``None`` -- and it returns a dict whose transcript is stored under
	    ``"text"``.

	    Returns:
	        dict: ``{"raw": samples, "sampling_rate": rate}`` for the chunk in
	        which the wake phrase was recognised.  This form can be passed to
	        ``asr_pipeline`` again.

	    Raises:
	        RuntimeError: If the microphone stream ends before the wake phrase
	            is heard.
	    """
	    # Imported here so the microphone helper is only required when this
	    # function is actually called.
	    from transformers.pipelines.audio_utils import ffmpeg_microphone_live

	    microphone = ffmpeg_microphone_live(
	        # Record at the rate the ASR model's feature extractor expects.
	        sampling_rate=asr_pipeline.feature_extractor.sampling_rate,
	        chunk_length_s=3.0,
	    )

	    print("Say, 'Hey, Alex'")
	    for chunk in microphone:
	        audio_input = {
	            "raw": chunk["raw"],
	            "sampling_rate": chunk["sampling_rate"],
	        }
	        # Pass a copy: the pipeline may consume keys from the dict it is
	        # handed, and the original is returned to the caller.
	        transcript = asr_pipeline(dict(audio_input))["text"]

	        # Compare on lower-cased words only, so that "Hey, Alex!" matches
	        # as well as "hey alex".
	        words = "".join(
	            ch if ch.isalnum() or ch.isspace() else " "
	            for ch in transcript.lower()
	        ).split()
	        if "hey alex" in " ".join(words):
	            print("I hear you!")
	            break
	        print("Say, 'Hey, Alex'")
	    else:
	        raise RuntimeError(
	            "Microphone stream ended before the wake phrase was heard"
	        )

	    print("Listening...")
	    return audio_input

	# AI assistant function
	def ai_assistant(audio_input):
	    """Answer a spoken question with synthesised speech.

	    The recording is transcribed with ``asr_pipeline``, the transcript is
	    used as the question for ``qa_pipeline`` against a fixed context about
	    the sitcom "Friends", and the answer text is spoken by ``tts_pipeline``.

	    Args:
	        audio_input: The recording to answer.  Either an input that
	            ``asr_pipeline`` accepts directly (for example a path to an
	            audio file) or a ``(sampling_rate, samples)`` tuple as produced
	            by a Gradio audio input in numpy mode.  ``None`` means nothing
	            was recorded.

	    Returns:
	        A ``(sampling_rate, waveform)`` tuple suitable for a Gradio audio
	        output in numpy mode, or ``None`` when there was no recording or no
	        speech was recognised.
	    """
	    if audio_input is None:
	        return None

	    # Perform automatic speech recognition (ASR).  The pipeline returns a
	    # dict with the transcript under "text".
	    transcribed_text = asr_pipeline(_as_asr_input(audio_input))["text"].strip()
	    if not transcribed_text:
	        # Nothing was understood, so there is no question to answer.
	        return None

	    # Provide the context for the question answering model
	    context = (
	        "Friends is a popular American sitcom that aired from 1994 to 2004. "
	        "The show revolves around a group of six friends living in New York "
	        "City—Ross, Rachel, Chandler, Monica, Joey, and Phoebe—as they "
	        "navigate various aspects of their personal and professional lives. "
	        "Friends is known for its humor, memorable characters, and iconic "
	        "catchphrases, making it a beloved and enduring cultural phenomenon."
	    )
	    # Perform question answering (QA)
	    answer = qa_pipeline(question=transcribed_text, context=context)

	    # Convert the answer to speech using text-to-speech (TTS).  The pipeline
	    # returns a dict with "audio" and "sampling_rate".
	    tts_output = tts_pipeline(answer["answer"])

	    # Drop any leading batch/channel axis of length one so the waveform is
	    # a plain 1-D array of samples.
	    waveform = tts_output["audio"].squeeze()
	    return tts_output["sampling_rate"], waveform


	def _as_asr_input(audio):
	    """Convert a ``(sampling_rate, samples)`` tuple to the ASR dict form.

	    Any other value is returned unchanged.
	    """
	    if not isinstance(audio, tuple):
	        return audio

	    sampling_rate, samples = audio
	    if samples.dtype.kind == "i":
	        # Scale signed integer PCM to floats in [-1.0, 1.0).
	        full_scale = float(2 ** (8 * samples.dtype.itemsize - 1))
	        samples = samples.astype("float32") / full_scale
	    else:
	        samples = samples.astype("float32")
	    if samples.ndim > 1:
	        # Mix multi-channel audio down to mono.
	        samples = samples.mean(axis=1)
	    return {"raw": samples, "sampling_rate": sampling_rate}

	if __name__ == "__main__":
	    # Create a Gradio interface.
	    # The components are built with ``gr.Audio`` because current Gradio
	    # releases no longer provide the legacy ``gr.inputs`` / ``gr.outputs``
	    # namespaces.  ``gr.Audio`` has no ``capture`` argument (Gradio records
	    # or uploads the audio itself), and its ``type`` must be "numpy" or
	    # "filepath".
	    # NOTE(review): on Gradio 3.x add ``source="microphone"`` to the input
	    # if microphone recording is wanted -- confirm the installed version.
	    speech_input = gr.Audio(type="filepath", label="Speak Here")
	    speech_output = gr.Audio(type="numpy", label="Assistant's Response")

	    demo = gr.Interface(
	        fn=ai_assistant,
	        inputs=speech_input,
	        outputs=speech_output,
	        title="Alexander the Great AI Assistant",
	        description="An AI Assistant. Say 'Hey Alex' to speak to Alexander",
	    )
	    demo.launch(inbrowser=True)