Spaces:

Satyam-Singh
/

IOT-Assistant

Sleeping

App Files Files Community

IOT-Assistant / app.py

Satyam-Singh

Update app.py

1ca46ec verified 6 months ago

raw

history blame

3.86 kB

	import io
	import os
	import tempfile

	import gradio as gr
	import groq
	import numpy as np
	import pyttsx3 # Text-to-Speech engine
	import soundfile as sf

	# Read the Groq API key from the environment rather than committing it to source.
	# On Hugging Face Spaces, define it as a secret named GROQ_API_KEY.
	# SECURITY: any key that was previously hard-coded here has been published and
	# should be revoked in the Groq console.
	API_KEY = os.environ.get("GROQ_API_KEY")

	def transcribe_audio(audio):
	    """Transcribe recorded speech to text with Groq's Distil-Whisper model.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple as delivered by a
	            ``gr.Audio(type="numpy")`` input, or ``None`` when nothing
	            was recorded.

	    Returns:
	        The transcription text, ``""`` when there is no audio to
	        transcribe, or a string starting with ``"Error in transcription:"``
	        when encoding or the API call fails.
	    """
	    if audio is None:
	        return ""

	    sample_rate, samples = audio[0], audio[1]
	    if len(samples) == 0:
	        # Nothing was captured; skip the API round-trip entirely.
	        return ""

	    # Encoding and client construction live inside the try so that every
	    # failure is reported through the same error-string contract.
	    try:
	        # Encode the raw samples as an in-memory WAV file for upload.
	        buffer = io.BytesIO()
	        sf.write(buffer, samples, sample_rate, format='wav')
	        buffer.seek(0)

	        client = groq.Client(api_key=API_KEY)

	        # Use Distil-Whisper English powered by Groq for transcription
	        completion = client.audio.transcriptions.create(
	            model="distil-whisper-large-v3-en",
	            file=("audio.wav", buffer),
	            response_format="text"
	        )
	        return completion
	    except Exception as e:
	        return f"Error in transcription: {e}"

	def generate_response(transcription):
	    """Generate the assistant's reply to a transcription using Llama 3 70B on Groq.

	    Args:
	        transcription: The user's transcribed speech. ``None``, empty, or
	            whitespace-only text is treated as "nothing was said".

	    Returns:
	        The model's reply text, a fixed prompt to try again when there is
	        no transcription, or a string starting with
	        ``"Error in response generation:"`` when the API call fails.
	    """
	    # Whitespace-only text (e.g. from a silent recording) carries no request.
	    if not transcription or not transcription.strip():
	        return "No transcription available. Please try speaking again."

	    # Client construction is inside the try so a missing/invalid key is
	    # reported the same way as any other API failure.
	    try:
	        client = groq.Client(api_key=API_KEY)

	        # Use Llama 3 70B powered by Groq for text generation
	        completion = client.chat.completions.create(
	            model="llama3-70b-8192",
	            messages=[
	                {"role": "system", "content": "You are a helpful assistant."},
	                {"role": "user", "content": transcription},
	            ],
	        )
	        # The message content may be None; always hand back a string.
	        return completion.choices[0].message.content or ""
	    except Exception as e:
	        return f"Error in response generation: {e}"

	def text_to_speech(response_text):
	    """Synthesize speech for ``response_text`` with pyttsx3.

	    Args:
	        response_text: The text to speak.

	    Returns:
	        An ``io.BytesIO`` positioned at offset 0 holding the audio file
	        written by the pyttsx3 engine. The buffer is empty if the engine
	        produced no output file.
	    """
	    # Initialize the pyttsx3 engine for text-to-speech
	    engine = pyttsx3.init()

	    # pyttsx3's save_to_file() takes a filename, not a file-like object,
	    # so synthesize into a temporary file and load its bytes afterwards.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        wav_path = os.path.join(tmp_dir, "response.wav")
	        engine.save_to_file(response_text, wav_path)
	        engine.runAndWait()

	        if not os.path.exists(wav_path):
	            # Keep the best-effort contract: no audio rather than a crash.
	            return io.BytesIO()

	        with open(wav_path, "rb") as wav_file:
	            return io.BytesIO(wav_file.read())

	def process_audio(audio):
	    """Run the full pipeline: transcribe, generate a reply, synthesize speech.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple from the audio input, or ``None``.

	    Returns:
	        A ``(transcription, response, audio_response)`` tuple for the three
	        output components. ``audio_response`` is a ``(sample_rate, samples)``
	        tuple, or ``None`` when no speech audio could be produced.
	    """
	    transcription = transcribe_audio(audio)
	    if isinstance(transcription, str) and transcription.startswith("Error in transcription:"):
	        # Show the failure as-is instead of asking the LLM to answer an
	        # error message and then reading that answer aloud.
	        return transcription, "", None

	    response = generate_response(transcription)

	    audio_response = None
	    try:
	        speech = text_to_speech(response)
	        # text_to_speech returns an in-memory audio file; decode it into the
	        # (sample_rate, ndarray) form that a gr.Audio output accepts.
	        samples, sample_rate = sf.read(speech, dtype="int16")
	        audio_response = (sample_rate, samples)
	    except Exception as e:
	        # Speech is optional: keep the text results even if synthesis or
	        # decoding fails, and leave a trace in the application log.
	        print(f"Text-to-speech failed: {e}")

	    return transcription, response, audio_response

	# Custom CSS for the Gradio UI: light grey page background, #f55036 accent
	# colour for primary/secondary buttons, and fixed bottom-right placement of
	# the element with id "groq-badge". It only takes effect when supplied to
	# gr.Blocks through its `css` argument.
	custom_css = """
	.gradio-container {
	background-color: #f5f5f5;
	}
	.gr-button-primary {
	background-color: #f55036 !important;
	border-color: #f55036 !important;
	}
	.gr-button-secondary {
	color: #f55036 !important;
	border-color: #f55036 !important;
	}
	#groq-badge {
	position: fixed;
	bottom: 20px;
	right: 20px;
	z-index: 1000;
	}
	"""

	# Build the UI. custom_css is passed explicitly; without it the button
	# colours and the fixed-position badge styling defined above never apply.
	with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
	    gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")

	    with gr.Row():
	        # NOTE(review): streaming=True is normally paired with a `.stream`
	        # event; here the audio is only read on the button click below.
	        # Confirm the click handler receives the complete recording.
	        audio_input = gr.Audio(label="Speak!", type="numpy", streaming=True) # Enable real-time streaming

	    with gr.Row():
	        transcription_output = gr.Textbox(label="Transcription", interactive=False)
	        response_output = gr.Textbox(label="AI Assistant Response", interactive=False)

	    audio_output = gr.Audio(label="AI Response Audio", interactive=False)

	    submit_button = gr.Button("Process", variant="primary")

	    # Badge pinned to the bottom-right corner by the #groq-badge CSS rule
	    gr.HTML("""
	<div id="groq-badge">
	<div style="color: #f55036; font-weight: bold;">POWERED BY LLAVA</div>
	</div>
	""")

	    # One click runs the whole pipeline and fills all three outputs.
	    submit_button.click(
	        process_audio,
	        inputs=[audio_input],
	        outputs=[transcription_output, response_output, audio_output]
	    )

	    gr.Markdown("""
	## How to use this app:
	1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
	2. The system will automatically transcribe your speech, generate a response, and play it as audio.
	3. The transcription and AI assistant response will appear in the respective text boxes.
	""")

	demo.launch()