Spaces:

Satyam-Singh
/

IOT-Assistant

Sleeping

App Files Files Community

Satyam-Singh committed on Nov 7, 2024

Commit

1ca46ec

verified ·

1 Parent(s): 323f6ed

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -6

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import groq
 import io
 import numpy as np
 import soundfile as sf
 # Define the API key directly in the code
 API_KEY = "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"  # Replace this with your actual Groq API key
@@ -47,10 +48,20 @@ def generate_response(transcription):
     except Exception as e:
         return f"Error in response generation: {str(e)}"
 def process_audio(audio):
     transcription = transcribe_audio(audio)
     response = generate_response(transcription)
-    return transcription, response
 custom_css = """
 .gradio-container {
@@ -76,12 +87,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
     with gr.Row():
-        audio_input = gr.Audio(label="Speak!", type="numpy")  # Audio input as numpy array
     with gr.Row():
-        transcription_output = gr.Textbox(label="Transcription")
-        response_output = gr.Textbox(label="AI Assistant Response")
     submit_button = gr.Button("Process", variant="primary")
     # Add the Groq badge
@@ -94,13 +107,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     submit_button.click(
         process_audio,
         inputs=[audio_input],
-        outputs=[transcription_output, response_output]
     )
     gr.Markdown("""
     ## How to use this app:
     1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
-    2. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
     3. The transcription and AI assistant response will appear in the respective text boxes.
     """)

 import io
 import numpy as np
 import soundfile as sf
+import pyttsx3  # Text-to-Speech engine
 # Define the API key directly in the code
 API_KEY = "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"  # Replace this with your actual Groq API key
     except Exception as e:
         return f"Error in response generation: {str(e)}"
+def text_to_speech(response_text):
+    # Initialize the pyttsx3 engine for text-to-speech
+    engine = pyttsx3.init()
+    audio_buffer = io.BytesIO()
+    engine.save_to_file(response_text, audio_buffer)
+    engine.runAndWait()
+    audio_buffer.seek(0)
+    return audio_buffer
 def process_audio(audio):
     # Handle one request end to end: transcribe the incoming audio, generate
     # a reply from the transcription, then synthesize that reply to speech.
     transcription = transcribe_audio(audio)
     response = generate_response(transcription)
+    audio_response = text_to_speech(response)
     # Returned in the same order as the outputs list wired to
     # submit_button.click: transcription, response text, response audio.
+    return transcription, response, audio_response
 custom_css = """
 .gradio-container {
     gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
     with gr.Row():
+        audio_input = gr.Audio(label="Speak!", type="numpy", streaming=True)  # Enable real-time streaming
     with gr.Row():
+        transcription_output = gr.Textbox(label="Transcription", interactive=False)
+        response_output = gr.Textbox(label="AI Assistant Response", interactive=False)
+    audio_output = gr.Audio(label="AI Response Audio", interactive=False)
     submit_button = gr.Button("Process", variant="primary")
     # Add the Groq badge
     submit_button.click(
         process_audio,
         inputs=[audio_input],
+        outputs=[transcription_output, response_output, audio_output]
     )
     gr.Markdown("""
     ## How to use this app:
     1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
+    2. The system will automatically transcribe your speech, generate a response, and play it as audio.
     3. The transcription and AI assistant response will appear in the respective text boxes.
     """)