Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import groq
|
|
3 |
import io
|
4 |
import numpy as np
|
5 |
import soundfile as sf
|
|
|
6 |
|
7 |
# Read the Groq API key from the environment rather than committing the secret
# to source control (on hosted platforms, configure it as a secret/variable).
import os

API_KEY = os.environ.get("GROQ_API_KEY")  # None when the variable is not set
|
@@ -47,10 +48,20 @@ def generate_response(transcription):
|
|
47 |
except Exception as e:
|
48 |
return f"Error in response generation: {str(e)}"
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
def process_audio(audio):
|
51 |
transcription = transcribe_audio(audio)
|
52 |
response = generate_response(transcription)
|
53 |
-
|
|
|
54 |
|
55 |
custom_css = """
|
56 |
.gradio-container {
|
@@ -76,12 +87,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
|
|
76 |
gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
|
77 |
|
78 |
with gr.Row():
|
79 |
-
audio_input = gr.Audio(label="Speak!", type="numpy") #
|
80 |
|
81 |
with gr.Row():
|
82 |
-
transcription_output = gr.Textbox(label="Transcription")
|
83 |
-
response_output = gr.Textbox(label="AI Assistant Response")
|
84 |
|
|
|
|
|
85 |
submit_button = gr.Button("Process", variant="primary")
|
86 |
|
87 |
# Add the Groq badge
|
@@ -94,13 +107,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
|
|
94 |
submit_button.click(
|
95 |
process_audio,
|
96 |
inputs=[audio_input],
|
97 |
-
outputs=[transcription_output, response_output]
|
98 |
)
|
99 |
|
100 |
gr.Markdown("""
|
101 |
## How to use this app:
|
102 |
1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
|
103 |
-
2.
|
104 |
3. The transcription and AI assistant response will appear in the respective text boxes.
|
105 |
""")
|
106 |
|
|
|
3 |
import io
|
4 |
import numpy as np
|
5 |
import soundfile as sf
|
6 |
+
import pyttsx3 # Text-to-Speech engine
|
7 |
|
8 |
# Read the Groq API key from the environment rather than committing the secret
# to source control (on hosted platforms, configure it as a secret/variable).
import os

API_KEY = os.environ.get("GROQ_API_KEY")  # None when the variable is not set
|
|
|
48 |
except Exception as e:
|
49 |
return f"Error in response generation: {str(e)}"
|
50 |
|
51 |
+
def text_to_speech(response_text):
    """Synthesize ``response_text`` to speech and return it as an in-memory buffer.

    ``pyttsx3``'s ``save_to_file`` expects a file *name*, not a file-like
    object, so the speech is rendered to a temporary file which is then read
    back into an ``io.BytesIO`` and deleted.

    Args:
        response_text: The text to be spoken.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the bytes the speech
        engine wrote to disk.

    NOTE(review): the container format of the bytes depends on the platform
    speech driver (the ``.wav`` suffix is only a hint) -- confirm on the
    deployment target. Also confirm that whatever consumes this buffer accepts
    a ``BytesIO``; UI audio components commonly want a path, raw bytes or a
    ``(sample_rate, ndarray)`` tuple instead.
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    engine = pyttsx3.init()

    # mkstemp returns an open descriptor; close it so the engine can write to
    # the path itself (required on platforms with exclusive file locking).
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        engine.save_to_file(response_text, audio_path)
        engine.runAndWait()  # blocks until the queued synthesis has been processed
        with open(audio_path, "rb") as audio_file:
            audio_buffer = io.BytesIO(audio_file.read())
    finally:
        # Always clean up the temporary file, even if synthesis failed.
        os.remove(audio_path)

    return audio_buffer
|
59 |
+
|
60 |
def process_audio(audio):
    """Run the full voice pipeline for one audio input.

    The audio is passed to ``transcribe_audio``, the resulting text to
    ``generate_response``, and the generated reply to ``text_to_speech``.

    Args:
        audio: The value supplied by the audio input component; it is passed
            to ``transcribe_audio`` unchanged.

    Returns:
        A 3-tuple ``(transcription, response, audio_response)`` in the order
        expected by the click handler's outputs.
    """
    spoken_text = transcribe_audio(audio)
    reply = generate_response(spoken_text)
    reply_audio = text_to_speech(reply)
    return spoken_text, reply, reply_audio
|
65 |
|
66 |
custom_css = """
|
67 |
.gradio-container {
|
|
|
87 |
gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
|
88 |
|
89 |
with gr.Row():
|
90 |
+
audio_input = gr.Audio(label="Speak!", type="numpy", streaming=True) # Enable real-time streaming
|
91 |
|
92 |
with gr.Row():
|
93 |
+
transcription_output = gr.Textbox(label="Transcription", interactive=False)
|
94 |
+
response_output = gr.Textbox(label="AI Assistant Response", interactive=False)
|
95 |
|
96 |
+
audio_output = gr.Audio(label="AI Response Audio", interactive=False)
|
97 |
+
|
98 |
submit_button = gr.Button("Process", variant="primary")
|
99 |
|
100 |
# Add the Groq badge
|
|
|
107 |
submit_button.click(
|
108 |
process_audio,
|
109 |
inputs=[audio_input],
|
110 |
+
outputs=[transcription_output, response_output, audio_output]
|
111 |
)
|
112 |
|
113 |
gr.Markdown("""
|
114 |
## How to use this app:
|
115 |
1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
|
116 |
+
2. The system will automatically transcribe your speech, generate a response, and play it as audio.
|
117 |
3. The transcription and AI assistant response will appear in the respective text boxes.
|
118 |
""")
|
119 |
|