Satyam-Singh committed on
Commit
1ca46ec
·
verified ·
1 Parent(s): 323f6ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -3,6 +3,7 @@ import groq
3
  import io
4
  import numpy as np
5
  import soundfile as sf
 
6
 
7
  # Define the API key directly in the code
8
  API_KEY = "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" # Replace this with your actual Groq API key
@@ -47,10 +48,20 @@ def generate_response(transcription):
47
  except Exception as e:
48
  return f"Error in response generation: {str(e)}"
49
 
 
 
 
 
 
 
 
 
 
50
  def process_audio(audio):
51
  transcription = transcribe_audio(audio)
52
  response = generate_response(transcription)
53
- return transcription, response
 
54
 
55
  custom_css = """
56
  .gradio-container {
@@ -76,12 +87,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
76
  gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
77
 
78
  with gr.Row():
79
- audio_input = gr.Audio(label="Speak!", type="numpy") # Audio input as numpy array
80
 
81
  with gr.Row():
82
- transcription_output = gr.Textbox(label="Transcription")
83
- response_output = gr.Textbox(label="AI Assistant Response")
84
 
 
 
85
  submit_button = gr.Button("Process", variant="primary")
86
 
87
  # Add the Groq badge
@@ -94,13 +107,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
94
  submit_button.click(
95
  process_audio,
96
  inputs=[audio_input],
97
- outputs=[transcription_output, response_output]
98
  )
99
 
100
  gr.Markdown("""
101
  ## How to use this app:
102
  1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
103
- 2. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
104
  3. The transcription and AI assistant response will appear in the respective text boxes.
105
  """)
106
 
 
3
  import io
4
  import numpy as np
5
  import soundfile as sf
6
+ import pyttsx3 # Text-to-Speech engine
7
 
8
  # Define the API key directly in the code
9
  API_KEY = "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" # Replace this with your actual Groq API key
 
48
  except Exception as e:
49
  return f"Error in response generation: {str(e)}"
50
 
51
def text_to_speech(response_text):
    """Render ``response_text`` to speech and return the audio in memory.

    pyttsx3's ``save_to_file`` is documented to take a file *name*, so the
    speech is written to a temporary file, copied into a ``BytesIO`` buffer,
    and the temporary file is removed afterwards.

    Args:
        response_text: The text to synthesize.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds whatever audio
        bytes the engine wrote to disk.
    """
    import os
    import tempfile

    # Reserve a unique path and close our handle right away so the speech
    # engine can open the file itself.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        engine = pyttsx3.init()
        engine.save_to_file(response_text, tmp_path)
        # save_to_file only queues the request; runAndWait performs it.
        engine.runAndWait()
        with open(tmp_path, "rb") as audio_file:
            audio_buffer = io.BytesIO(audio_file.read())
    finally:
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
    return audio_buffer
59
+
60
  def process_audio(audio):
61
  transcription = transcribe_audio(audio)
62
  response = generate_response(transcription)
63
+ audio_response = text_to_speech(response)
64
+ return transcription, response, audio_response
65
 
66
  custom_css = """
67
  .gradio-container {
 
87
  gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
88
 
89
  with gr.Row():
90
+ audio_input = gr.Audio(label="Speak!", type="numpy", streaming=True) # Enable real-time streaming
91
 
92
  with gr.Row():
93
+ transcription_output = gr.Textbox(label="Transcription", interactive=False)
94
+ response_output = gr.Textbox(label="AI Assistant Response", interactive=False)
95
 
96
+ audio_output = gr.Audio(label="AI Response Audio", interactive=False)
97
+
98
  submit_button = gr.Button("Process", variant="primary")
99
 
100
  # Add the Groq badge
 
107
  submit_button.click(
108
  process_audio,
109
  inputs=[audio_input],
110
+ outputs=[transcription_output, response_output, audio_output]
111
  )
112
 
113
  gr.Markdown("""
114
  ## How to use this app:
115
  1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio files include mp3, mp4, mpeg, mpga, m4a, wav, and webm file types.
116
+ 2. The system will automatically transcribe your speech, generate a response, and play it as audio.
117
  3. The transcription and AI assistant response will appear in the respective text boxes.
118
  """)
119