Satyam-Singh committed · Commit b9f656e · verified · 1 Parent(s): 13808b8

Update app.py

Files changed (1)
  1. app.py +106 -38
app.py CHANGED
@@ -1,40 +1,108 @@
- import requests
  import gradio as gr
  
- def test_fastapi_connection(audio_file_path):
-     # Define the URL of your FastAPI endpoint
-     url = "https://llava-endpoint.fra1.zeabur.app/process-audio/"  # Your Render endpoint
- 
-     # Prepare the file to be sent
-     with open(audio_file_path, "rb") as file:
-         files = {"file": file}
- 
-         try:
-             # Send a POST request with the audio file
-             response = requests.post(url, files=files)
-             response.raise_for_status()  # Raise an error for bad responses
- 
-             # Check if the response contains audio data
-             if response.headers['Content-Type'] == 'audio/wav':
-                 # Return the audio file as output to Gradio
-                 return (response.content, "audio/wav")
-             else:
-                 # Return any text response
-                 return response.text
- 
-         except requests.exceptions.HTTPError as http_err:
-             return f"HTTP error occurred: {http_err}"  # For HTTP errors
-         except Exception as err:
-             return f"Other error occurred: {err}"  # For other errors
- 
- # Create a Gradio interface
- iface = gr.Interface(
-     fn=test_fastapi_connection,
-     inputs=gr.Audio(type="filepath"),  # Audio file upload as filepath
-     outputs=[gr.Audio(type="file"), gr.Textbox()],  # Audio or Text response
-     title="FastAPI Audio Processing",  # Optional title
-     description="Upload an audio file and receive processed audio or response text from FastAPI endpoint."
- )
- 
- if __name__ == "__main__":
-     iface.launch()
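
For context, the removed client posts the uploaded file to a `/process-audio/` endpoint and expects either `audio/wav` bytes or plain text back. A minimal sketch of the server contract it assumes (hypothetical; the actual service behind https://llava-endpoint.fra1.zeabur.app is not part of this commit):

```python
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import Response

app = FastAPI()

@app.post("/process-audio/")
async def process_audio(file: UploadFile = File(...)):
    audio_bytes = await file.read()
    # ... whatever processing the real service performs would go here ...
    # Returning audio/wav matches the Content-Type check in the removed client.
    return Response(content=audio_bytes, media_type="audio/wav")
```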
  import gradio as gr
+ import groq
+ import io
+ import numpy as np
+ import soundfile as sf
  
+ # Define the API key directly in the code
+ API_KEY = "your-api-key-here"  # Replace this with your actual Groq API key
+ 
+ def transcribe_audio(audio):
+     if audio is None:
+         return ""
+ 
+     client = groq.Client(api_key=API_KEY)
+ 
+     # Convert audio to the format expected by the model
+     audio_data = audio[1]  # Get the numpy array from the (sample_rate, data) tuple
+     buffer = io.BytesIO()
+     sf.write(buffer, audio_data, audio[0], format='wav')
+     buffer.seek(0)
+ 
+     try:
+         # Use Distil-Whisper English powered by Groq for transcription
+         completion = client.audio.transcriptions.create(
+             model="distil-whisper-large-v3-en",
+             file=("audio.wav", buffer),
+             response_format="text"
+         )
+         return completion
+     except Exception as e:
+         return f"Error in transcription: {str(e)}"
+ 
+ def generate_response(transcription):
+     if not transcription:
+         return "No transcription available. Please try speaking again."
+ 
+     client = groq.Client(api_key=API_KEY)
+ 
+     try:
+         # Use Llama 3 70B powered by Groq for text generation
+         completion = client.chat.completions.create(
+             model="llama3-70b-8192",
+             messages=[{"role": "system", "content": "You are a helpful assistant."},
+                       {"role": "user", "content": transcription}],
+         )
+         return completion.choices[0].message.content
+     except Exception as e:
+         return f"Error in response generation: {str(e)}"
+ 
+ def process_audio(audio):
+     transcription = transcribe_audio(audio)
+     response = generate_response(transcription)
+     return transcription, response
+ 
+ # Custom CSS for the Groq badge and color scheme (feel free to edit however you wish)
+ custom_css = """
+ .gradio-container {
+     background-color: #f5f5f5;
+ }
+ .gr-button-primary {
+     background-color: #f55036 !important;
+     border-color: #f55036 !important;
+ }
+ .gr-button-secondary {
+     color: #f55036 !important;
+     border-color: #f55036 !important;
+ }
+ #groq-badge {
+     position: fixed;
+     bottom: 20px;
+     right: 20px;
+     z-index: 1000;
+ }
+ """
+ 
+ # Pass the custom CSS to Blocks so the badge styling actually applies
+ with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
+     gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
+ 
+     with gr.Row():
+         audio_input = gr.Audio(label="Speak!", type="numpy")  # Audio input as (sample_rate, numpy array)
+ 
+     with gr.Row():
+         transcription_output = gr.Textbox(label="Transcription")
+         response_output = gr.Textbox(label="AI Assistant Response")
+ 
+     submit_button = gr.Button("Process", variant="primary")
+ 
+     # Add the badge (styled by the #groq-badge rule in the custom CSS)
+     gr.HTML("""
+     <div id="groq-badge">
+         <div style="color: #f55036; font-weight: bold;">POWERED BY LLAVA</div>
+     </div>
+     """)
+ 
+     submit_button.click(
+         process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, response_output]
+     )
+ 
+     gr.Markdown("""
+     ## How to use this app:
+     1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio file types include mp3, mp4, mpeg, mpga, m4a, wav, and webm.
+     2. Click the "Process" button to transcribe your speech and generate a response from the AI assistant.
+     3. The transcription and the AI assistant's response will appear in the respective text boxes.
+     """)
+ 
+ demo.launch()
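
One follow-up worth noting: the new version hardcodes the Groq key in `API_KEY`. A safer pattern (a sketch, assuming a `GROQ_API_KEY` environment variable, e.g. a Hugging Face Space secret; the variable name is an assumption, not something this commit defines) is to read it at startup:

```python
import os

# Read the Groq API key from the environment instead of committing it;
# GROQ_API_KEY is an assumed variable name - set it however your host expects.
API_KEY = os.environ.get("GROQ_API_KEY")
if not API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set")
```

The rest of the script can then use `API_KEY` exactly as it does now, with no key ever appearing in the repository.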