Satyam-Singh committed · Commit b9f656e · verified · 1 Parent(s): 13808b8

Update app.py

Files changed (1)
  1. app.py +106 -38
app.py CHANGED
@@ -1,40 +1,108 @@
- import requests
  import gradio as gr
  
- def test_fastapi_connection(audio_file_path):
-     # Define the URL of your FastAPI endpoint
-     url = "https://llava-endpoint.fra1.zeabur.app/process-audio/"  # Your Render endpoint
- 
-     # Prepare the file to be sent
-     with open(audio_file_path, "rb") as file:
-         files = {"file": file}
- 
-         try:
-             # Send a POST request with the audio file
-             response = requests.post(url, files=files)
-             response.raise_for_status()  # Raise an error for bad responses
- 
-             # Check if the response contains audio data
-             if response.headers['Content-Type'] == 'audio/wav':
-                 # Return the audio file as output to Gradio
-                 return (response.content, "audio/wav")
-             else:
-                 # Return any text response
-                 return response.text
- 
-         except requests.exceptions.HTTPError as http_err:
-             return f"HTTP error occurred: {http_err}"  # For HTTP errors
-         except Exception as err:
-             return f"Other error occurred: {err}"  # For other errors
- 
- # Create a Gradio interface
- iface = gr.Interface(
-     fn=test_fastapi_connection,
-     inputs=gr.Audio(type="filepath"),  # Audio file upload as filepath
-     outputs=[gr.Audio(type="file"), gr.Textbox()],  # Audio or Text response
-     title="FastAPI Audio Processing",  # Optional title
-     description="Upload an audio file and receive processed audio or response text from FastAPI endpoint."
- )
- 
- if __name__ == "__main__":
-     iface.launch()
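
For context, the removed client posts the uploaded file to a `/process-audio/` endpoint and expects either `audio/wav` bytes or plain text back. A minimal sketch of the server contract it assumes (hypothetical; the actual service behind https://llava-endpoint.fra1.zeabur.app is not part of this commit):

```python
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import Response

app = FastAPI()

@app.post("/process-audio/")
async def process_audio(file: UploadFile = File(...)):
    audio_bytes = await file.read()
    # ... whatever processing the real service performs would go here ...
    # Returning audio/wav matches the Content-Type check in the removed client.
    return Response(content=audio_bytes, media_type="audio/wav")
```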
  import gradio as gr
+ import groq
+ import io
+ import numpy as np
+ import soundfile as sf
  
+ # Define the API key directly in the code
+ API_KEY = "your-api-key-here"  # Replace this with your actual Groq API key
+ 
+ def transcribe_audio(audio):
+     if audio is None:
+         return ""
+ 
+     client = groq.Client(api_key=API_KEY)
+ 
+     # Convert audio to the format expected by the model
+     audio_data = audio[1]  # Get the numpy array from the (sample_rate, data) tuple
+     buffer = io.BytesIO()
+     sf.write(buffer, audio_data, audio[0], format='wav')
+     buffer.seek(0)
+ 
+     try:
+         # Use Distil-Whisper English powered by Groq for transcription
+         completion = client.audio.transcriptions.create(
+             model="distil-whisper-large-v3-en",
+             file=("audio.wav", buffer),
+             response_format="text"
+         )
+         return completion
+     except Exception as e:
+         return f"Error in transcription: {str(e)}"
+ 
+ def generate_response(transcription):
+     if not transcription:
+         return "No transcription available. Please try speaking again."
+ 
+     client = groq.Client(api_key=API_KEY)
+ 
+     try:
+         # Use Llama 3 70B powered by Groq for text generation
+         completion = client.chat.completions.create(
+             model="llama3-70b-8192",
+             messages=[{"role": "system", "content": "You are a helpful assistant."},
+                       {"role": "user", "content": transcription}],
+         )
+         return completion.choices[0].message.content
+     except Exception as e:
+         return f"Error in response generation: {str(e)}"
+ 
+ def process_audio(audio):
+     transcription = transcribe_audio(audio)
+     response = generate_response(transcription)
+     return transcription, response
+ 
+ # Custom CSS for the Groq badge and color scheme (feel free to edit however you wish)
+ custom_css = """
+ .gradio-container {
+     background-color: #f5f5f5;
+ }
+ .gr-button-primary {
+     background-color: #f55036 !important;
+     border-color: #f55036 !important;
+ }
+ .gr-button-secondary {
+     color: #f55036 !important;
+     border-color: #f55036 !important;
+ }
+ #groq-badge {
+     position: fixed;
+     bottom: 20px;
+     right: 20px;
+     z-index: 1000;
+ }
+ """
+ 
+ # Pass the custom CSS to Blocks so the badge styling actually applies
+ with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
+     gr.Markdown("# 🎙️ LLAVA Voice-Powered AI Assistant")
+ 
+     with gr.Row():
+         audio_input = gr.Audio(label="Speak!", type="numpy")  # Audio input as (sample_rate, numpy array)
+ 
+     with gr.Row():
+         transcription_output = gr.Textbox(label="Transcription")
+         response_output = gr.Textbox(label="AI Assistant Response")
+ 
+     submit_button = gr.Button("Process", variant="primary")
+ 
+     # Add the badge (styled by the #groq-badge rule in the custom CSS)
+     gr.HTML("""
+     <div id="groq-badge">
+         <div style="color: #f55036; font-weight: bold;">POWERED BY LLAVA</div>
+     </div>
+     """)
+ 
+     submit_button.click(
+         process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, response_output]
+     )
+ 
+     gr.Markdown("""
+     ## How to use this app:
+     1. Click on the microphone icon and speak your message (or upload an audio file). Supported audio file types include mp3, mp4, mpeg, mpga, m4a, wav, and webm.
+     2. Click the "Process" button to transcribe your speech and generate a response from the AI assistant.
+     3. The transcription and the AI assistant's response will appear in the respective text boxes.
+     """)
+ 
+ demo.launch()
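
One follow-up worth noting: the new version hardcodes the Groq key in `API_KEY`. A safer pattern (a sketch, assuming a `GROQ_API_KEY` environment variable, e.g. a Hugging Face Space secret; the variable name is an assumption, not something this commit defines) is to read it at startup:

```python
import os

# Read the Groq API key from the environment instead of committing it;
# GROQ_API_KEY is an assumed variable name - set it however your host expects.
API_KEY = os.environ.get("GROQ_API_KEY")
if not API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set")
```

The rest of the script can then use `API_KEY` exactly as it does now, with no key ever appearing in the repository.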