Spaces:

gabrielchua
/

hey-gemma

Sleeping

App Files Files Community

Gabriel C committed on Mar 12, 2024

Commit

1324088

•

1 Parent(s): 21285af

Create app.py

Browse files

Files changed (1) hide show

app.py +83 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import os
+import time
+import gradio as gr
+import numpy as np
+import soundfile as sf
+from groq import Groq
+from openai import OpenAI
+groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
+openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+def transcribe(audio_path):
+    """
+    Transcribe the audio segment using Whisper.
+    """
+    with open(audio_path, 'rb') as audio_file:
+        transcription = openai_client.audio.transcriptions.create(
+            file=audio_file,
+            language="en",
+            model="whisper-1"
+        )
+    return transcription.text
+def autocomplete(text):
+    """
+    Autocomplete the text using Gemma.
+    """
+    if text != "":
+        response = groq_client.chat.completions.create(
+            model='gemma-7b-it',
+            messages=[{"role": "system", "content": "You are a friendly assistant."},
+                      {"role": "user", "content": text}]
+            )
+        return response.choices[0].message.content
+def process_audio(input_audio):
+    """
+    Process the audio input by transcribing and completing the sentences.
+    """
+    # Now you can use the audio_file_path with soundfile.read()
+    audio_data, sample_rate = sf.read(input_audio)
+    # Ensure mono audio
+    if len(audio_data.shape) > 1:
+        audio_data = np.mean(audio_data, axis=1)
+    transcription_list = []
+    for start in range(0, len(audio_data), sample_rate):
+        end = start + sample_rate
+        segment = audio_data[start:end]
+        # Temporarily saving each segment to a file (Whisper requires a file input)
+        segment_filename = f"/tmp/audio_segment_{start}.wav"
+        sf.write(segment_filename, segment, sample_rate)
+        # Transcribe the audio segment
+        transcription = transcribe(segment_filename)
+        transcription_list.append(transcription)
+    # # Send the transcription for completion
+    completion_result = autocomplete(transcription)
+    text = f"Qn: {transcription} \n \n Ans: {completion_result}"
+    return text
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(sources="microphone", streaming=True, type="filepath"),
+    outputs=gr.Markdown(),
+    title="Dear Gemma",
+    description="Talk to the AI assistant. It completes your sentences in real time.",
+    live=True,
+    allow_flagging="never"
+)
+if __name__ == "__main__":
+    interface.launch()