Spaces:

gabrielchua
/

hey-gemma

Sleeping

App Files Files Community

Gabriel C committed on Mar 17, 2024

Commit

1003643

•

1 Parent(s): b57b0c3

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -50

app.py CHANGED Viewed

@@ -1,83 +1,62 @@
 import os
-import time
 import gradio as gr
 import numpy as np
-import soundfile as sf
 from groq import Groq
-from openai import OpenAI
 groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
-openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
-def transcribe(audio_path):
     """
-    Transcribe the audio segment using Whisper.
     """
-    with open(audio_path, 'rb') as audio_file:
-        transcription = openai_client.audio.transcriptions.create(
-            file=audio_file,
-            language="en",
-            model="whisper-1"
-        )
-    return transcription.text
-def autocomplete(text):
     """
     Autocomplete the text using Gemma.
     """
     if text != "":
         response = groq_client.chat.completions.create(
             model='gemma-7b-it',
-            messages=[{"role": "system", "content": "You are a friendly assistant."},
                       {"role": "user", "content": text}]
             )
         return response.choices[0].message.content
-def process_audio(input_audio):
     """
     Process the audio input by transcribing and completing the sentences.
     """
-    # Now you can use the audio_file_path with soundfile.read()
-    audio_data, sample_rate = sf.read(input_audio)
-    # Ensure mono audio
-    if len(audio_data.shape) > 1:
-        audio_data = np.mean(audio_data, axis=1)
-    transcription_list = []
-    for start in range(0, len(audio_data), sample_rate):
-        end = start + sample_rate
-        segment = audio_data[start:end]
-        # Temporarily saving each segment to a file (Whisper requires a file input)
-        segment_filename = f"/tmp/audio_segment_{start}.wav"
-        sf.write(segment_filename, segment, sample_rate)
-        # Transcribe the audio segment
-        transcription = transcribe(segment_filename)
-        transcription_list.append(transcription)
-    # # Send the transcription for completion
-    completion_result = autocomplete(transcription)
-    text = f"Qn: {transcription} \n \n Ans: {completion_result}"
-    return text
-# Define the Gradio interface
-interface = gr.Interface(
-    fn=process_audio,
-    inputs=gr.Audio(sources="microphone", streaming=True, type="filepath"),
-    outputs=gr.Markdown(),
     title="Dear Gemma",
-    description="Talk to the AI assistant. It completes your sentences in real time.",
     live=True,
     allow_flagging="never"
 )
-if __name__ == "__main__":
-    interface.launch()

 import os
 import gradio as gr
 import numpy as np
 from groq import Groq
+from transformers import pipeline
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
 groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
def transcribe(stream, new_chunk):
    """
    Append the newest microphone chunk to the running stream and transcribe it.

    Args:
        stream: Samples accumulated from earlier chunks, or None when this
            is the first chunk.
        new_chunk: ``(sampling_rate, samples)`` tuple for the latest chunk.

    Returns:
        ``(stream, text)``: the accumulated samples including the new chunk,
        and the transcription of the whole accumulated stream.
    """
    sr, y = new_chunk
    y = y.astype(np.float32)

    # Down-mix multi-channel chunks to mono so every chunk concatenates
    # into a single 1-D stream.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise the chunk. A silent (all-zero) chunk has a peak of 0;
    # dividing by it would fill the stream with NaN, so leave it untouched.
    # An empty chunk has no peak at all (np.max would raise).
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
def autocomplete(text):
    """
    Autocomplete the text using Gemma.

    Args:
        text: Prompt text sent as the user message.

    Returns:
        The content of the first completion choice, or None when ``text``
        is empty, whitespace-only, or None (no request is made in that case).
    """
    # Blank or whitespace-only text carries no prompt, so skip the request
    # instead of sending an empty user message.
    if not text or not text.strip():
        return None

    response = groq_client.chat.completions.create(
        model='gemma-7b-it',
        messages=[{"role": "system", "content": "You are a friendly assistant named Gemma."},
                  {"role": "user", "content": text}]
    )
    return response.choices[0].message.content
def process_audio(input_audio, new_chunk):
    """
    Transcribe the running audio stream and get a completion for it.

    ``input_audio`` is handed to ``transcribe`` as the stream accumulated so
    far and ``new_chunk`` as the newest chunk. The transcription is then
    passed to ``autocomplete``.

    Returns:
        ``(stream, text)``: the updated stream from ``transcribe`` and the
        completion text from ``autocomplete``.
    """
    updated_stream, heard = transcribe(input_audio, new_chunk)
    reply = autocomplete(heard)
    # Echo the transcription/completion pair to stdout.
    print(heard, reply)
    return updated_stream, reply
# Interface wiring: the "state" slot on each side carries the value that
# process_audio returns first back in as its first argument on the next call;
# the microphone component supplies the second argument and the Markdown
# component displays the second return value.
mic_input = gr.Audio(sources=["microphone"], streaming=True)
reply_output = gr.Markdown()

demo = gr.Interface(
    fn=process_audio,
    inputs=["state", mic_input],
    outputs=["state", reply_output],
    title="Dear Gemma",
    description="Talk to the AI assistant.",
    live=True,
    allow_flagging="never",
)

demo.launch()