freddyaboulton (HF staff) committed
Commit 2558f9d · 1 Parent(s): 2d5961d
Files changed (2):
  1. app.py (+10 -63)
  2. requirements.txt (+1 -3)
app.py CHANGED

@@ -1,64 +1,18 @@
 import gradio as gr
+from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
 import numpy as np
 import io
 from pydub import AudioSegment
-import tempfile
 import openai
 import time
-from dataclasses import dataclass, field
-from threading import Lock
 import base64
 
-@dataclass
-class AppState:
-    stream: np.ndarray | None = None
-    sampling_rate: int = 0
-    pause_detected: bool = False
-    conversation: list = field(default_factory=list)
-    client: openai.OpenAI = None
-    output_format: str = "mp3"
-    stopped: bool = False
-
-# Global lock for thread safety
-state_lock = Lock()
-
 def create_client(api_key):
     return openai.OpenAI(
         base_url="https://llama3-1-8b.lepton.run/api/v1/",
         api_key=api_key
     )
 
-def determine_pause(audio, sampling_rate, state):
-    # Take the last 1 second of audio
-    pause_length = int(sampling_rate * 1)  # 1 second
-    if len(audio) < pause_length:
-        return False
-    last_audio = audio[-pause_length:]
-    amplitude = np.abs(last_audio)
-
-    # Calculate the average amplitude in the last 1 second
-    avg_amplitude = np.mean(amplitude)
-    silence_threshold = 0.01  # Adjust this threshold as needed
-    if avg_amplitude < silence_threshold:
-        return True
-    else:
-        return False
-
-def process_audio(audio: tuple, state: AppState):
-    if state.stream is None:
-        state.stream = audio[1]
-        state.sampling_rate = audio[0]
-    else:
-        state.stream = np.concatenate((state.stream, audio[1]))
-
-    pause_detected = determine_pause(state.stream, state.sampling_rate, state)
-    state.pause_detected = pause_detected
-
-    if state.pause_detected:
-        # Stop recording
-        return gr.update(recording=False), state
-    else:
-        return None, state
 
 def update_or_append_conversation(conversation, id, role, content):
     # Find if there's an existing message with the given id
@@ -69,7 +23,8 @@ def update_or_append_conversation(conversation, id, role, content):
     # If not found, append a new message
     conversation.append({"id": id, "role": role, "content": content})
 
-def generate_response_and_audio(audio_bytes: bytes, state: AppState):
+
+def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[str], client: OpenAI, output_format):
     if state.client is None:
         raise gr.Error("Please enter a valid API key first.")
 
@@ -123,19 +78,16 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
 
-def response(state: AppState):
-    if not state.pause_detected:
-        return gr.update(), gr.update(), state
-
-    if state.stream is None or len(state.stream) == 0:
-        return gr.update(), gr.update(), state
+def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
+             gradio_conversation: list[dict], client: OpenAI, output_format: str):
+
 
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
-        state.stream.tobytes(),
-        frame_rate=state.sampling_rate,
-        sample_width=state.stream.dtype.itemsize,
-        channels=(1 if len(state.stream.shape) == 1 else state.stream.shape[1]),
+        audio[1].tobytes(),
+        frame_rate=audio[0],
+        sample_width=audio[1].dtype.itemsize,
+        channels=1,
     )
     segment.export(audio_buffer, format="wav")
 
@@ -167,11 +119,6 @@ def start_recording_user(state: AppState):
     else:
         return gr.update(recording=False)
 
-def set_api_key(api_key, state):
-    if not api_key:
-        raise gr.Error("Please enter a valid API key.")
-    state.client = create_client(api_key)
-    return "API key set successfully!", state
 
 def update_format(format, state):
     state.output_format = format
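
Editor's note on the refactor: the AppState dataclass, determine_pause, process_audio, and set_api_key plumbing removed above is the kind of turn-taking logic that gradio_webrtc's ReplyOnPause is designed to handle. The sketch below is not part of this commit; it is a rough guess, based on the gradio_webrtc documentation, at how the new response handler could be wired to a WebRTC component. The component names (client_state, lepton_state, chatbot, output_format), the dropdown choices, and the time_limit value are illustrative assumptions.

# Hypothetical wiring sketch -- not part of commit 2558f9d.
# ReplyOnPause runs voice-activity detection (the [vad] extra in
# requirements.txt) and calls the handler once the speaker pauses,
# replacing the removed determine_pause/process_audio logic.
import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause

with gr.Blocks() as demo:
    # Illustrative state and components; the real app's layout may differ.
    client_state = gr.State(None)          # openai client made by create_client(api_key)
    lepton_state = gr.State([])            # conversation in the OpenAI message format
    chatbot = gr.Chatbot(type="messages")  # conversation rendered for the user
    output_format = gr.Dropdown(["mp3", "opus"], value="mp3", label="Output format")
    audio = WebRTC(mode="send-receive", modality="audio")

    # Stream microphone audio in; whatever `response` (defined in app.py above)
    # yields is streamed back to the browser.
    audio.stream(
        fn=ReplyOnPause(response),
        inputs=[audio, lepton_state, chatbot, client_state, output_format],
        outputs=[audio],
        time_limit=60,
    )
    # Values the handler wraps in AdditionalOutputs (the updated conversations,
    # assumed here to be two values) arrive through this callback.
    audio.on_additional_outputs(
        lambda lepton, chat: (lepton, chat),
        outputs=[lepton_state, chatbot],
    )

demo.launch()

The practical effect is that audio buffering, pause detection, and recording state no longer live in a hand-rolled AppState guarded by a lock; the handler simply receives one complete utterance as (sample_rate, numpy array) plus whatever extra inputs it declares.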
requirements.txt CHANGED

@@ -1,4 +1,2 @@
-https://gradio-builds.s3.amazonaws.com/cffe9a7ab7f71e76d7214dc57c6278ffaf5bcdf9/gradio-5.0.0b1-py3-none-any.whl
-numpy
-pydub
+gradio_webrtc[vad]==0.0.11
 openai