freddyaboulton (HF staff) committed
Commit 4a472df · verified · Parent: 6d6fdc5

Upload folder using huggingface_hub

Files changed (3):
  1. README.md +8 -5
  2. app.py +113 -0
  3. requirements.txt +6 -0
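This commit was generated by huggingface_hub's folder upload, which pushes the contents of a local directory as a single commit (the commit message above is the library's default). A minimal sketch of the call that produces a commit like this; the Space id is assumed for illustration:

from huggingface_hub import HfApi

api = HfApi()
# Pushes every file in the local folder as one commit; the default
# commit message is "Upload folder using huggingface_hub".
api.upload_folder(
    folder_path=".",                          # local folder to upload
    repo_id="freddyaboulton/llm-voice-chat",  # assumed Space id, for illustration
    repo_type="space",
)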
README.md CHANGED
@@ -1,12 +1,15 @@
  ---
- title: Llm Voice Chat
- emoji: 🔥
+ title: LLM Voice Chat
+ emoji: 💻
  colorFrom: purple
- colorTo: gray
+ colorTo: red
  sdk: gradio
- sdk_version: 5.16.2
+ sdk_version: 5.16.0
  app_file: app.py
  pinned: false
+ license: mit
+ short_description: Talk to an LLM with ElevenLabs
+ tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY]
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
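The secret|… entries in tags declare which secrets this Space expects (Twilio TURN credentials plus the Groq and ElevenLabs API keys). On Spaces, configured secrets are exposed to the running app as environment variables, which is how app.py below reads ELEVENLABS_API_KEY. A minimal fail-fast sketch of checking them at startup; the require_secret helper is hypothetical, not part of this repo:

import os

def require_secret(name: str) -> str:
    # Hypothetical helper: Spaces injects each configured secret as an
    # environment variable with the same name.
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Missing required secret: {name}")
    return value

ELEVENLABS_API_KEY = require_secret("ELEVENLABS_API_KEY")
GROQ_API_KEY = require_secret("GROQ_API_KEY")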
app.py ADDED
@@ -0,0 +1,113 @@
+ from fastrtc import (
+     ReplyOnPause,
+     AdditionalOutputs,
+     Stream,
+     aggregate_bytes_to_16bit,
+     get_twilio_turn_credentials,
+     WebRTCError,
+     stt,
+     audio_to_bytes,
+ )
+ import numpy as np
+ import gradio as gr
+ from gradio.utils import get_space
+ from groq import Groq
+ from elevenlabs import ElevenLabs
+ from dotenv import load_dotenv
+ import time
+ import os
+ from fastapi import FastAPI
+
+ load_dotenv()
+ groq_client = Groq()
+ tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+
+
+ # See "Talk to Claude" in the Cookbook for an example of how to keep
+ # track of the chat history.
+ def response(
+     audio: tuple[int, np.ndarray],
+     chatbot: list[dict] | None = None,
+ ):
+     try:
+         chatbot = chatbot or []
+         messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
+         start = time.time()
+         # Transcribe the user's turn with Groq-hosted Whisper
+         # (fastrtc's built-in stt(audio) is a drop-in alternative).
+         text = groq_client.audio.transcriptions.create(
+             file=("audio-file.mp3", audio_to_bytes(audio)),
+             model="whisper-large-v3-turbo",
+             response_format="verbose_json",
+         ).text
+         print("transcription", time.time() - start)
+         print("prompt", text)
+         chatbot.append({"role": "user", "content": text})
+         yield AdditionalOutputs(chatbot)
+         messages.append({"role": "user", "content": text})
+         response_text = (
+             groq_client.chat.completions.create(
+                 model="llama-3.1-8b-instant",
+                 max_tokens=512,
+                 messages=messages,  # type: ignore
+             )
+             .choices[0]
+             .message.content
+         )
+
+         chatbot.append({"role": "assistant", "content": response_text})
+
+         # Stream the reply from ElevenLabs as raw 24 kHz PCM.
+         iterator = tts_client.text_to_speech.convert_as_stream(
+             text=response_text,  # type: ignore
+             voice_id="JBFqnCBsd6RMkjVDRZzb",
+             model_id="eleven_multilingual_v2",
+             output_format="pcm_24000",
+         )
+         # Re-chunk the byte stream into int16 frames and yield them for playback.
+         for chunk in aggregate_bytes_to_16bit(iterator):
+             audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
+             yield (24000, audio_array)
+         yield AdditionalOutputs(chatbot)
+     except Exception:
+         import traceback
+
+         traceback.print_exc()
+         raise WebRTCError(traceback.format_exc())
+
+
+ chatbot = gr.Chatbot(type="messages")
+ stream = Stream(
+     modality="audio",
+     mode="send-receive",
+     handler=ReplyOnPause(response, input_sample_rate=16000),
+     additional_outputs_handler=lambda a, b: b,  # keep the latest chatbot state
+     additional_inputs=[chatbot],
+     additional_outputs=[chatbot],
+     rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+     concurrency_limit=20 if get_space() else None,
+ )
+ # Swap the default HTML header of the auto-generated UI for a custom title.
+ for id, block in stream.ui.blocks.items():
+     if isinstance(block, gr.HTML):
+         stream.ui.blocks[id] = gr.HTML(
+             """
+             <h1 style='text-align: center'>
+             LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)
+             </h1>
+             """
+         )
+
+ # Mount the Stream's auto-generated UI on a FastAPI app rather than
+ # building the UI manually.
+ app = FastAPI()
+ gr.mount_gradio_app(app, stream.ui, path="/")
+
+
+ if __name__ == "__main__":
+     if (mode := os.getenv("MODE")) == "UI":
+         stream.ui.launch(server_port=7860)
+     elif mode == "PHONE":
+         stream.fastphone(host="0.0.0.0", port=7860)
+     else:
+         import uvicorn
+
+         uvicorn.run(app, host="0.0.0.0", port=7860)
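The __main__ block gives three run modes: MODE=UI python app.py launches just the Gradio UI, MODE=PHONE serves the stream over a dial-in number via fastphone, and the default serves the mounted FastAPI app with uvicorn. The handler contract that ReplyOnPause relies on is easiest to see in isolation; a minimal echo sketch, assuming the same (sample_rate, samples) tuple convention as response() above:

import numpy as np
from fastrtc import ReplyOnPause, Stream

def echo(audio: tuple[int, np.ndarray]):
    # ReplyOnPause invokes the generator with the audio captured up to the
    # detected pause, and plays back each (sample_rate, samples) it yields.
    yield audio

echo_stream = Stream(
    handler=ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
)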
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastrtc[stopword]
+ python-dotenv
+ openai
+ twilio
+ groq
+ elevenlabs