freddyaboulton HF staff committed on
Commit
6072a57
·
verified ·
1 Parent(s): 606dee0

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +7 -52
app.py CHANGED
@@ -1,66 +1,21 @@
1
- import os
2
- import time
3
-
4
  import numpy as np
5
- from dotenv import load_dotenv
6
  from fastapi import FastAPI
7
  from fastapi.responses import RedirectResponse
8
- from fastrtc import (
9
- ReplyOnPause,
10
- Stream,
11
- get_stt_model,
12
- get_tts_model,
13
- )
14
  from gradio.utils import get_space
15
- from numpy.typing import NDArray
16
- from openai import OpenAI
17
-
18
- load_dotenv()
19
-
20
- sambanova_client = OpenAI(
21
- api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1"
22
- )
23
-
24
- stt_model = get_stt_model()
25
- tts_model = get_tts_model()
26
 
27
- chat_history = [
28
- {
29
- "role": "system",
30
- "content": (
31
- "You are a helpful assistant having a spoken conversation."
32
- "Please keep your answers short and concise."
33
- ),
34
- }
35
- ]
36
 
37
-
38
- def echo(audio: tuple[int, NDArray[np.int16]]):
39
- prompt = stt_model.stt(audio)
40
- print("prompt", prompt)
41
- chat_history.append({"role": "user", "content": prompt})
42
- start_time = time.time()
43
- response = sambanova_client.chat.completions.create(
44
- model="Meta-Llama-3.2-3B-Instruct",
45
- messages=chat_history,
46
- max_tokens=200,
47
- )
48
- end_time = time.time()
49
- print("time taken inference", end_time - start_time)
50
- prompt = response.choices[0].message.content
51
- chat_history.append({"role": "assistant", "content": prompt})
52
- start_time = time.time()
53
- for audio_chunk in tts_model.stream_tts_sync(prompt):
54
- yield audio_chunk
55
- end_time = time.time()
56
- print("time taken tts", end_time - start_time)
57
 
58
 
59
  stream = Stream(
60
- handler=ReplyOnPause(echo),
61
  modality="audio",
62
  mode="send-receive",
63
- rtc_configuration=None, # get_twilio_turn_credentials() if get_space() else None,
64
  concurrency_limit=20 if get_space() else None,
65
  )
66
 
 
 
 
 
1
  import numpy as np
 
2
  from fastapi import FastAPI
3
  from fastapi.responses import RedirectResponse
4
+ from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials
 
 
 
 
 
5
  from gradio.utils import get_space
 
 
 
 
 
 
 
 
 
 
 
6
 
 
 
 
 
 
 
 
 
 
7
 
8
def detection(audio: tuple[int, np.ndarray]):
    """Minimal audio handler: echo the received chunk back unchanged.

    Parameters
    ----------
    audio:
        A ``(sample_rate, samples)`` pair as delivered by fastrtc.

    Yields
    ------
    The same ``(sample_rate, samples)`` pair, once. Replace this body
    with any iterator that yields audio; see "LLM Voice Chat" for a
    more complete example.
    """
    yield audio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
# Wire the handler into a fastrtc Stream: bidirectional audio, with
# `detection` invoked by ReplyOnPause when the speaker pauses.
stream = Stream(
    handler=ReplyOnPause(detection),
    modality="audio",
    mode="send-receive",
    # NOTE(review): Twilio TURN credentials are fetched only when running on a
    # HF Space (get_space() truthy); local runs fall back to None — presumably
    # direct/STUN connectivity suffices there. Confirm against fastrtc docs.
    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
    # Cap concurrent sessions on Spaces; unlimited locally.
    concurrency_limit=20 if get_space() else None,
)
21