IliaLarchenko committed on
Commit
3447ff0
1 Parent(s): 3a90dc2

Added streaming to audio output

Browse files
Files changed (2) hide show
  1. api/audio.py +37 -22
  2. app.py +2 -4
api/audio.py CHANGED
@@ -1,4 +1,5 @@
1
  import io
 
2
  import wave
3
 
4
  import requests
@@ -59,27 +60,41 @@ class TTSManager:
59
  def __init__(self, config):
60
  self.config = config
61
 
62
- def text_to_speech(self, text):
63
- try:
64
- if self.config.tts.type == "OPENAI_API":
65
- client = OpenAI(base_url=self.config.tts.url, api_key=self.config.tts.key)
66
- response = client.audio.speech.create(model=self.config.tts.name, voice="alloy", response_format="opus", input=text)
67
- elif self.config.tts.type == "HF_API":
68
- headers = {"Authorization": "Bearer " + self.config.tts.key}
69
- response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
70
- if response.status_code != 200:
71
- error_details = response.json().get("error", "No error message provided")
72
- raise APIError("TTS Error: HF API error", status_code=response.status_code, details=error_details)
73
- except APIError as e:
74
- raise
75
- except Exception as e:
76
- raise APIError(f"TTS Error: Unexpected error: {e}")
77
-
78
- return response.content
79
-
80
  def read_last_message(self, chat_display):
81
  if chat_display:
82
- last_message = chat_display[-1][1]
83
- if last_message is not None:
84
- return self.text_to_speech(last_message)
85
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import io
2
+ import os
3
  import wave
4
 
5
  import requests
 
60
  def __init__(self, config):
61
  self.config = config
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def read_last_message(self, chat_display):
64
  if chat_display:
65
+ text = chat_display[-1][1]
66
+
67
+ headers = {"Authorization": "Bearer " + self.config.tts.key}
68
+ try:
69
+ if self.config.tts.type == "OPENAI_API":
70
+ data = {"model": self.config.tts.name, "input": text, "voice": "alloy", "response_format": "opus"}
71
+
72
+ if os.environ.get("STREAMING", False):
73
+ with requests.post(self.config.tts.url, headers=headers, json=data, stream=True) as response:
74
+ if response.status_code != 200:
75
+ error_details = response.json().get("error", "No error message provided")
76
+ raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
77
+ else:
78
+ yield from response.iter_content(chunk_size=1024)
79
+ else:
80
+ response = requests.post(self.config.tts.url, headers=headers, json=data)
81
+ if response.status_code != 200:
82
+ error_details = response.json().get("error", "No error message provided")
83
+ raise APIError("TTS Error: OPENAI API error", status_code=response.status_code, details=error_details)
84
+ return response.content
85
+ elif self.config.tts.type == "HF_API":
86
+ if os.environ.get("STREAMING", False):
87
+ raise APIError("Streaming not supported for HF API TTS")
88
+ else:
89
+ response = requests.post(self.config.tts.url, headers=headers, json={"inputs": text})
90
+ if response.status_code != 200:
91
+ error_details = response.json().get("error", "No error message provided")
92
+ raise APIError("TTS Error: HF API error", status_code=response.status_code, details=error_details)
93
+ return response.content
94
+
95
+ except APIError as e:
96
+ raise
97
+ except Exception as e:
98
+ raise APIError(f"TTS Error: Unexpected error: {e}")
99
+ else:
100
+ return None
app.py CHANGED
@@ -53,7 +53,7 @@ with gr.Blocks(title="AI Interviewer") as demo:
53
  gr.Markdown(instruction["demo"])
54
 
55
  started_coding = gr.State(False)
56
- audio_output = gr.Audio(label="Play audio", autoplay=True, visible=False, interactive=False)
57
  with gr.Tab("Instruction") as instruction_tab:
58
  with gr.Row():
59
  with gr.Column(scale=2):
@@ -167,8 +167,6 @@ with gr.Blocks(title="AI Interviewer") as demo:
167
  outputs=[chat_history, chat, message, previous_code],
168
  )
169
 
170
- chat.change(fn=tts.read_last_message, inputs=[chat], outputs=[audio_output], trigger_mode="once")
171
-
172
- # audio_output.stop(fn=lambda: None, inputs=None, outputs=[audio_output])
173
 
174
  demo.launch(show_api=False)
 
53
  gr.Markdown(instruction["demo"])
54
 
55
  started_coding = gr.State(False)
56
+ audio_output = gr.Audio(label="Play audio", autoplay=True, visible=False, streaming=os.environ.get("STREAMING", False))
57
  with gr.Tab("Instruction") as instruction_tab:
58
  with gr.Row():
59
  with gr.Column(scale=2):
 
167
  outputs=[chat_history, chat, message, previous_code],
168
  )
169
 
170
+ chat.change(fn=tts.read_last_message, inputs=[chat], outputs=[audio_output])
 
 
171
 
172
  demo.launch(show_api=False)