IliaLarchenko committed
Commit: ac13632
Parent(s): 8138173

STT refactoring

Files changed (2):
  1. api/audio.py +15 -1
  2. app.py +4 -8
api/audio.py CHANGED
@@ -2,6 +2,7 @@ import io
 import os
 import wave
 
+import numpy as np
 import requests
 
 from openai import OpenAI
@@ -30,8 +31,10 @@ class STTManager:
     def __init__(self, config):
         self.config = config
         self.streaming = os.getenv("STREAMING", False)
+        self.status = self.test_stt()
+        self.streaming = False
 
-    def speech_to_text(self, audio, chat_display):
+    def speech_to_text(self, audio):
         audio = numpy_audio_to_bytes(audio[1])
         try:
             if self.config.stt.type == "OPENAI_API":
@@ -52,6 +55,17 @@ class STTManager:
         except Exception as e:
             raise APIError(f"STT Error: Unexpected error: {e}")
 
+        return transcription
+
+    def test_stt(self):
+        try:
+            self.speech_to_text((48000, np.zeros(10000)))
+            return True
+        except:
+            return False
+
+    def add_user_message(self, audio, chat_display):
+        transcription = self.speech_to_text(audio)
         chat_display.append([transcription, None])
         return chat_display
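For context, a minimal usage sketch of the refactored STTManager (the import path and the config object are assumptions based on the rest of this repository): speech_to_text() now returns only the transcription, add_user_message() appends it to the chat display, and test_stt() runs a silent-clip probe in __init__ to populate status.

    import numpy as np

    from api.audio import STTManager  # assumed import path for this repo
    from config import config         # the same config object app.py imports

    stt = STTManager(config)
    print("STT reachable:", stt.status)  # set by test_stt() during __init__

    # Gradio passes recorded audio as a (sample_rate, samples) tuple;
    # a short silent clip mirrors what test_stt() sends.
    audio = (48000, np.zeros(10000))

    chat_display = []  # list of [user_message, assistant_message] pairs
    chat_display = stt.add_user_message(audio, chat_display)
    print(chat_display)  # [[<transcription>, None]]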
 
app.py CHANGED
@@ -8,7 +8,7 @@ from config import config
 from docs.instruction import instruction
 from resources.data import fixed_messages, topics_list
 from resources.prompts import prompts
-from utils.ui import add_candidate_message, add_interviewer_message
+from utils.ui import add_interviewer_message
 
 llm = LLMManager(config, prompts)
 tts = TTSManager(config)
@@ -72,11 +72,8 @@ with gr.Blocks(title="AI Interviewer") as demo:
     tts_status = get_status_color(tts)
     gr.Markdown(f"TTS status: {tts_status}{space}{config.tts.name}")
 
-    try:
-        text_test = stt.speech_to_text(audio_test, False)
-        gr.Markdown(f"STT status: 🟢{space} {config.stt.name}")
-    except:
-        gr.Markdown(f"STT status: 🔴{space} {config.stt.name}")
+    stt_status = get_status_color(stt)
+    gr.Markdown(f"STT status: {stt_status}{space}{config.stt.name}")
 
     llm_status = get_status_color(llm)
     gr.Markdown(f"LLM status: {llm_status}{space}{config.llm.name}")
@@ -134,7 +131,6 @@ with gr.Blocks(title="AI Interviewer") as demo:
        end_btn = gr.Button("Finish the interview", interactive=False)
        chat = gr.Chatbot(label="Chat", show_label=False, show_share_button=False)
        audio_input = gr.Audio(interactive=False, **default_audio_params)
-       # message = gr.Textbox(label="Message", lines=3, visible=False)
 
        with gr.Accordion("Feedback", open=True) as feedback_acc:
            feedback = gr.Markdown()
@@ -167,7 +163,7 @@ with gr.Blocks(title="AI Interviewer") as demo:
        fn=llm.end_interview, inputs=[description, chat_history], outputs=[feedback]
    )
 
-    audio_input.stop_recording(fn=stt.speech_to_text, inputs=[audio_input, chat], outputs=[chat]).then(
+    audio_input.stop_recording(fn=stt.add_user_message, inputs=[audio_input, chat], outputs=[chat]).then(
        fn=lambda: None, outputs=[audio_input]
    ).then(
        fn=llm.send_request,
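
get_status_color itself is not part of this diff; below is a minimal sketch of a compatible helper, assuming each manager (LLMManager, TTSManager, and now STTManager) exposes a boolean status attribute like the one test_stt() sets.

    def get_status_color(manager) -> str:
        # Hypothetical helper, not the repository's actual implementation:
        # green when the manager's self-test passed, red otherwise.
        return "🟢" if getattr(manager, "status", False) else "🔴"

With STTManager.status set in __init__, the STT row can reuse the same helper as the TTS and LLM rows instead of running its own try/except probe while the UI is being built.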