sasan commited on
Commit
338197e
·
1 Parent(s): de203b2

Refactor save_and_transcribe_audio function for better code organization

Browse files
Files changed (2) hide show
  1. kitt/core/stt.py +11 -5
  2. space.py +3 -3
kitt/core/stt.py CHANGED
@@ -22,12 +22,9 @@ def save_audio_as_wav(data, sample_rate, file_path):
22
  )
23
 
24
 
25
- def save_and_transcribe_audio(audio):
26
  sample_rate, data = audio
27
  try:
28
- # add timestamp to file name
29
- filename = f"recordings/audio{time.time()}.wav"
30
- save_audio_as_wav(data, sample_rate, filename)
31
  data = data.astype(np.float32)
32
  data /= np.max(np.abs(data))
33
  text = transcriber({"sampling_rate": sample_rate, "raw": data})["text"]
@@ -36,4 +33,13 @@ def save_and_transcribe_audio(audio):
36
  except Exception as e:
37
  logger.error(f"Error: {e}")
38
  raise Exception("Error transcribing audio.")
39
- return text
 
 
 
 
 
 
 
 
 
 
22
  )
23
 
24
 
25
+ def transcribe_audio(audio):
26
  sample_rate, data = audio
27
  try:
 
 
 
28
  data = data.astype(np.float32)
29
  data /= np.max(np.abs(data))
30
  text = transcriber({"sampling_rate": sample_rate, "raw": data})["text"]
 
33
  except Exception as e:
34
  logger.error(f"Error: {e}")
35
  raise Exception("Error transcribing audio.")
36
+ return text
37
+
38
+
39
+ def save_and_transcribe_audio(audio, save=True):
40
+ sample_rate, data = audio
41
+ # add timestamp to file name
42
+ filename = f"recordings/audio{time.time()}.wav"
43
+ if save:
44
+ save_audio_as_wav(data, sample_rate, filename)
45
+ return transcribe_audio(audio)
space.py CHANGED
@@ -9,7 +9,7 @@ from kitt.core import tts_gradio
9
  from kitt.core import utils as kitt_utils
10
  from kitt.core import voice_options
11
  from kitt.core.model import generate_function_call as process_query
12
- from kitt.core.stt import save_and_transcribe_audio
13
  from kitt.core.tts import prep_for_tts, run_melo_tts, run_tts_replicate
14
  from kitt.skills import (
15
  code_interpreter,
@@ -182,7 +182,7 @@ def update_vehicle_status(trip_progress, origin, destination, state):
182
 
183
 
184
  def save_and_transcribe_run_model(audio, voice_character, state):
185
- text = save_and_transcribe_audio(audio)
186
  out_text, out_voice, vehicle_status, state, update_proxy = run_model(
187
  text, voice_character, state
188
  )
@@ -452,7 +452,7 @@ def create_demo(tts_server: bool = False, model="llama3"):
452
  ],
453
  )
454
  input_audio_debug.stop_recording(
455
- fn=save_and_transcribe_audio,
456
  inputs=[input_audio_debug],
457
  outputs=[input_text_debug],
458
  )
 
9
  from kitt.core import utils as kitt_utils
10
  from kitt.core import voice_options
11
  from kitt.core.model import generate_function_call as process_query
12
+ from kitt.core.stt import transcribe_audio
13
  from kitt.core.tts import prep_for_tts, run_melo_tts, run_tts_replicate
14
  from kitt.skills import (
15
  code_interpreter,
 
182
 
183
 
184
  def save_and_transcribe_run_model(audio, voice_character, state):
185
+ text = transcribe_audio(audio)
186
  out_text, out_voice, vehicle_status, state, update_proxy = run_model(
187
  text, voice_character, state
188
  )
 
452
  ],
453
  )
454
  input_audio_debug.stop_recording(
455
+ fn=transcribe_audio,
456
  inputs=[input_audio_debug],
457
  outputs=[input_text_debug],
458
  )