not-lain committed on
Commit
25734b6
1 Parent(s): 8b809c0

implemented the audio function

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -228,13 +228,15 @@ def process_and_query(text, image,audio):
228
  text = process_image_with_openai(image)
229
 
230
  if audio is not None:
231
- audio = audio[0].numpy()
232
- audio = audio.astype(np.float32)
233
- audio = audio / np.max(np.abs(audio))
234
- audio = audio * 32768
235
- audio = audio.astype(np.int16)
236
- audio = audio.tobytes()
237
- audio = base64.b64encode(audio).decode('utf-8')
 
 
238
 
239
  # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
240
  vectara_response_json = query_vectara(text)
 
228
  text = process_image_with_openai(image)
229
 
230
  if audio is not None:
231
+ # audio = audio[0].numpy()
232
+ # audio = audio.astype(np.float32)
233
+ # audio = audio / np.max(np.abs(audio))
234
+ # audio = audio * 32768
235
+ # audio = audio.astype(np.int16)
236
+ # audio = audio.tobytes()
237
+ # audio = base64.b64encode(audio).decode('utf-8')
238
+ text = process_speech(audio)
239
+ return text
240
 
241
  # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
242
  vectara_response_json = query_vectara(text)