OuroborosM committed on
Commit
fce2699
·
1 Parent(s): 32f0c73

add speech to text

Browse files
Files changed (1) hide show
  1. app.py +23 -0
app.py CHANGED
@@ -318,7 +318,29 @@ def text_to_speech_2(text):
318
  except requests.exceptions.RequestException as e:
319
  print(f"Error: {e}")
320
  return None
 
 
 
 
321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  Text2Sound_tool = Tool(
323
  name = "Text_To_Sound_REST_API",
324
  # func = Text2Sound,
@@ -958,6 +980,7 @@ with gr.Blocks() as demo:
958
  upload_button.upload(func_upload_file, [upload_button, chatbot], chatbot)
959
  agentchoice.change(SetAgent, agentchoice, None)
960
  frash_logs.click(read_logs, None, logs)
 
961
  # voice_output.end(ClearAudio, None, voice_output)
962
  # def clear_voice():
963
  # print("clear audio ...")
 
318
  except requests.exceptions.RequestException as e:
319
  print(f"Error: {e}")
320
  return None
321
+
322
def speech_to_text(Filename_Audio_input_single):
    """Send a recorded WAV file to the Azure speech-to-text REST endpoint.

    Args:
        Filename_Audio_input_single: Path of the audio file to transcribe.
            The request declares the payload as ``audio/wav``.

    Returns:
        The raw response body returned by the service as a string, or
        ``None`` when no file path was supplied, no access token could be
        obtained, or the HTTP request failed.
    """
    print("Start speech to text ....")

    # The recording event may fire without a file; nothing to transcribe then.
    if not Filename_Audio_input_single:
        return None

    access_token = get_azure_access_token()
    if not access_token:
        return None

    endpoint = "https://eastus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "audio/wav",
    }

    try:
        # Use a context manager so the audio file handle is always closed,
        # even when the request raises.
        with open(Filename_Audio_input_single, "rb") as audio_file:
            response = requests.post(endpoint, headers=headers, data=audio_file)
    except requests.exceptions.RequestException as e:
        print(f"Error speech_to_text: {e}")
        return None

    print("Speech to Text: ", response.text)
    # `response.text` is the decoded body; the previous `response.txt`
    # attribute does not exist and raised AttributeError on every call.
    text_from_audio = response.text
    return text_from_audio
342
+
343
+
344
  Text2Sound_tool = Tool(
345
  name = "Text_To_Sound_REST_API",
346
  # func = Text2Sound,
 
980
  upload_button.upload(func_upload_file, [upload_button, chatbot], chatbot)
981
  agentchoice.change(SetAgent, agentchoice, None)
982
  frash_logs.click(read_logs, None, logs)
983
+ voice_input.stop_recording(speech_to_text, voice_input, None)
984
  # voice_output.end(ClearAudio, None, voice_output)
985
  # def clear_voice():
986
  # print("clear audio ...")