Spaces:
Runtime error
Runtime error
Commit
·
fce2699
1
Parent(s):
32f0c73
add speech to text
Browse files
app.py
CHANGED
@@ -318,7 +318,29 @@ def text_to_speech_2(text):
|
|
318 |
except requests.exceptions.RequestException as e:
|
319 |
print(f"Error: {e}")
|
320 |
return None
|
|
|
|
|
|
|
|
|
321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
Text2Sound_tool = Tool(
|
323 |
name = "Text_To_Sound_REST_API",
|
324 |
# func = Text2Sound,
|
@@ -958,6 +980,7 @@ with gr.Blocks() as demo:
|
|
958 |
upload_button.upload(func_upload_file, [upload_button, chatbot], chatbot)
|
959 |
agentchoice.change(SetAgent, agentchoice, None)
|
960 |
frash_logs.click(read_logs, None, logs)
|
|
|
961 |
# voice_output.end(ClearAudio, None, voice_output)
|
962 |
# def clear_voice():
|
963 |
# print("clear audio ...")
|
|
|
318 |
except requests.exceptions.RequestException as e:
|
319 |
print(f"Error: {e}")
|
320 |
return None
|
321 |
+
|
322 |
+
def speech_to_text(Filename_Audio_input_single):
    """Send a recorded audio file to the Azure speech-to-text REST endpoint.

    Args:
        Filename_Audio_input_single: Path of the audio file to transcribe.
            The request declares the payload as ``audio/wav``.

    Returns:
        The raw response body (``response.text``) returned by the service,
        or ``None`` when no file path was supplied, no access token could
        be obtained, or the HTTP request failed.
    """
    print("Start speech to text ....")

    # Nothing to transcribe; the UI callback presumably passes None when no
    # recording is available (TODO confirm against the Gradio event payload).
    if not Filename_Audio_input_single:
        return None

    access_token = get_azure_access_token()

    if not access_token:
        return None

    try:
        endpoint = "https://eastus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "audio/wav",
        }

        # Use a context manager so the audio file handle is always closed,
        # even when the request raises.
        with open(Filename_Audio_input_single, "rb") as audio_file:
            response = requests.post(endpoint, headers=headers, data=audio_file)

        print("Speech to Text: ", response.text)
        # The response attribute is `.text`; `.txt` does not exist and
        # raised AttributeError on every successful request.
        text_from_audio = response.text
        return text_from_audio
    except requests.exceptions.RequestException as e:
        print(f"Error speech_to_text: {e}")
        return None
|
342 |
+
|
343 |
+
|
344 |
Text2Sound_tool = Tool(
|
345 |
name = "Text_To_Sound_REST_API",
|
346 |
# func = Text2Sound,
|
|
|
980 |
upload_button.upload(func_upload_file, [upload_button, chatbot], chatbot)
|
981 |
agentchoice.change(SetAgent, agentchoice, None)
|
982 |
frash_logs.click(read_logs, None, logs)
|
983 |
+
voice_input.stop_recording(speech_to_text, voice_input, None)
|
984 |
# voice_output.end(ClearAudio, None, voice_output)
|
985 |
# def clear_voice():
|
986 |
# print("clear audio ...")
|