youngtsai commited on
Commit
635f86b
·
1 Parent(s): dc702e1

def detect_language(text):

Browse files
Files changed (1) hide show
  1. app.py +58 -43
app.py CHANGED
@@ -3,7 +3,8 @@ import json
3
  import os
4
  import openai
5
  import re
6
- import azure.cognitiveservices.speech as speechsdk
 
7
 
8
 
9
 
@@ -119,55 +120,69 @@ def main_function(password: str, theme: str, language: str, method: str, rounds:
119
 
120
  return chatbot_dialogue, audio_path, file_name
121
 
 
 
 
 
 
 
 
 
 
 
122
  def dialogue_to_audio(dialogue, role1_gender, role2_gender):
123
- # Configure Azure Speech Service
124
- speech_config = speechsdk.SpeechConfig(subscription=AZURE_API_KEY, region=AZURE_REGION)
125
- filename="dialogue_output.wav"
126
- audio_config = speechsdk.audio.AudioOutputConfig(filename=filename)
127
- speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
128
-
129
- # Map genders to Azure TTS voices (This is for demonstration purposes; you may wish to have more sophisticated voice mapping.)
130
- voice_map = {
131
- "male": {
132
- "中文": "zh-CN-HanHanNeural",
133
- "英文": "en-US-GuyNeural"
 
 
 
 
 
 
 
134
  },
135
- "female": {
136
- "中文": "zh-CN-XiaoxiaoNeural",
137
- "英文": "en-US-JessaNeural"
138
  }
139
  }
140
-
141
- # Convert dialogue list to text
142
- dialogue_text = ""
143
- for entry in dialogue:
144
- role = entry["role"]
145
- content = entry["content"]
146
-
147
- # Set voice based on role and language
148
- if role == role1_gender:
149
- voice = voice_map[role1_gender][content[-2:]]
150
- else:
151
- voice = voice_map[role2_gender][content[-2:]]
152
-
153
- # Append SSML-formatted content
154
- dialogue_text += f"<voice name='{voice}'>{content[:-3]}</voice> "
155
-
156
- ssml = f"""
157
- <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'>
158
- {dialogue_text}
159
- </speak>
160
- """
161
 
162
- # Perform synthesis
163
- result = speech_synthesizer.speak_ssml(ssml)
164
- if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
165
- print("Audio synthesized successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  else:
167
- print("Error synthesizing audio:", result.reason)
168
 
169
- # Return the path to the audio file
170
- return filename
171
 
172
 
173
 
 
3
  import os
4
  import openai
5
  import re
6
+ from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, AudioConfig, ResultReason
7
+
8
 
9
 
10
 
 
120
 
121
  return chatbot_dialogue, audio_path, file_name
122
 
123
+
124
def detect_language(text):
    """
    Heuristically classify *text* as Chinese or English.

    Returns "zh" if any character falls in the CJK Unified Ideographs
    range (U+4E00–U+9FFF), otherwise "en".
    """
    contains_cjk = any('\u4e00' <= ch <= '\u9fff' for ch in text)
    return "zh" if contains_cjk else "en"
132
+
133
def dialogue_to_audio(dialogue, role1_gender, role2_gender):
    """
    Converts the given dialogue into an audio file using Azure's Text-to-Speech service.

    Parameters:
    - dialogue (list): List of dictionaries with "role" and "content" keys.
    - role1_gender (str): Gender of role1. Can be "male" or "female".
    - role2_gender (str): Gender of role2. Can be "male" or "female".

    Returns:
    - str: File path to the generated audio file.
    """
    # Set up Azure Speech SDK
    speech_config = SpeechConfig(subscription=AZURE_API_KEY, region=AZURE_REGION)

    voices = {
        "en": {
            "male": "en-US-GuyNeural",
            "female": "en-US-JessaNeural"
        },
        "zh": {
            "male": "zh-TW-YunNeural",        # Taiwanese male neural voice
            "female": "zh-TW-HsiaoYuNeural"   # Taiwanese female neural voice
        }
    }

    # Build one SSML document; each utterance is wrapped in a <voice> element
    # chosen from the speaker's gender and the detected language of the line.
    # NOTE(review): content is interpolated unescaped — a literal '&' or '<'
    # in the dialogue would produce invalid SSML; confirm upstream sanitizes.
    ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="zh-CN">'
    for item in dialogue:
        role = item['role']
        content = item['content']
        language = detect_language(content)
        gender = role1_gender if role == 'role1' else role2_gender
        voice = voices[language][gender]
        ssml += f'<voice name="{voice}">{content}</voice>'
    ssml += '</speak>'

    # Create an audio configuration that points to an audio file.
    audio_file = "output_audio.wav"
    audio_output = AudioConfig(filename=audio_file)

    # Create a speech synthesizer using the given settings
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output)

    # Synthesize the text
    result = synthesizer.speak_ssml(ssml)

    # BUG FIX: SpeechSynthesisResult has no `Reason` attribute (the enum is
    # the module-level ResultReason) and no `audio_file` attribute — the
    # original success branch raised AttributeError. Compare against the
    # ResultReason enum and report the local output path instead.
    if result.reason == ResultReason.SynthesizingAudioCompleted:
        print(f"Speech synthesized to [{audio_file}] for text [{ssml}]")
    else:
        print(f"Could not synthesize the text, reason: {result.reason}")

    return audio_file
 
186
 
187
 
188