Spaces:

youngtsai
/

dialogue_generator

Runtime error

App Files Files Community

youngtsai committed on Oct 28, 2023

Commit

635f86b

1 Parent(s): dc702e1

def detect_language(text):

Browse files

Files changed (1) hide show

app.py +58 -43

app.py CHANGED Viewed

@@ -3,7 +3,8 @@ import json
 import os
 import openai
 import re
-import azure.cognitiveservices.speech as speechsdk
@@ -119,55 +120,69 @@ def main_function(password: str, theme: str, language: str, method: str, rounds:
     return chatbot_dialogue, audio_path, file_name
 def dialogue_to_audio(dialogue, role1_gender, role2_gender):
-    # Configure Azure Speech Service
-    speech_config = speechsdk.SpeechConfig(subscription=AZURE_API_KEY, region=AZURE_REGION)
-    filename="dialogue_output.wav"
-    audio_config = speechsdk.audio.AudioOutputConfig(filename=filename)
-    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
-    # Map genders to Azure TTS voices (This is for demonstration purposes; you may wish to have more sophisticated voice mapping.)
-    voice_map = {
-        "male": {
-            "中文": "zh-CN-HanHanNeural",
-            "英文": "en-US-GuyNeural"
         },
-        "female": {
-            "中文": "zh-CN-XiaoxiaoNeural",
-            "英文": "en-US-JessaNeural"
         }
     }
-    # Convert dialogue list to text
-    dialogue_text = ""
-    for entry in dialogue:
-        role = entry["role"]
-        content = entry["content"]
-        # Set voice based on role and language
-        if role == role1_gender:
-            voice = voice_map[role1_gender][content[-2:]]
-        else:
-            voice = voice_map[role2_gender][content[-2:]]
-        # Append SSML-formatted content
-        dialogue_text += f"<voice name='{voice}'>{content[:-3]}</voice> "
-    ssml = f"""
-    <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'>
-        {dialogue_text}
-    </speak>
-    """
-    # Perform synthesis
-    result = speech_synthesizer.speak_ssml(ssml)
-    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
-        print("Audio synthesized successfully!")
     else:
-        print("Error synthesizing audio:", result.reason)
-    # Return the path to the audio file
-    return filename

 import os
 import openai
 import re
+from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, AudioConfig
     return chatbot_dialogue, audio_path, file_name
def detect_language(text):
    """
    Classify text as Chinese ("zh") or English ("en").

    The text is reported as Chinese as soon as it contains a single Han
    ideograph, so mixed Chinese/English text yields "zh". Everything else,
    including empty or missing text, yields "en".

    Parameters:
    - text (str): Text to inspect. None and "" are accepted.

    Returns:
    - str: "zh" if any Han ideograph is present, otherwise "en".
    """
    # Nothing to scan: fall back to the default instead of raising on None.
    if not text:
        return "en"
    han_ranges = (
        ('\u3400', '\u4dbf'),  # CJK Unified Ideographs Extension A
        ('\u4e00', '\u9fff'),  # CJK Unified Ideographs (the common block)
        ('\uf900', '\ufaff'),  # CJK Compatibility Ideographs
    )
    for char in text:
        if any(low <= char <= high for low, high in han_ranges):
            return "zh"
    return "en"
 def dialogue_to_audio(dialogue, role1_gender, role2_gender):
+    """
+    Converts the given dialogue into an audio file using Azure's Text-to-Speech service.
+    Parameters:
+    - dialogue (list): List of dictionaries containing the structured dialogue.
+    - role1_gender (str): Gender of role1. Can be "male" or "female".
+    - role2_gender (str): Gender of role2. Can be "male" or "female".
+    Returns:
+    - str: File path to the generated audio file.
+    """
+    # Set up Azure Speech SDK
+    speech_config = SpeechConfig(subscription=AZURE_API_KEY, region=AZURE_REGION)
+    voices = {
+        "en": {
+            "male": "en-US-GuyNeural",
+            "female": "en-US-JessaNeural"
         },
+        "zh": {
+            "male": "zh-TW-YunNeural",   # Taiwanese male neural voice
+            "female": "zh-TW-HsiaoYuNeural"  # Taiwanese female neural voice
         }
     }
+    ssml = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="zh-CN">'
+    for item in dialogue:
+        role = item['role']
+        content = item['content']
+        language = detect_language(content)
+        gender = role1_gender if role == 'role1' else role2_gender
+        voice = voices[language][gender]
+        ssml += f'<voice name="{voice}">{content}</voice>'
+    ssml += '</speak>'
+    # Create an audio configuration that points to an audio file.
+    audio_file = "output_audio.wav"
+    audio_output = AudioConfig(filename=audio_file)
+    # Create a speech synthesizer using the given settings
+    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output)
+    # Synthesize the text
+    result = synthesizer.speak_ssml(ssml)
+    # Check result
+    if result.reason == result.Reason.SynthesizingAudioCompleted:
+        print(f"Speech synthesized to [{result.audio_file}] for text [{ssml}]")
     else:
+        print(f"Could not synthesize the text, reason: {result.reason}")
+    return audio_file