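# NOTE(review): the text "Spaces: Runtime error / Runtime error" preceded the
# code in this file. It appears to be status text captured from the hosting
# page rather than part of the module -- confirm and remove if so.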
# Speaker names available for each language, keyed by language name.
# Most languages list two speakers; Dogri, Nepali and Sanskrit list one.
LANGUAGE_VOICE_MAPPING = {
    "Assamese": ["Amit", "Sita"],
    "Bengali": ["Arjun", "Aditi"],
    "Bodo": ["Bikram", "Maya"],
    "Chhattisgarhi": ["Bhanu", "Champa"],
    "Dogri": ["Karan"],
    "English": ["Thoma", "Mary"],
    "Gujarati": ["Yash", "Neha"],
    "Hindi": ["Rohit", "Divya"],
    "Kannada": ["Suresh", "Anu"],
    "Malayalam": ["Anjali", "Harish"],
    "Manipuri": ["Laishram", "Ranjit"],
    "Marathi": ["Sanjay", "Sunita"],
    "Nepali": ["Amrita"],
    "Odia": ["Manas", "Debjani"],
    "Punjabi": ["Divjot", "Gurpreet"],
    "Sanskrit": ["Aryan"],
    "Tamil": ["Jaya", "Kavitha"],
    "Telugu": ["Prakash", "Lalitha"],
}
# Voice characteristics for each speaker, keyed by speaker name.
# Each value is a short phrase that is interpolated verbatim into the
# description text, so wording changes here change the generated prompt.
VOICE_CHARACTERISTICS = {
    "Amit": "slightly deep and resonant",
    "Sita": "clear and well-paced",
    "Arjun": "moderate and clear",
    "Aditi": "high-pitched and expressive",
    "Bikram": "higher-pitched and energetic",
    "Maya": "balanced and pleasant",
    "Bhanu": "warm and measured",
    "Champa": "clear and gentle",
    "Karan": "high-pitched and engaging",
    "Thoma": "clear and well-articulated",
    "Mary": "pleasant and measured",
    "Yash": "warm and balanced",
    "Neha": "clear and dynamic",
    "Rohit": "moderate and expressive",
    "Divya": "pleasant and well-paced",
    "Suresh": "clear and precise",
    "Anu": "warm and melodious",
    "Anjali": "high-pitched and pleasant",
    "Harish": "deep and measured",
    "Laishram": "balanced and smooth",
    "Ranjit": "clear and authoritative",
    "Sanjay": "deep and authoritative",
    "Sunita": "high-pitched and pleasant",
    "Amrita": "high-pitched and gentle",
    "Manas": "moderate and measured",
    "Debjani": "clear and pleasant",
    "Divjot": "clear and dynamic",
    "Gurpreet": "warm and balanced",
    "Aryan": "resonant and measured",
    "Jaya": "high-pitched and melodious",
    "Kavitha": "clear and expressive",
    "Prakash": "clear and well-paced",
    "Lalitha": "pleasant and melodious",
}
# Emotion descriptions: maps an emotion label to the phrase inserted into
# the description text.
EMOTION_DESC = {
    "Neutral": "maintaining a balanced and natural tone",
    "Happy": "with a warm and positive energy",
    "Sad": "with a gentle and somber tone",
    "Angry": "with intense and strong delivery",
    "Highly Expressive": "with dynamic and vibrant emotional delivery",
    "Monotone": "with minimal tonal variation",
}
# Speed descriptions: maps a speaking-speed label to the phrase inserted
# into the description text.
SPEED_DESC = {
    "Very Slow": "at an extremely measured pace",
    "Slow": "at a measured, deliberate pace",
    "Normal": "at a natural, comfortable pace",
    "Fast": "at a swift, dynamic pace",
    "Very Fast": "at a rapid, accelerated pace",
}
# Pitch modifiers: maps a pitch label to the phrase inserted into the
# description text.
PITCH_DESC = {
    "Very Low": "in an extremely deep register",
    "Low": "in a deeper register",
    "Medium": "in a natural pitch range",
    "High": "in a higher register",
    "Very High": "in an extremely high register",
}
# Background-noise descriptions: maps a noise-level label to the phrase
# inserted into the description text.
# The "None" key is the string label, not Python's None object.
BACKGROUND_NOISE_DESC = {
    "None": "with absolutely no background noise",
    "Minimal": "with minimal background noise",
    "Moderate": "with moderate ambient noise",
    "Noticeable": "with noticeable background sounds",
}
# Reverberation descriptions: maps a distance/space label to the phrase
# inserted into the description text.
REVERBERATION_DESC = {
    "Very Close": "in an extremely intimate setting",
    "Close": "in a close-sounding environment",
    "Moderate": "in a moderately spacious environment",
    "Distant": "in a spacious, reverberant setting",
    "Very Distant": "in a very large, echoing space",
}
# Audio-quality descriptions: maps a quality label to the phrase inserted
# into the description text.
QUALITY_DESC = {
    "Basic": "in basic audio quality",
    "Good": "in good audio quality",
    "High": "in high audio quality",
    "Studio": "in professional studio quality",
}
def construct_description(
    speaker: str,
    language: str,
    emotion: str = "Neutral",
    speed: str = "Normal",
    pitch: str = "Medium",
    background_noise: str = "Minimal",
    reverberation: str = "Close",
    quality: str = "High",
) -> str:
    """Construct a comprehensive description for the TTS model.

    The result is two sentences assembled from the module-level lookup
    tables: the first covers the speaker, language, voice, pitch, emotion
    and speed; the second covers reverberation, background noise and
    audio quality.

    Args:
        speaker: The name of the speaker. A name that is not a key of
            ``VOICE_CHARACTERISTICS`` is accepted and described with the
            generic phrase "with clear articulation".
        language: The language being spoken. Inserted into the text as-is.
        emotion: The emotional tone; a key of ``EMOTION_DESC``.
        speed: The speaking speed; a key of ``SPEED_DESC``.
        pitch: The pitch level; a key of ``PITCH_DESC``.
        background_noise: Level of background noise; a key of
            ``BACKGROUND_NOISE_DESC``.
        reverberation: Distance/space effect; a key of
            ``REVERBERATION_DESC``.
        quality: Audio quality level; a key of ``QUALITY_DESC``.

    Returns:
        A detailed description for the TTS model.

    Raises:
        KeyError: If ``emotion``, ``speed``, ``pitch``, ``background_noise``,
            ``reverberation`` or ``quality`` is not a key of its table.
    """
    # Only the speaker lookup is lenient; every other option must be a
    # known label and raises KeyError otherwise.
    voice = VOICE_CHARACTERISTICS.get(speaker, "with clear articulation")
    return (
        f"{speaker} speaks in {language} {voice} "
        f"{PITCH_DESC[pitch]}, {EMOTION_DESC[emotion]} {SPEED_DESC[speed]}. "
        f"The recording is {REVERBERATION_DESC[reverberation]}, {BACKGROUND_NOISE_DESC[background_noise]}, "
        f"captured {QUALITY_DESC[quality]}."
    )
def get_speakers_for_language(language: str) -> list:
    """Get the list of recommended speakers for a given language.

    Args:
        language: The language to get speakers for; expected to be a key
            of ``LANGUAGE_VOICE_MAPPING``.

    Returns:
        A new list of speaker names for the language, or an empty list
        when the language is not in ``LANGUAGE_VOICE_MAPPING``.
    """
    # Hand back a copy: returning the stored list itself would let a caller
    # that appends to or reorders the result silently change the mapping
    # for every later call.
    return list(LANGUAGE_VOICE_MAPPING.get(language, []))