Update app.py
Browse files
app.py
CHANGED
@@ -1,145 +1,28 @@
|
|
1 |
-
import
|
2 |
-
from pydub import AudioSegment
|
3 |
-
import edge_tts
|
4 |
-
import os
|
5 |
-
import asyncio
|
6 |
-
import uuid
|
7 |
-
import re
|
8 |
-
|
9 |
-
# Function to get the length of an audio file in seconds
|
10 |
-
def get_audio_length(audio_file):
    """Return the duration of ``audio_file`` in seconds.

    The file is decoded with ``AudioSegment.from_file``; the length of the
    decoded segment (milliseconds) is divided by 1000 so callers get seconds.
    """
    duration_ms = len(AudioSegment.from_file(audio_file))
    return duration_ms / 1000
|
13 |
-
|
14 |
-
# Function to format time for SRT in milliseconds
|
15 |
-
def format_time_ms(milliseconds):
    """Render a millisecond count as an SRT timestamp ``HH:MM:SS,mmm``.

    Fractional milliseconds are truncated by ``int()`` before formatting.
    """
    whole_ms = int(milliseconds)
    ms = whole_ms % 1000
    secs = whole_ms // 1000 % 60
    mins = whole_ms // 60_000 % 60
    hrs = whole_ms // 3_600_000
    return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
|
20 |
-
|
21 |
-
# Function to split text into segments based on punctuation, ensuring no word is split
|
22 |
-
def _append_capped(segments, piece, max_words):
    """Append ``piece`` to ``segments``, split into runs of at most ``max_words`` words."""
    words = piece.split()
    if len(words) <= max_words:
        # Short enough: keep the text exactly as written.
        segments.append(piece)
        return
    segments.extend(
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    )


def split_text_into_segments(text, max_words=8):
    """Split ``text`` into subtitle-sized segments without breaking any word.

    The text is first cut after each ``.``, ``!``, ``?`` or ``,`` (the
    punctuation mark stays attached to the text before it). Any piece longer
    than ``max_words`` words is further divided into consecutive runs of at
    most ``max_words`` words. Text after the last punctuation mark is treated
    the same way, so unpunctuated input is capped too.

    Args:
        text: The text to split.
        max_words: Maximum number of words per segment (default 8).

    Returns:
        A list of segment strings in reading order; empty for blank input.

    Raises:
        ValueError: If ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    segments = []
    # The capturing group keeps the separators, so the result alternates
    # text, punctuation, text, ..., text and always has odd length.
    raw_segments = re.split(r'([.!?,])', text)

    for i in range(0, len(raw_segments) - 1, 2):
        sentence = raw_segments[i].strip() + raw_segments[i + 1]
        _append_capped(segments, sentence, max_words)

    remaining_text = raw_segments[-1].strip()
    if remaining_text:
        _append_capped(segments, remaining_text, max_words)

    return segments
|
49 |
-
|
50 |
-
# Function to generate SRT with millisecond accuracy per batch
|
51 |
-
async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
    """Synthesize one text batch and build its SRT cues.

    The batch is rendered to an audio file with edge-tts, the measured audio
    length is divided evenly between the segments produced by
    ``split_text_into_segments``, and one SRT cue is emitted per segment.

    Args:
        batch_text: Text of this batch.
        batch_num: Zero-based batch index; used in cue numbering.
        start_offset: Start time of this batch in milliseconds.
        pitch: Pitch string passed to ``edge_tts.Communicate``.
        rate: Rate string passed to ``edge_tts.Communicate``.
        voice: Voice identifier passed to ``edge_tts.Communicate``.

    Returns:
        ``(srt_content, audio_file, end_offset)`` where ``audio_file`` is the
        path of the rendered audio (the caller is responsible for deleting it)
        and ``end_offset`` is the end time of the batch in milliseconds.
    """
    # A random component keeps concurrent requests from overwriting each
    # other's intermediate audio for the same batch number.
    audio_file = f"batch_{batch_num}_{uuid.uuid4().hex}_audio.wav"

    tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=pitch)
    try:
        await tts.save(audio_file)
    except Exception:
        # Do not leave a partial file behind when synthesis fails.
        if os.path.exists(audio_file):
            os.remove(audio_file)
        raise

    actual_length = get_audio_length(audio_file) * 1000  # Convert to milliseconds
    batch_end = start_offset + actual_length

    segments = split_text_into_segments(batch_text)
    if not segments:
        # Nothing to caption; still advance the clock by the audio length.
        return "", audio_file, batch_end

    segment_duration = actual_length / len(segments)
    start_time = start_offset

    cues = []
    for index, segment in enumerate(segments):
        # Never let a cue run past the end of this batch's audio.
        end_time = min(start_time + segment_duration, batch_end)

        # NOTE(review): numbering assumes fewer than 100 cues per batch;
        # larger batches would produce overlapping cue numbers.
        cues.append(
            f"{index + 1 + (batch_num * 100)}\n"
            f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
            f"{segment}\n\n"
        )
        start_time = end_time

    return "".join(cues), audio_file, start_time
|
77 |
-
|
78 |
-
# Batch processing function with millisecond accuracy
|
79 |
-
# Matches exactly the timing lines produced by format_time_ms.
_SRT_TIMING_RE = re.compile(
    r"^(\d+):(\d{2}):(\d{2}),(\d{3}) --> (\d+):(\d{2}):(\d{2}),(\d{3})$"
)


def _split_into_batches(text, limit=500):
    """Cut ``text`` into chunks of at most ``limit`` characters on whitespace.

    A chunk is only cut mid-word when a single word is longer than ``limit``.
    Whitespace-only chunks are dropped.
    """
    batches = []
    start = 0
    while start < len(text):
        end = min(start + limit, len(text))
        if end < len(text) and not text[end].isspace():
            # The hard cut would land inside a word: back up to just after
            # the last whitespace character in the window, if there is one.
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            if cut > start:
                end = cut
        chunk = text[start:end]
        if chunk.strip():
            batches.append(chunk)
        start = end
    return batches


def _clamp_srt_timing(line, total_ms):
    """Cap the timestamps of an SRT timing line at ``total_ms``.

    Lines that are not timing lines are returned unchanged.
    """
    match = _SRT_TIMING_RE.match(line)
    if match is None:
        return line
    h1, m1, s1, ms1, h2, m2, s2, ms2 = map(int, match.groups())
    # Integer arithmetic: no float round-trip that could drop a millisecond.
    end_ms = min(((h2 * 60 + m2) * 60 + s2) * 1000 + ms2, total_ms)
    start_ms = min(((h1 * 60 + m1) * 60 + s1) * 1000 + ms1, end_ms)
    return f"{format_time_ms(start_ms)} --> {format_time_ms(end_ms)}"


# Batch processing function with millisecond accuracy
async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
    """Convert a whole script into one MP3 file and one matching SRT file.

    The script is processed in batches of at most 500 characters. Each batch
    is synthesized by ``generate_accurate_srt``; the audio pieces are
    concatenated and the cue timestamps are capped at the total audio length.

    Args:
        script_text: Full script text.
        pitch: Pitch string forwarded to ``generate_accurate_srt``.
        rate: Rate string forwarded to ``generate_accurate_srt``.
        voice: Voice identifier forwarded to ``generate_accurate_srt``.
        progress: Callable invoked with the completed fraction after each batch.

    Returns:
        ``(final_srt_path, final_audio_path)``.
    """
    batches = _split_into_batches(script_text)
    all_srt_content = ""
    combined_audio = AudioSegment.empty()
    start_offset = 0.0

    for batch_num, batch_text in enumerate(batches):
        srt_content, audio_file, end_offset = await generate_accurate_srt(
            batch_text, batch_num, start_offset, pitch, rate, voice
        )
        try:
            all_srt_content += srt_content
            combined_audio += AudioSegment.from_file(audio_file)
            start_offset = end_offset
        finally:
            # The per-batch file is only an intermediate; remove it even if
            # decoding it failed.
            os.remove(audio_file)
        progress((batch_num + 1) / len(batches))

    # len() of the combined segment is its duration in milliseconds.
    total_audio_ms = len(combined_audio)
    validated_srt_content = "".join(
        _clamp_srt_timing(line, total_audio_ms) + "\n"
        for line in all_srt_content.strip().splitlines()
    )

    unique_id = uuid.uuid4()
    final_audio_path = f"final_audio_{unique_id}.mp3"
    final_srt_path = f"final_subtitles_{unique_id}.srt"

    combined_audio.export(final_audio_path, format="mp3", bitrate="320k")

    # Explicit UTF-8 so non-ASCII scripts are written correctly regardless of
    # the platform's default encoding.
    with open(final_srt_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(validated_srt_content)

    return final_srt_path, final_audio_path
|
118 |
-
|
119 |
-
# Gradio interface function
|
120 |
-
async def process_script(script_text, pitch, rate, voice):
    """Gradio handler: turn the form values into SRT and audio files.

    Args:
        script_text: Script entered by the user.
        pitch: Pitch adjustment in Hz from the slider.
        rate: Rate adjustment in percent from the slider.
        voice: Display name of the voice; looked up in ``voice_options``.

    Returns:
        ``(srt_path, audio_path, audio_path)`` - the audio path is returned
        twice, once for the download component and once for playback.
    """
    # edge-tts expects an explicit sign and an integer magnitude, e.g. "+5Hz",
    # "-10%", "+0%"; the ``+d`` format produces exactly that, including for 0.
    pitch_str = f"{int(pitch):+d}Hz"
    formatted_rate = f"{int(rate):+d}%"
    srt_path, audio_path = await batch_process_srt_and_audio(
        script_text, pitch_str, formatted_rate, voice_options[voice]
    )
    return srt_path, audio_path, audio_path
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
"
|
129 |
-
"
|
130 |
-
|
131 |
-
|
132 |
-
"
|
133 |
-
"
|
134 |
-
"
|
135 |
-
"
|
136 |
-
"
|
137 |
-
"
|
138 |
-
"
|
139 |
-
"
|
140 |
-
"
|
141 |
-
"
|
142 |
-
"
|
|
|
|
|
|
|
143 |
"Maisie": "en-GB-MaisieNeural",
|
144 |
"Ryan": "en-GB-RyanNeural",
|
145 |
"Sonia": "en-GB-SoniaNeural",
|
@@ -163,25 +46,448 @@ voice_options = {
|
|
163 |
"Imani": "en-TZ-ImaniNeural",
|
164 |
"Leah": "en-ZA-LeahNeural",
|
165 |
"Luke": "en-ZA-LukeNeural"
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
}
|
168 |
|
169 |
-
# Form controls, in the positional order expected by process_script:
# script text, pitch, rate, voice.
_app_inputs = [
    gr.Textbox(label="Enter Script Text", lines=10),
    gr.Slider(label="Pitch Adjustment (Hz)", minimum=-20, maximum=20, value=0, step=1),
    gr.Slider(label="Rate Adjustment (%)", minimum=-50, maximum=50, value=-1, step=1),
    gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Andrew Male"),
]

# Result widgets, matching process_script's three return values:
# SRT download, audio download, audio playback.
_app_outputs = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
    gr.Audio(label="Audio Playback"),
]

app = gr.Interface(
    fn=process_script,
    inputs=_app_inputs,
    outputs=_app_outputs,
    title="WritooAI Pro Text-to-Speech with Subtitle",
    description="Convert your script into Audio with Auto generated Subtitles.",
    theme="compact",
)
|
186 |
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tempfile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
import edge_tts
|
4 |
+
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
language_dict = {
|
7 |
+
"Hindi": {
|
8 |
+
"Madhur": "hi-IN-MadhurNeural",
|
9 |
+
"Swara": "hi-IN-SwaraNeural"
|
10 |
+
},
|
11 |
+
"English": {
|
12 |
+
"Jenny": "en-US-JennyNeural",
|
13 |
+
"Guy": "en-US-GuyNeural",
|
14 |
+
"Ana": "en-US-AnaNeural",
|
15 |
+
"Aria": "en-US-AriaNeural",
|
16 |
+
"Brian": "en-US-BrianNeural",
|
17 |
+
"Christopher": "en-US-ChristopherNeural",
|
18 |
+
"Eric": "en-US-EricNeural",
|
19 |
+
"Michelle": "en-US-MichelleNeural",
|
20 |
+
"Roger": "en-US-RogerNeural",
|
21 |
+
"Natasha": "en-AU-NatashaNeural",
|
22 |
+
"William": "en-AU-WilliamNeural",
|
23 |
+
"Clara": "en-CA-ClaraNeural",
|
24 |
+
"Liam": "en-CA-LiamNeural",
|
25 |
+
"Libby": "en-GB-LibbyNeural",
|
26 |
"Maisie": "en-GB-MaisieNeural",
|
27 |
"Ryan": "en-GB-RyanNeural",
|
28 |
"Sonia": "en-GB-SoniaNeural",
|
|
|
46 |
"Imani": "en-TZ-ImaniNeural",
|
47 |
"Leah": "en-ZA-LeahNeural",
|
48 |
"Luke": "en-ZA-LukeNeural"
|
49 |
+
},
|
50 |
+
"Spanish": {
|
51 |
+
"Elena": "es-AR-ElenaNeural",
|
52 |
+
"Tomas": "es-AR-TomasNeural",
|
53 |
+
"Marcelo": "es-BO-MarceloNeural",
|
54 |
+
"Sofia": "es-BO-SofiaNeural",
|
55 |
+
"Gonzalo": "es-CO-GonzaloNeural",
|
56 |
+
"Salome": "es-CO-SalomeNeural",
|
57 |
+
"Juan": "es-CR-JuanNeural",
|
58 |
+
"Maria": "es-CR-MariaNeural",
|
59 |
+
"Belkys": "es-CU-BelkysNeural",
|
60 |
+
"Emilio": "es-DO-EmilioNeural",
|
61 |
+
"Ramona": "es-DO-RamonaNeural",
|
62 |
+
"Andrea": "es-EC-AndreaNeural",
|
63 |
+
"Luis": "es-EC-LuisNeural",
|
64 |
+
"Alvaro": "es-ES-AlvaroNeural",
|
65 |
+
"Elvira": "es-ES-ElviraNeural",
|
66 |
+
"Teresa": "es-GQ-TeresaNeural",
|
67 |
+
"Andres": "es-GT-AndresNeural",
|
68 |
+
"Marta": "es-GT-MartaNeural",
|
69 |
+
"Carlos": "es-HN-CarlosNeural",
|
70 |
+
"Karla": "es-HN-KarlaNeural",
|
71 |
+
"Federico": "es-NI-FedericoNeural",
|
72 |
+
"Yolanda": "es-NI-YolandaNeural",
|
73 |
+
"Margarita": "es-PA-MargaritaNeural",
|
74 |
+
"Roberto": "es-PA-RobertoNeural",
|
75 |
+
"Alex": "es-PE-AlexNeural",
|
76 |
+
"Camila": "es-PE-CamilaNeural",
|
77 |
+
"Karina": "es-PR-KarinaNeural",
|
78 |
+
"Victor": "es-PR-VictorNeural",
|
79 |
+
"Mario": "es-PY-MarioNeural",
|
80 |
+
"Tania": "es-PY-TaniaNeural",
|
81 |
+
"Lorena": "es-SV-LorenaNeural",
|
82 |
+
"Rodrigo": "es-SV-RodrigoNeural",
|
83 |
+
"Alonso": "es-US-AlonsoNeural",
|
84 |
+
"Paloma": "es-US-PalomaNeural",
|
85 |
+
"Mateo": "es-UY-MateoNeural",
|
86 |
+
"Valentina": "es-UY-ValentinaNeural",
|
87 |
+
"Paola": "es-VE-PaolaNeural",
|
88 |
+
"Sebastian": "es-VE-SebastianNeural"
|
89 |
+
},
|
90 |
+
"Arabic": {
|
91 |
+
"Hamed": "ar-SA-HamedNeural",
|
92 |
+
"Zariyah": "ar-SA-ZariyahNeural",
|
93 |
+
"Fatima": "ar-AE-FatimaNeural",
|
94 |
+
"Hamdan": "ar-AE-HamdanNeural",
|
95 |
+
"Ali": "ar-BH-AliNeural",
|
96 |
+
"Laila": "ar-BH-LailaNeural",
|
97 |
+
"Ismael": "ar-DZ-IsmaelNeural",
|
98 |
+
"Salma": "ar-EG-SalmaNeural",
|
99 |
+
"Shakir": "ar-EG-ShakirNeural",
|
100 |
+
"Bassel": "ar-IQ-BasselNeural",
|
101 |
+
"Rana": "ar-IQ-RanaNeural",
|
102 |
+
"Sana": "ar-JO-SanaNeural",
|
103 |
+
"Taim": "ar-JO-TaimNeural",
|
104 |
+
"Fahed": "ar-KW-FahedNeural",
|
105 |
+
"Noura": "ar-KW-NouraNeural",
|
106 |
+
"Layla": "ar-LB-LaylaNeural",
|
107 |
+
"Rami": "ar-LB-RamiNeural",
|
108 |
+
"Iman": "ar-LY-ImanNeural",
|
109 |
+
"Omar": "ar-LY-OmarNeural",
|
110 |
+
"Jamal": "ar-MA-JamalNeural",
|
111 |
+
"Mouna": "ar-MA-MounaNeural",
|
112 |
+
"Abdullah": "ar-OM-AbdullahNeural",
|
113 |
+
"Aysha": "ar-OM-AyshaNeural",
|
114 |
+
"Amal": "ar-QA-AmalNeural",
|
115 |
+
"Moaz": "ar-QA-MoazNeural",
|
116 |
+
"Amany": "ar-SY-AmanyNeural",
|
117 |
+
"Laith": "ar-SY-LaithNeural",
|
118 |
+
"Hedi": "ar-TN-HediNeural",
|
119 |
+
"Reem": "ar-TN-ReemNeural",
|
120 |
+
"Maryam": "ar-YE-MaryamNeural",
|
121 |
+
"Saleh": "ar-YE-SalehNeural"
|
122 |
+
},
|
123 |
+
"Korean": {
|
124 |
+
"Sun-Hi": "ko-KR-SunHiNeural",
|
125 |
+
"InJoon": "ko-KR-InJoonNeural"
|
126 |
+
},
|
127 |
+
"Thai": {
|
128 |
+
"Premwadee": "th-TH-PremwadeeNeural",
|
129 |
+
"Niwat": "th-TH-NiwatNeural"
|
130 |
+
},
|
131 |
+
"Vietnamese": {
|
132 |
+
"HoaiMy": "vi-VN-HoaiMyNeural",
|
133 |
+
"NamMinh": "vi-VN-NamMinhNeural"
|
134 |
+
},
|
135 |
+
"Japanese": {
|
136 |
+
"Nanami": "ja-JP-NanamiNeural",
|
137 |
+
"Keita": "ja-JP-KeitaNeural"
|
138 |
+
},
|
139 |
+
"French": {
|
140 |
+
"Denise": "fr-FR-DeniseNeural",
|
141 |
+
"Eloise": "fr-FR-EloiseNeural",
|
142 |
+
"Henri": "fr-FR-HenriNeural",
|
143 |
+
"Sylvie": "fr-CA-SylvieNeural",
|
144 |
+
"Antoine": "fr-CA-AntoineNeural",
|
145 |
+
"Jean": "fr-CA-JeanNeural",
|
146 |
+
"Ariane": "fr-CH-ArianeNeural",
|
147 |
+
"Fabrice": "fr-CH-FabriceNeural",
|
148 |
+
"Charline": "fr-BE-CharlineNeural",
|
149 |
+
"Gerard": "fr-BE-GerardNeural"
|
150 |
+
},
|
151 |
+
"Portuguese": {
|
152 |
+
"Francisca": "pt-BR-FranciscaNeural",
|
153 |
+
"Antonio": "pt-BR-AntonioNeural",
|
154 |
+
"Duarte": "pt-PT-DuarteNeural",
|
155 |
+
"Raquel": "pt-PT-RaquelNeural"
|
156 |
+
},
|
157 |
+
"Indonesian": {
|
158 |
+
"Ardi": "id-ID-ArdiNeural",
|
159 |
+
"Gadis": "id-ID-GadisNeural"
|
160 |
+
},
|
161 |
+
"Hebrew": {
|
162 |
+
"Avri": "he-IL-AvriNeural",
|
163 |
+
"Hila": "he-IL-HilaNeural"
|
164 |
+
},
|
165 |
+
"Italian": {
|
166 |
+
"Isabella": "it-IT-IsabellaNeural",
|
167 |
+
"Diego": "it-IT-DiegoNeural",
|
168 |
+
"Elsa": "it-IT-ElsaNeural"
|
169 |
+
},
|
170 |
+
"Dutch": {
|
171 |
+
"Colette": "nl-NL-ColetteNeural",
|
172 |
+
"Fenna": "nl-NL-FennaNeural",
|
173 |
+
"Maarten": "nl-NL-MaartenNeural",
|
174 |
+
"Arnaud": "nl-BE-ArnaudNeural",
|
175 |
+
"Dena": "nl-BE-DenaNeural"
|
176 |
+
},
|
177 |
+
"Malay": {
|
178 |
+
"Osman": "ms-MY-OsmanNeural",
|
179 |
+
"Yasmin": "ms-MY-YasminNeural"
|
180 |
+
},
|
181 |
+
"Norwegian": {
|
182 |
+
"Pernille": "nb-NO-PernilleNeural",
|
183 |
+
"Finn": "nb-NO-FinnNeural"
|
184 |
+
},
|
185 |
+
"Swedish": {
|
186 |
+
"Sofie": "sv-SE-SofieNeural",
|
187 |
+
"Mattias": "sv-SE-MattiasNeural"
|
188 |
+
},
|
189 |
+
"Greek": {
|
190 |
+
"Athina": "el-GR-AthinaNeural",
|
191 |
+
"Nestoras": "el-GR-NestorasNeural"
|
192 |
+
},
|
193 |
+
"German": {
|
194 |
+
"Katja": "de-DE-KatjaNeural",
|
195 |
+
"Amala": "de-DE-AmalaNeural",
|
196 |
+
"Conrad": "de-DE-ConradNeural",
|
197 |
+
"Killian": "de-DE-KillianNeural",
|
198 |
+
"Ingrid": "de-AT-IngridNeural",
|
199 |
+
"Jonas": "de-AT-JonasNeural",
|
200 |
+
"Jan": "de-CH-JanNeural",
|
201 |
+
"Leni": "de-CH-LeniNeural"
|
202 |
+
},
|
203 |
+
"Afrikaans": {
|
204 |
+
"Adri": "af-ZA-AdriNeural",
|
205 |
+
"Willem": "af-ZA-WillemNeural"
|
206 |
+
},
|
207 |
+
"Amharic": {
|
208 |
+
"Ameha": "am-ET-AmehaNeural",
|
209 |
+
"Mekdes": "am-ET-MekdesNeural"
|
210 |
+
},
|
211 |
+
"Azerbaijani": {
|
212 |
+
"Babek": "az-AZ-BabekNeural",
|
213 |
+
"Banu": "az-AZ-BanuNeural"
|
214 |
+
},
|
215 |
+
"Bulgarian": {
|
216 |
+
"Borislav": "bg-BG-BorislavNeural",
|
217 |
+
"Kalina": "bg-BG-KalinaNeural"
|
218 |
+
},
|
219 |
+
"Bengali": {
|
220 |
+
"Nabanita": "bn-BD-NabanitaNeural",
|
221 |
+
"Pradeep": "bn-BD-PradeepNeural",
|
222 |
+
"Bashkar": "bn-IN-BashkarNeural",
|
223 |
+
"Tanishaa": "bn-IN-TanishaaNeural"
|
224 |
+
},
|
225 |
+
"Bosnian": {
|
226 |
+
"Goran": "bs-BA-GoranNeural",
|
227 |
+
"Vesna": "bs-BA-VesnaNeural"
|
228 |
+
},
|
229 |
+
"Catalan": {
|
230 |
+
"Joana": "ca-ES-JoanaNeural",
|
231 |
+
"Enric": "ca-ES-EnricNeural"
|
232 |
+
},
|
233 |
+
"Czech": {
|
234 |
+
"Antonin": "cs-CZ-AntoninNeural",
|
235 |
+
"Vlasta": "cs-CZ-VlastaNeural"
|
236 |
+
},
|
237 |
+
"Welsh": {
|
238 |
+
"Aled": "cy-GB-AledNeural",
|
239 |
+
"Nia": "cy-GB-NiaNeural"
|
240 |
+
},
|
241 |
+
"Danish": {
|
242 |
+
"Christel": "da-DK-ChristelNeural",
|
243 |
+
"Jeppe": "da-DK-JeppeNeural"
|
244 |
+
},
|
245 |
+
"Estonian": {
|
246 |
+
"Anu": "et-EE-AnuNeural",
|
247 |
+
"Kert": "et-EE-KertNeural"
|
248 |
+
},
|
249 |
+
"Persian": {
|
250 |
+
"Dilara": "fa-IR-DilaraNeural",
|
251 |
+
"Farid": "fa-IR-FaridNeural"
|
252 |
+
},
|
253 |
+
"Finnish": {
|
254 |
+
"Harri": "fi-FI-HarriNeural",
|
255 |
+
"Noora": "fi-FI-NooraNeural"
|
256 |
+
},
|
257 |
+
"Irish": {
|
258 |
+
"Colm": "ga-IE-ColmNeural",
|
259 |
+
"Orla": "ga-IE-OrlaNeural"
|
260 |
+
},
|
261 |
+
"Galician": {
|
262 |
+
"Roi": "gl-ES-RoiNeural",
|
263 |
+
"Sabela": "gl-ES-SabelaNeural"
|
264 |
+
},
|
265 |
+
"Gujarati": {
|
266 |
+
"Dhwani": "gu-IN-DhwaniNeural",
|
267 |
+
"Niranjan": "gu-IN-NiranjanNeural"
|
268 |
+
},
|
269 |
+
"Croatian": {
|
270 |
+
"Gabrijela": "hr-HR-GabrijelaNeural",
|
271 |
+
"Srecko": "hr-HR-SreckoNeural"
|
272 |
+
},
|
273 |
+
"Hungarian": {
|
274 |
+
"Noemi": "hu-HU-NoemiNeural",
|
275 |
+
"Tamas": "hu-HU-TamasNeural"
|
276 |
+
},
|
277 |
+
"Icelandic": {
|
278 |
+
"Gudrun": "is-IS-GudrunNeural",
|
279 |
+
"Gunnar": "is-IS-GunnarNeural"
|
280 |
+
},
|
281 |
+
"Javanese": {
|
282 |
+
"Dimas": "jv-ID-DimasNeural",
|
283 |
+
"Siti": "jv-ID-SitiNeural"
|
284 |
+
},
|
285 |
+
"Georgian": {
|
286 |
+
"Eka": "ka-GE-EkaNeural",
|
287 |
+
"Giorgi": "ka-GE-GiorgiNeural"
|
288 |
+
},
|
289 |
+
"Kazakh": {
|
290 |
+
"Aigul": "kk-KZ-AigulNeural",
|
291 |
+
"Daulet": "kk-KZ-DauletNeural"
|
292 |
+
},
|
293 |
+
"Khmer": {
|
294 |
+
"Piseth": "km-KH-PisethNeural",
|
295 |
+
"Sreymom": "km-KH-SreymomNeural"
|
296 |
+
},
|
297 |
+
"Kannada": {
|
298 |
+
"Gagan": "kn-IN-GaganNeural",
|
299 |
+
"Sapna": "kn-IN-SapnaNeural"
|
300 |
+
},
|
301 |
+
"Lao": {
|
302 |
+
"Chanthavong": "lo-LA-ChanthavongNeural",
|
303 |
+
"Keomany": "lo-LA-KeomanyNeural"
|
304 |
+
},
|
305 |
+
"Lithuanian": {
|
306 |
+
"Leonas": "lt-LT-LeonasNeural",
|
307 |
+
"Ona": "lt-LT-OnaNeural"
|
308 |
+
},
|
309 |
+
"Latvian": {
|
310 |
+
"Everita": "lv-LV-EveritaNeural",
|
311 |
+
"Nils": "lv-LV-NilsNeural"
|
312 |
+
},
|
313 |
+
"Macedonian": {
|
314 |
+
"Aleksandar": "mk-MK-AleksandarNeural",
|
315 |
+
"Marija": "mk-MK-MarijaNeural"
|
316 |
+
},
|
317 |
+
"Malayalam": {
|
318 |
+
"Midhun": "ml-IN-MidhunNeural",
|
319 |
+
"Sobhana": "ml-IN-SobhanaNeural"
|
320 |
+
},
|
321 |
+
"Mongolian": {
|
322 |
+
"Bataa": "mn-MN-BataaNeural",
|
323 |
+
"Yesui": "mn-MN-YesuiNeural"
|
324 |
+
},
|
325 |
+
"Marathi": {
|
326 |
+
"Aarohi": "mr-IN-AarohiNeural",
|
327 |
+
"Manohar": "mr-IN-ManoharNeural"
|
328 |
+
},
|
329 |
+
"Maltese": {
|
330 |
+
"Grace": "mt-MT-GraceNeural",
|
331 |
+
"Joseph": "mt-MT-JosephNeural"
|
332 |
+
},
|
333 |
+
"Burmese": {
|
334 |
+
"Nilar": "my-MM-NilarNeural",
|
335 |
+
"Thiha": "my-MM-ThihaNeural"
|
336 |
+
},
|
337 |
+
"Nepali": {
|
338 |
+
"Hemkala": "ne-NP-HemkalaNeural",
|
339 |
+
"Sagar": "ne-NP-SagarNeural"
|
340 |
+
},
|
341 |
+
"Polish": {
|
342 |
+
"Marek": "pl-PL-MarekNeural",
|
343 |
+
"Zofia": "pl-PL-ZofiaNeural"
|
344 |
+
},
|
345 |
+
"Pashto": {
|
346 |
+
"Gul Nawaz": "ps-AF-GulNawazNeural",
|
347 |
+
"Latifa": "ps-AF-LatifaNeural"
|
348 |
+
},
|
349 |
+
"Romanian": {
|
350 |
+
"Alina": "ro-RO-AlinaNeural",
|
351 |
+
"Emil": "ro-RO-EmilNeural"
|
352 |
+
},
|
353 |
+
"Russian": {
|
354 |
+
"Svetlana": "ru-RU-SvetlanaNeural",
|
355 |
+
"Dmitry": "ru-RU-DmitryNeural"
|
356 |
+
},
|
357 |
+
"Sinhala": {
|
358 |
+
"Sameera": "si-LK-SameeraNeural",
|
359 |
+
"Thilini": "si-LK-ThiliniNeural"
|
360 |
+
},
|
361 |
+
"Slovak": {
|
362 |
+
"Lukas": "sk-SK-LukasNeural",
|
363 |
+
"Viktoria": "sk-SK-ViktoriaNeural"
|
364 |
+
},
|
365 |
+
"Slovenian": {
|
366 |
+
"Petra": "sl-SI-PetraNeural",
|
367 |
+
"Rok": "sl-SI-RokNeural"
|
368 |
+
},
|
369 |
+
"Somali": {
|
370 |
+
"Muuse": "so-SO-MuuseNeural",
|
371 |
+
"Ubax": "so-SO-UbaxNeural"
|
372 |
+
},
|
373 |
+
"Albanian": {
|
374 |
+
"Anila": "sq-AL-AnilaNeural",
|
375 |
+
"Ilir": "sq-AL-IlirNeural"
|
376 |
+
},
|
377 |
+
"Serbian": {
|
378 |
+
"Nicholas": "sr-RS-NicholasNeural",
|
379 |
+
"Sophie": "sr-RS-SophieNeural"
|
380 |
+
},
|
381 |
+
"Sundanese": {
|
382 |
+
"Jajang": "su-ID-JajangNeural",
|
383 |
+
"Tuti": "su-ID-TutiNeural"
|
384 |
+
},
|
385 |
+
"Swahili": {
|
386 |
+
"Rafiki": "sw-KE-RafikiNeural",
|
387 |
+
"Zuri": "sw-KE-ZuriNeural",
|
388 |
+
"Daudi": "sw-TZ-DaudiNeural",
|
389 |
+
"Rehema": "sw-TZ-RehemaNeural"
|
390 |
+
},
|
391 |
+
"Tamil": {
|
392 |
+
"Pallavi": "ta-IN-PallaviNeural",
|
393 |
+
"Valluvar": "ta-IN-ValluvarNeural",
|
394 |
+
"Kumar": "ta-LK-KumarNeural",
|
395 |
+
"Saranya": "ta-LK-SaranyaNeural",
|
396 |
+
"Kani": "ta-MY-KaniNeural",
|
397 |
+
"Surya": "ta-MY-SuryaNeural",
|
398 |
+
"Anbu": "ta-SG-AnbuNeural"
|
399 |
+
},
|
400 |
+
"Telugu": {
|
401 |
+
"Mohan": "te-IN-MohanNeural",
|
402 |
+
"Shruti": "te-IN-ShrutiNeural"
|
403 |
+
},
|
404 |
+
"Turkish": {
|
405 |
+
"Ahmet": "tr-TR-AhmetNeural",
|
406 |
+
"Emel": "tr-TR-EmelNeural"
|
407 |
+
},
|
408 |
+
"Ukrainian": {
|
409 |
+
"Ostap": "uk-UA-OstapNeural",
|
410 |
+
"Polina": "uk-UA-PolinaNeural"
|
411 |
+
},
|
412 |
+
"Urdu": {
|
413 |
+
"Gul": "ur-IN-GulNeural",
|
414 |
+
"Salman": "ur-IN-SalmanNeural",
|
415 |
+
"Asad": "ur-PK-AsadNeural",
|
416 |
+
"Uzma": "ur-PK-UzmaNeural"
|
417 |
+
},
|
418 |
+
"Uzbek": {
|
419 |
+
"Madina": "uz-UZ-MadinaNeural",
|
420 |
+
"Sardor": "uz-UZ-SardorNeural"
|
421 |
+
},
|
422 |
+
"Mandarin": {
|
423 |
+
"Xiaoxiao": "zh-CN-XiaoxiaoNeural",
|
424 |
+
"Yunyang": "zh-CN-YunyangNeural",
|
425 |
+
"Yunxi": "zh-CN-YunxiNeural",
|
426 |
+
"Xiaoyi": "zh-CN-XiaoyiNeural",
|
427 |
+
"Yunjian": "zh-CN-YunjianNeural",
|
428 |
+
"Yunxia": "zh-CN-YunxiaNeural",
|
429 |
+
"Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
|
430 |
+
"Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
|
431 |
+
"HiuMaan": "zh-HK-HiuMaanNeural",
|
432 |
+
"HiuGaai": "zh-HK-HiuGaaiNeural",
|
433 |
+
"WanLung": "zh-HK-WanLungNeural",
|
434 |
+
"HsiaoChen": "zh-TW-HsiaoChenNeural",
|
435 |
+
"HsiaoYu": "zh-TW-HsiaoYuNeural",
|
436 |
+
"YunJhe": "zh-TW-YunJheNeural"
|
437 |
+
},
|
438 |
+
"Zulu": {
|
439 |
+
"Thando": "zu-ZA-ThandoNeural",
|
440 |
+
"Themba": "zu-ZA-ThembaNeural"
|
441 |
+
}
|
442 |
}
|
443 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
|
445 |
+
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
    """Synthesize ``text`` with the selected voice and save it as an MP3 file.

    Args:
        text: Text to convert to speech.
        language_code: Top-level key of ``language_dict`` (the selected language).
        speaker: Key of the voice inside that language's mapping.
        tashkeel_checkbox: Accepted so the UI can pass its checkbox value;
            not used by this function.

    Returns:
        ``(text, path)`` on success, where ``path`` is the generated MP3 file,
        or ``(error_message, None)`` when the input is invalid.
    """
    import os  # local import: only needed for cleanup on failure

    # Define the character limit
    char_limit = 100000000
    if not text or not text.strip():
        return "Error: Please enter some text to convert.", None
    if len(text) > char_limit:
        # The message is derived from the limit so the two cannot disagree.
        return f"Error: Input is limited to {char_limit:,} characters at a time.", None

    # Get the voice for the selected language and speaker. Both dropdowns can
    # still be unset when the button is pressed, so look them up defensively.
    voice = language_dict.get(language_code, {}).get(speaker)
    if voice is None:
        return "Error: Please select a language and a speaker.", None

    communicate = edge_tts.Communicate(text, voice)
    # Reserve a unique path, then close the handle before the audio is written.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except Exception:
        # Do not leak the placeholder file when synthesis fails.
        try:
            os.unlink(tmp_path)
        except FileNotFoundError:
            pass
        raise

    return text, tmp_path
|
459 |
+
|
460 |
+
|
461 |
+
|
462 |
+
def get_speakers(language):
    """Build the component updates for a newly selected language.

    Args:
        language: Key of ``language_dict`` chosen in the language dropdown;
            may be ``None`` or unknown when nothing is selected.

    Returns:
        A ``(gr.Dropdown, gr.Checkbox)`` pair: the speaker dropdown filled
        with the language's voices (first one preselected), and the Tashkeel
        checkbox, which is visible only for "Arabic". When the language has
        no voices, the dropdown is emptied and disabled and the checkbox is
        hidden.
    """
    speakers = list(language_dict.get(language, {}))
    if not speakers:
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )
    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
|
466 |
+
|
467 |
+
|
468 |
+
# No language or speaker is preselected; the speaker list is filled in once
# the user picks a language.
default_language = None
default_speaker = None
with gr.Blocks(title="Writoo AI V2") as demo:
    gr.HTML(" ")
    # The opening <h3> tag is closed with '>' so the banner is valid HTML.
    gr.HTML("<h3 style='color:Tomato;'>🎶 Exciting News: 10 More Voice Added 🎶 </h3>")

    gr.Markdown("✨ Features: • Convert text to speech in seconds 😍")
    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            input_text = gr.Textbox(lines=5, label="Input Text", placeholder="Enter text to convert to speech")
            language = gr.Dropdown(
                choices=list(language_dict.keys()), value=default_language, label="Languages", interactive=True
            )
            # Disabled until get_speakers populates it for the chosen language.
            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Speakers", interactive=False)
            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
            run_btn = gr.Button(value="Generate Audio", variant="primary")

        # Right column: outputs.
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
            output_audio = gr.Audio(type="filepath", label="Audio Output")

    # Refresh the speaker list (and the checkbox) whenever the language changes.
    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
    run_btn.click(text_to_speech_edge, inputs=[input_text, language, speaker, tashkeel_checkbox], outputs=[output_text, output_audio])

if __name__ == "__main__":
    demo.queue().launch(share=False)
|