hivecorp committed on
Commit
835e081
·
verified ·
1 Parent(s): 8154937

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +465 -159
app.py CHANGED
@@ -1,145 +1,28 @@
1
- import gradio as gr
2
- from pydub import AudioSegment
3
- import edge_tts
4
- import os
5
- import asyncio
6
- import uuid
7
- import re
8
-
9
# Function to get the length of an audio file in seconds
def get_audio_length(audio_file):
    """Return the duration of ``audio_file`` in seconds.

    The file is decoded with ``AudioSegment.from_file`` and the segment's
    length is divided by 1000, so callers that need milliseconds must
    multiply the result back up.
    """
    segment = AudioSegment.from_file(audio_file)
    duration_ms = len(segment)
    return duration_ms / 1000
13
-
14
# Function to format a millisecond offset as an SRT timestamp
def format_time_ms(milliseconds):
    """Format a millisecond offset as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        milliseconds: Offset in milliseconds (int or float). Fractions of a
            millisecond are truncated. Negative values are clamped to zero,
            because a negative offset would otherwise be rendered as a
            malformed stamp such as ``-1:59:59,999``.

    Returns:
        The timestamp string; hours are zero-padded to at least two digits.
    """
    total_ms = max(int(milliseconds), 0)
    seconds, ms = divmod(total_ms, 1000)
    mins, secs = divmod(seconds, 60)
    hrs, mins = divmod(mins, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
20
-
21
# Function to split text into subtitle segments on punctuation, never splitting a word
def split_text_into_segments(text, max_words=8):
    """Split ``text`` into subtitle-sized segments.

    The text is cut after each of ``. ! ? ,``. A piece with more than
    ``max_words`` words is further cut into chunks of ``max_words`` words.
    Text after the last punctuation mark becomes a final segment.

    A punctuation mark that follows no text (for example the second and third
    dots of ``"Wait..."``) is appended to the previous segment instead of
    becoming a segment of its own; leading punctuation with nothing before it
    is dropped.

    Args:
        text: The script text to split.
        max_words: Maximum number of words per segment (default 8).

    Returns:
        A list of non-empty segment strings; empty for empty input.
    """
    segments = []
    # With one capturing group, re.split yields text, mark, text, mark, ..., text.
    pieces = re.split(r'([.!?,])', text)

    for body, mark in zip(pieces[0::2], pieces[1::2]):
        body = body.strip()
        if not body:
            # Bare punctuation: keep it with the sentence it terminates.
            if segments:
                segments[-1] += mark
            continue

        sentence = body + mark
        words = sentence.split()
        if len(words) <= max_words:
            segments.append(sentence)
        else:
            segments.extend(
                " ".join(words[start:start + max_words])
                for start in range(0, len(words), max_words)
            )

    trailing_text = pieces[-1].strip()
    if trailing_text:
        segments.append(trailing_text)

    return segments
49
-
50
# Function to synthesize one batch and build its SRT entries
async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
    """Synthesize one batch of text and build SRT entries for it.

    The batch audio is saved to ``batch_<batch_num>_audio.wav``, its measured
    length is divided equally among the segments returned by
    ``split_text_into_segments``, and one SRT entry is emitted per segment.

    Args:
        batch_text: Text of this batch.
        batch_num: Zero-based batch index; used in the file name and to offset
            the SRT entry numbers by ``batch_num * 100``.
        start_offset: Start of this batch on the combined timeline, in ms.
        pitch: Pitch string passed to ``edge_tts.Communicate``.
        rate: Rate string passed to ``edge_tts.Communicate``.
        voice: Voice name passed to ``edge_tts.Communicate``.

    Returns:
        ``(srt_content, audio_file, end_offset)`` where ``end_offset`` is the
        timeline position (ms) at which the next batch should start.
    """
    # NOTE(review): the file name depends only on batch_num, so two requests
    # running at the same time would write the same path — confirm whether
    # this app is ever served concurrently.
    audio_file = f"batch_{batch_num}_audio.wav"

    tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=pitch)
    await tts.save(audio_file)

    actual_length = get_audio_length(audio_file) * 1000  # Convert to milliseconds
    batch_end = start_offset + actual_length

    segments = split_text_into_segments(batch_text)
    if not segments:
        # Nothing to caption (e.g. whitespace-only batch): avoid dividing by
        # zero, but still advance the timeline by the audio that was produced.
        return "", audio_file, batch_end

    segment_duration = actual_length / len(segments)
    start_time = start_offset
    entries = []
    for index, segment in enumerate(segments):
        # Never let accumulated float error push an entry past the batch audio.
        end_time = min(start_time + segment_duration, batch_end)
        entries.append(
            f"{index + 1 + (batch_num * 100)}\n"
            f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
            f"{segment}\n\n"
        )
        start_time = end_time

    return "".join(entries), audio_file, start_time
77
-
78
# Batch processing function with millisecond accuracy
def _split_script_into_batches(script_text, limit=500):
    """Cut ``script_text`` into pieces of at most ``limit`` characters, preferring whitespace boundaries."""
    batches = []
    remaining = script_text
    while len(remaining) > limit:
        # Last whitespace at or before the limit; an unbroken run longer than
        # the limit falls back to a hard cut.
        cut = max(remaining.rfind(ch, 0, limit + 1) for ch in " \n\t")
        if cut <= 0:
            cut = limit
        batches.append(remaining[:cut])
        remaining = remaining[cut:].lstrip()
    batches.append(remaining)
    return [batch for batch in batches if batch.strip()]


def _srt_timestamp_to_ms(stamp):
    """Convert an ``HH:MM:SS,mmm`` stamp to integer milliseconds (no float round trip)."""
    clock, _, millis = stamp.strip().partition(",")
    hours, minutes, seconds = (int(part) for part in clock.split(":"))
    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + int(millis or 0)


async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
    """Convert a script into one MP3 file and one matching SRT file.

    The script is processed in batches of at most 500 characters. Each batch
    is synthesized by ``generate_accurate_srt``, its audio is appended to the
    combined track and its temporary audio file is removed. Afterwards every
    timing line is clamped to the length of the combined audio, and both
    results are written under a fresh UUID-based name in the working
    directory.

    Args:
        script_text: Full script text.
        pitch: Pitch string forwarded to ``generate_accurate_srt``.
        rate: Rate string forwarded to ``generate_accurate_srt``.
        voice: Voice name forwarded to ``generate_accurate_srt``.
        progress: Gradio progress tracker, called with the completed fraction
            after each batch.

    Returns:
        ``(final_srt_path, final_audio_path)``.
    """
    batches = _split_script_into_batches(script_text)
    srt_parts = []
    combined_audio = AudioSegment.empty()
    start_offset = 0.0

    for batch_num, batch_text in enumerate(batches):
        srt_content, audio_file, end_offset = await generate_accurate_srt(
            batch_text, batch_num, start_offset, pitch, rate, voice
        )
        srt_parts.append(srt_content)

        combined_audio += AudioSegment.from_file(audio_file)
        start_offset = end_offset

        os.remove(audio_file)
        progress((batch_num + 1) / len(batches))

    total_audio_ms = int(combined_audio.duration_seconds * 1000)
    # Only real timing lines are rewritten; subtitle text that merely contains
    # "-->" is passed through untouched.
    timing_line = re.compile(r"\d{2,}:\d{2}:\d{2},\d{3} --> \d{2,}:\d{2}:\d{2},\d{3}")
    validated_lines = []
    for line in "".join(srt_parts).strip().splitlines():
        if timing_line.fullmatch(line):
            start_str, end_str = line.split(' --> ')
            start_ms = min(_srt_timestamp_to_ms(start_str), total_audio_ms)
            end_ms = min(_srt_timestamp_to_ms(end_str), total_audio_ms)
            line = f"{format_time_ms(start_ms)} --> {format_time_ms(end_ms)}"
        validated_lines.append(line + "\n")
    validated_srt_content = "".join(validated_lines)

    unique_id = uuid.uuid4()
    final_audio_path = f"final_audio_{unique_id}.mp3"
    final_srt_path = f"final_subtitles_{unique_id}.srt"

    combined_audio.export(final_audio_path, format="mp3", bitrate="320k")

    # Explicit UTF-8 so non-ASCII subtitle text does not depend on the locale.
    with open(final_srt_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(validated_srt_content)

    return final_srt_path, final_audio_path
118
-
119
# Gradio interface function
async def process_script(script_text, pitch, rate, voice):
    """Gradio callback: turn the form values into SRT and audio files.

    Args:
        script_text: Script entered by the user.
        pitch: Pitch adjustment in Hz from the slider.
        rate: Rate adjustment in percent from the slider.
        voice: Display name of the voice; a key of ``voice_options``.

    Returns:
        ``(srt_path, audio_path, audio_path)`` — the audio path is returned
        twice, once for the download widget and once for playback.
    """
    # edge-tts expects explicitly signed values ("+5Hz", "-10%", "+0%"), so
    # always format with a sign instead of special-casing zero.
    pitch_str = f"{int(pitch):+d}Hz"
    formatted_rate = f"{int(rate):+d}%"
    srt_path, audio_path = await batch_process_srt_and_audio(
        script_text, pitch_str, formatted_rate, voice_options[voice]
    )
    return srt_path, audio_path, audio_path
125
 
126
- # Gradio interface setup
127
- voice_options = {
128
- "Andrew Male": "en-US-AndrewNeural",
129
- "Jenny Female": "en-US-JennyNeural",
130
- "Guy Male": "en-US-GuyNeural",
131
- "Ana Female": "en-US-AnaNeural",
132
- "Aria Female": "en-US-AriaNeural",
133
- "Brian Male": "en-US-BrianNeural",
134
- "Christopher Male": "en-US-ChristopherNeural",
135
- "Eric Male": "en-US-EricNeural",
136
- "Michelle Male": "en-US-MichelleNeural",
137
- "Roger Male": "en-US-RogerNeural",
138
- "Natasha Female": "en-AU-NatashaNeural",
139
- "William Male": "en-AU-WilliamNeural",
140
- "Clara Female": "en-CA-ClaraNeural",
141
- "Liam Female ": "en-CA-LiamNeural",
142
- "Libby Female": "en-GB-LibbyNeural",
 
 
 
143
  "Maisie": "en-GB-MaisieNeural",
144
  "Ryan": "en-GB-RyanNeural",
145
  "Sonia": "en-GB-SoniaNeural",
@@ -163,25 +46,448 @@ voice_options = {
163
  "Imani": "en-TZ-ImaniNeural",
164
  "Leah": "en-ZA-LeahNeural",
165
  "Luke": "en-ZA-LukeNeural"
166
- # Add other voices here...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  }
168
 
169
# Form widgets, in the order process_script receives them.
script_inputs = [
    gr.Textbox(label="Enter Script Text", lines=10),
    gr.Slider(label="Pitch Adjustment (Hz)", minimum=-20, maximum=20, value=0, step=1),
    gr.Slider(label="Rate Adjustment (%)", minimum=-50, maximum=50, value=-1, step=1),
    gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Andrew Male"),
]

# Result widgets, in the order process_script returns its values.
result_outputs = [
    gr.File(label="Download SRT File"),
    gr.File(label="Download Audio File"),
    gr.Audio(label="Audio Playback"),
]

app = gr.Interface(
    fn=process_script,
    inputs=script_inputs,
    outputs=result_outputs,
    title="WritooAI Pro Text-to-Speech with Subtitle",
    description="Convert your script into Audio with Auto generated Subtitles.",
    theme="compact",
)

app.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ import edge_tts
4
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ language_dict = {
7
+ "Hindi": {
8
+ "Madhur": "hi-IN-MadhurNeural",
9
+ "Swara": "hi-IN-SwaraNeural"
10
+ },
11
+ "English": {
12
+ "Jenny": "en-US-JennyNeural",
13
+ "Guy": "en-US-GuyNeural",
14
+ "Ana": "en-US-AnaNeural",
15
+ "Aria": "en-US-AriaNeural",
16
+ "Brian": "en-US-BrianNeural",
17
+ "Christopher": "en-US-ChristopherNeural",
18
+ "Eric": "en-US-EricNeural",
19
+ "Michelle": "en-US-MichelleNeural",
20
+ "Roger": "en-US-RogerNeural",
21
+ "Natasha": "en-AU-NatashaNeural",
22
+ "William": "en-AU-WilliamNeural",
23
+ "Clara": "en-CA-ClaraNeural",
24
+ "Liam": "en-CA-LiamNeural",
25
+ "Libby": "en-GB-LibbyNeural",
26
  "Maisie": "en-GB-MaisieNeural",
27
  "Ryan": "en-GB-RyanNeural",
28
  "Sonia": "en-GB-SoniaNeural",
 
46
  "Imani": "en-TZ-ImaniNeural",
47
  "Leah": "en-ZA-LeahNeural",
48
  "Luke": "en-ZA-LukeNeural"
49
+ },
50
+ "Spanish": {
51
+ "Elena": "es-AR-ElenaNeural",
52
+ "Tomas": "es-AR-TomasNeural",
53
+ "Marcelo": "es-BO-MarceloNeural",
54
+ "Sofia": "es-BO-SofiaNeural",
55
+ "Gonzalo": "es-CO-GonzaloNeural",
56
+ "Salome": "es-CO-SalomeNeural",
57
+ "Juan": "es-CR-JuanNeural",
58
+ "Maria": "es-CR-MariaNeural",
59
+ "Belkys": "es-CU-BelkysNeural",
60
+ "Emilio": "es-DO-EmilioNeural",
61
+ "Ramona": "es-DO-RamonaNeural",
62
+ "Andrea": "es-EC-AndreaNeural",
63
+ "Luis": "es-EC-LuisNeural",
64
+ "Alvaro": "es-ES-AlvaroNeural",
65
+ "Elvira": "es-ES-ElviraNeural",
66
+ "Teresa": "es-GQ-TeresaNeural",
67
+ "Andres": "es-GT-AndresNeural",
68
+ "Marta": "es-GT-MartaNeural",
69
+ "Carlos": "es-HN-CarlosNeural",
70
+ "Karla": "es-HN-KarlaNeural",
71
+ "Federico": "es-NI-FedericoNeural",
72
+ "Yolanda": "es-NI-YolandaNeural",
73
+ "Margarita": "es-PA-MargaritaNeural",
74
+ "Roberto": "es-PA-RobertoNeural",
75
+ "Alex": "es-PE-AlexNeural",
76
+ "Camila": "es-PE-CamilaNeural",
77
+ "Karina": "es-PR-KarinaNeural",
78
+ "Victor": "es-PR-VictorNeural",
79
+ "Mario": "es-PY-MarioNeural",
80
+ "Tania": "es-PY-TaniaNeural",
81
+ "Lorena": "es-SV-LorenaNeural",
82
+ "Rodrigo": "es-SV-RodrigoNeural",
83
+ "Alonso": "es-US-AlonsoNeural",
84
+ "Paloma": "es-US-PalomaNeural",
85
+ "Mateo": "es-UY-MateoNeural",
86
+ "Valentina": "es-UY-ValentinaNeural",
87
+ "Paola": "es-VE-PaolaNeural",
88
+ "Sebastian": "es-VE-SebastianNeural"
89
+ },
90
+ "Arabic": {
91
+ "Hamed": "ar-SA-HamedNeural",
92
+ "Zariyah": "ar-SA-ZariyahNeural",
93
+ "Fatima": "ar-AE-FatimaNeural",
94
+ "Hamdan": "ar-AE-HamdanNeural",
95
+ "Ali": "ar-BH-AliNeural",
96
+ "Laila": "ar-BH-LailaNeural",
97
+ "Ismael": "ar-DZ-IsmaelNeural",
98
+ "Salma": "ar-EG-SalmaNeural",
99
+ "Shakir": "ar-EG-ShakirNeural",
100
+ "Bassel": "ar-IQ-BasselNeural",
101
+ "Rana": "ar-IQ-RanaNeural",
102
+ "Sana": "ar-JO-SanaNeural",
103
+ "Taim": "ar-JO-TaimNeural",
104
+ "Fahed": "ar-KW-FahedNeural",
105
+ "Noura": "ar-KW-NouraNeural",
106
+ "Layla": "ar-LB-LaylaNeural",
107
+ "Rami": "ar-LB-RamiNeural",
108
+ "Iman": "ar-LY-ImanNeural",
109
+ "Omar": "ar-LY-OmarNeural",
110
+ "Jamal": "ar-MA-JamalNeural",
111
+ "Mouna": "ar-MA-MounaNeural",
112
+ "Abdullah": "ar-OM-AbdullahNeural",
113
+ "Aysha": "ar-OM-AyshaNeural",
114
+ "Amal": "ar-QA-AmalNeural",
115
+ "Moaz": "ar-QA-MoazNeural",
116
+ "Amany": "ar-SY-AmanyNeural",
117
+ "Laith": "ar-SY-LaithNeural",
118
+ "Hedi": "ar-TN-HediNeural",
119
+ "Reem": "ar-TN-ReemNeural",
120
+ "Maryam": "ar-YE-MaryamNeural",
121
+ "Saleh": "ar-YE-SalehNeural"
122
+ },
123
+ "Korean": {
124
+ "Sun-Hi": "ko-KR-SunHiNeural",
125
+ "InJoon": "ko-KR-InJoonNeural"
126
+ },
127
+ "Thai": {
128
+ "Premwadee": "th-TH-PremwadeeNeural",
129
+ "Niwat": "th-TH-NiwatNeural"
130
+ },
131
+ "Vietnamese": {
132
+ "HoaiMy": "vi-VN-HoaiMyNeural",
133
+ "NamMinh": "vi-VN-NamMinhNeural"
134
+ },
135
+ "Japanese": {
136
+ "Nanami": "ja-JP-NanamiNeural",
137
+ "Keita": "ja-JP-KeitaNeural"
138
+ },
139
+ "French": {
140
+ "Denise": "fr-FR-DeniseNeural",
141
+ "Eloise": "fr-FR-EloiseNeural",
142
+ "Henri": "fr-FR-HenriNeural",
143
+ "Sylvie": "fr-CA-SylvieNeural",
144
+ "Antoine": "fr-CA-AntoineNeural",
145
+ "Jean": "fr-CA-JeanNeural",
146
+ "Ariane": "fr-CH-ArianeNeural",
147
+ "Fabrice": "fr-CH-FabriceNeural",
148
+ "Charline": "fr-BE-CharlineNeural",
149
+ "Gerard": "fr-BE-GerardNeural"
150
+ },
151
+ "Portuguese": {
152
+ "Francisca": "pt-BR-FranciscaNeural",
153
+ "Antonio": "pt-BR-AntonioNeural",
154
+ "Duarte": "pt-PT-DuarteNeural",
155
+ "Raquel": "pt-PT-RaquelNeural"
156
+ },
157
+ "Indonesian": {
158
+ "Ardi": "id-ID-ArdiNeural",
159
+ "Gadis": "id-ID-GadisNeural"
160
+ },
161
+ "Hebrew": {
162
+ "Avri": "he-IL-AvriNeural",
163
+ "Hila": "he-IL-HilaNeural"
164
+ },
165
+ "Italian": {
166
+ "Isabella": "it-IT-IsabellaNeural",
167
+ "Diego": "it-IT-DiegoNeural",
168
+ "Elsa": "it-IT-ElsaNeural"
169
+ },
170
+ "Dutch": {
171
+ "Colette": "nl-NL-ColetteNeural",
172
+ "Fenna": "nl-NL-FennaNeural",
173
+ "Maarten": "nl-NL-MaartenNeural",
174
+ "Arnaud": "nl-BE-ArnaudNeural",
175
+ "Dena": "nl-BE-DenaNeural"
176
+ },
177
+ "Malay": {
178
+ "Osman": "ms-MY-OsmanNeural",
179
+ "Yasmin": "ms-MY-YasminNeural"
180
+ },
181
+ "Norwegian": {
182
+ "Pernille": "nb-NO-PernilleNeural",
183
+ "Finn": "nb-NO-FinnNeural"
184
+ },
185
+ "Swedish": {
186
+ "Sofie": "sv-SE-SofieNeural",
187
+ "Mattias": "sv-SE-MattiasNeural"
188
+ },
189
+ "Greek": {
190
+ "Athina": "el-GR-AthinaNeural",
191
+ "Nestoras": "el-GR-NestorasNeural"
192
+ },
193
+ "German": {
194
+ "Katja": "de-DE-KatjaNeural",
195
+ "Amala": "de-DE-AmalaNeural",
196
+ "Conrad": "de-DE-ConradNeural",
197
+ "Killian": "de-DE-KillianNeural",
198
+ "Ingrid": "de-AT-IngridNeural",
199
+ "Jonas": "de-AT-JonasNeural",
200
+ "Jan": "de-CH-JanNeural",
201
+ "Leni": "de-CH-LeniNeural"
202
+ },
203
+ "Afrikaans": {
204
+ "Adri": "af-ZA-AdriNeural",
205
+ "Willem": "af-ZA-WillemNeural"
206
+ },
207
+ "Amharic": {
208
+ "Ameha": "am-ET-AmehaNeural",
209
+ "Mekdes": "am-ET-MekdesNeural"
210
+ },
211
+ "Azerbaijani": {
212
+ "Babek": "az-AZ-BabekNeural",
213
+ "Banu": "az-AZ-BanuNeural"
214
+ },
215
+ "Bulgarian": {
216
+ "Borislav": "bg-BG-BorislavNeural",
217
+ "Kalina": "bg-BG-KalinaNeural"
218
+ },
219
+ "Bengali": {
220
+ "Nabanita": "bn-BD-NabanitaNeural",
221
+ "Pradeep": "bn-BD-PradeepNeural",
222
+ "Bashkar": "bn-IN-BashkarNeural",
223
+ "Tanishaa": "bn-IN-TanishaaNeural"
224
+ },
225
+ "Bosnian": {
226
+ "Goran": "bs-BA-GoranNeural",
227
+ "Vesna": "bs-BA-VesnaNeural"
228
+ },
229
+ "Catalan": {
230
+ "Joana": "ca-ES-JoanaNeural",
231
+ "Enric": "ca-ES-EnricNeural"
232
+ },
233
+ "Czech": {
234
+ "Antonin": "cs-CZ-AntoninNeural",
235
+ "Vlasta": "cs-CZ-VlastaNeural"
236
+ },
237
+ "Welsh": {
238
+ "Aled": "cy-GB-AledNeural",
239
+ "Nia": "cy-GB-NiaNeural"
240
+ },
241
+ "Danish": {
242
+ "Christel": "da-DK-ChristelNeural",
243
+ "Jeppe": "da-DK-JeppeNeural"
244
+ },
245
+ "Estonian": {
246
+ "Anu": "et-EE-AnuNeural",
247
+ "Kert": "et-EE-KertNeural"
248
+ },
249
+ "Persian": {
250
+ "Dilara": "fa-IR-DilaraNeural",
251
+ "Farid": "fa-IR-FaridNeural"
252
+ },
253
+ "Finnish": {
254
+ "Harri": "fi-FI-HarriNeural",
255
+ "Noora": "fi-FI-NooraNeural"
256
+ },
257
+ "Irish": {
258
+ "Colm": "ga-IE-ColmNeural",
259
+ "Orla": "ga-IE-OrlaNeural"
260
+ },
261
+ "Galician": {
262
+ "Roi": "gl-ES-RoiNeural",
263
+ "Sabela": "gl-ES-SabelaNeural"
264
+ },
265
+ "Gujarati": {
266
+ "Dhwani": "gu-IN-DhwaniNeural",
267
+ "Niranjan": "gu-IN-NiranjanNeural"
268
+ },
269
+ "Croatian": {
270
+ "Gabrijela": "hr-HR-GabrijelaNeural",
271
+ "Srecko": "hr-HR-SreckoNeural"
272
+ },
273
+ "Hungarian": {
274
+ "Noemi": "hu-HU-NoemiNeural",
275
+ "Tamas": "hu-HU-TamasNeural"
276
+ },
277
+ "Icelandic": {
278
+ "Gudrun": "is-IS-GudrunNeural",
279
+ "Gunnar": "is-IS-GunnarNeural"
280
+ },
281
+ "Javanese": {
282
+ "Dimas": "jv-ID-DimasNeural",
283
+ "Siti": "jv-ID-SitiNeural"
284
+ },
285
+ "Georgian": {
286
+ "Eka": "ka-GE-EkaNeural",
287
+ "Giorgi": "ka-GE-GiorgiNeural"
288
+ },
289
+ "Kazakh": {
290
+ "Aigul": "kk-KZ-AigulNeural",
291
+ "Daulet": "kk-KZ-DauletNeural"
292
+ },
293
+ "Khmer": {
294
+ "Piseth": "km-KH-PisethNeural",
295
+ "Sreymom": "km-KH-SreymomNeural"
296
+ },
297
+ "Kannada": {
298
+ "Gagan": "kn-IN-GaganNeural",
299
+ "Sapna": "kn-IN-SapnaNeural"
300
+ },
301
+ "Lao": {
302
+ "Chanthavong": "lo-LA-ChanthavongNeural",
303
+ "Keomany": "lo-LA-KeomanyNeural"
304
+ },
305
+ "Lithuanian": {
306
+ "Leonas": "lt-LT-LeonasNeural",
307
+ "Ona": "lt-LT-OnaNeural"
308
+ },
309
+ "Latvian": {
310
+ "Everita": "lv-LV-EveritaNeural",
311
+ "Nils": "lv-LV-NilsNeural"
312
+ },
313
+ "Macedonian": {
314
+ "Aleksandar": "mk-MK-AleksandarNeural",
315
+ "Marija": "mk-MK-MarijaNeural"
316
+ },
317
+ "Malayalam": {
318
+ "Midhun": "ml-IN-MidhunNeural",
319
+ "Sobhana": "ml-IN-SobhanaNeural"
320
+ },
321
+ "Mongolian": {
322
+ "Bataa": "mn-MN-BataaNeural",
323
+ "Yesui": "mn-MN-YesuiNeural"
324
+ },
325
+ "Marathi": {
326
+ "Aarohi": "mr-IN-AarohiNeural",
327
+ "Manohar": "mr-IN-ManoharNeural"
328
+ },
329
+ "Maltese": {
330
+ "Grace": "mt-MT-GraceNeural",
331
+ "Joseph": "mt-MT-JosephNeural"
332
+ },
333
+ "Burmese": {
334
+ "Nilar": "my-MM-NilarNeural",
335
+ "Thiha": "my-MM-ThihaNeural"
336
+ },
337
+ "Nepali": {
338
+ "Hemkala": "ne-NP-HemkalaNeural",
339
+ "Sagar": "ne-NP-SagarNeural"
340
+ },
341
+ "Polish": {
342
+ "Marek": "pl-PL-MarekNeural",
343
+ "Zofia": "pl-PL-ZofiaNeural"
344
+ },
345
+ "Pashto": {
346
+ "Gul Nawaz": "ps-AF-GulNawazNeural",
347
+ "Latifa": "ps-AF-LatifaNeural"
348
+ },
349
+ "Romanian": {
350
+ "Alina": "ro-RO-AlinaNeural",
351
+ "Emil": "ro-RO-EmilNeural"
352
+ },
353
+ "Russian": {
354
+ "Svetlana": "ru-RU-SvetlanaNeural",
355
+ "Dmitry": "ru-RU-DmitryNeural"
356
+ },
357
+ "Sinhala": {
358
+ "Sameera": "si-LK-SameeraNeural",
359
+ "Thilini": "si-LK-ThiliniNeural"
360
+ },
361
+ "Slovak": {
362
+ "Lukas": "sk-SK-LukasNeural",
363
+ "Viktoria": "sk-SK-ViktoriaNeural"
364
+ },
365
+ "Slovenian": {
366
+ "Petra": "sl-SI-PetraNeural",
367
+ "Rok": "sl-SI-RokNeural"
368
+ },
369
+ "Somali": {
370
+ "Muuse": "so-SO-MuuseNeural",
371
+ "Ubax": "so-SO-UbaxNeural"
372
+ },
373
+ "Albanian": {
374
+ "Anila": "sq-AL-AnilaNeural",
375
+ "Ilir": "sq-AL-IlirNeural"
376
+ },
377
+ "Serbian": {
378
+ "Nicholas": "sr-RS-NicholasNeural",
379
+ "Sophie": "sr-RS-SophieNeural"
380
+ },
381
+ "Sundanese": {
382
+ "Jajang": "su-ID-JajangNeural",
383
+ "Tuti": "su-ID-TutiNeural"
384
+ },
385
+ "Swahili": {
386
+ "Rafiki": "sw-KE-RafikiNeural",
387
+ "Zuri": "sw-KE-ZuriNeural",
388
+ "Daudi": "sw-TZ-DaudiNeural",
389
+ "Rehema": "sw-TZ-RehemaNeural"
390
+ },
391
+ "Tamil": {
392
+ "Pallavi": "ta-IN-PallaviNeural",
393
+ "Valluvar": "ta-IN-ValluvarNeural",
394
+ "Kumar": "ta-LK-KumarNeural",
395
+ "Saranya": "ta-LK-SaranyaNeural",
396
+ "Kani": "ta-MY-KaniNeural",
397
+ "Surya": "ta-MY-SuryaNeural",
398
+ "Anbu": "ta-SG-AnbuNeural"
399
+ },
400
+ "Telugu": {
401
+ "Mohan": "te-IN-MohanNeural",
402
+ "Shruti": "te-IN-ShrutiNeural"
403
+ },
404
+ "Turkish": {
405
+ "Ahmet": "tr-TR-AhmetNeural",
406
+ "Emel": "tr-TR-EmelNeural"
407
+ },
408
+ "Ukrainian": {
409
+ "Ostap": "uk-UA-OstapNeural",
410
+ "Polina": "uk-UA-PolinaNeural"
411
+ },
412
+ "Urdu": {
413
+ "Gul": "ur-IN-GulNeural",
414
+ "Salman": "ur-IN-SalmanNeural",
415
+ "Asad": "ur-PK-AsadNeural",
416
+ "Uzma": "ur-PK-UzmaNeural"
417
+ },
418
+ "Uzbek": {
419
+ "Madina": "uz-UZ-MadinaNeural",
420
+ "Sardor": "uz-UZ-SardorNeural"
421
+ },
422
+ "Mandarin": {
423
+ "Xiaoxiao": "zh-CN-XiaoxiaoNeural",
424
+ "Yunyang": "zh-CN-YunyangNeural",
425
+ "Yunxi": "zh-CN-YunxiNeural",
426
+ "Xiaoyi": "zh-CN-XiaoyiNeural",
427
+ "Yunjian": "zh-CN-YunjianNeural",
428
+ "Yunxia": "zh-CN-YunxiaNeural",
429
+ "Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
430
+ "Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
431
+ "HiuMaan": "zh-HK-HiuMaanNeural",
432
+ "HiuGaai": "zh-HK-HiuGaaiNeural",
433
+ "WanLung": "zh-HK-WanLungNeural",
434
+ "HsiaoChen": "zh-TW-HsiaoChenNeural",
435
+ "HsiaoYu": "zh-TW-HsiaoYuNeural",
436
+ "YunJhe": "zh-TW-YunJheNeural"
437
+ },
438
+ "Zulu": {
439
+ "Thando": "zu-ZA-ThandoNeural",
440
+ "Themba": "zu-ZA-ThembaNeural"
441
+ }
442
  }
443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
 
445
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
    """Synthesize ``text`` with the selected voice and save it as an MP3.

    Args:
        text: Text to convert to speech.
        language_code: Key of ``language_dict`` (the language display name).
        speaker: Key of ``language_dict[language_code]`` (the speaker name).
        tashkeel_checkbox: Accepted because the UI passes it; not used here.

    Returns:
        ``(text, path)`` on success, where ``path`` is a temporary ``.mp3``
        file that is not deleted by this function. On invalid input,
        ``(error_message, None)``.
    """
    # Upper bound on the input length, in characters.
    char_limit = 100000000

    if not text or not text.strip():
        return "Error: Please enter some text to convert.", None
    if len(text) > char_limit:
        return f"Error: Use at most {char_limit:,} characters at a time.", None

    # Both dropdowns start without a selection, so look the voice up
    # defensively instead of raising KeyError.
    voice = language_dict.get(language_code, {}).get(speaker)
    if voice is None:
        return "Error: Please select a language and a speaker.", None

    communicate = edge_tts.Communicate(text, voice)
    # Reserve a unique path, then close the handle before the audio is saved
    # to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)

    return text, tmp_path
459
+
460
+
461
+
462
def get_speakers(language):
    """Build the speaker dropdown and Tashkeel checkbox for ``language``.

    Args:
        language: Key of ``language_dict``; may be ``None`` or unknown when
            nothing is selected.

    Returns:
        ``(dropdown, checkbox)``. For a known language the dropdown lists its
        speakers with the first one selected, and the checkbox is visible
        only for ``"Arabic"``. Otherwise the dropdown is empty and disabled
        and the checkbox is hidden.
    """
    speakers = list(language_dict.get(language, {}))
    if not speakers:
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )
    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
466
+
467
+
468
# Nothing is preselected: the speaker list is filled in once a language is chosen.
default_language = None
default_speaker = None

with gr.Blocks(title="Writoo AI V2") as demo:
    gr.HTML(" ")
    # The opening <h3> tag must be closed with '>' or the style attribute
    # swallows the heading text.
    gr.HTML("<h3 style='color:Tomato;'>🎶 Exciting News: 10 More Voice Added 🎶 </h3>")

    gr.Markdown("✨ Features: • Convert text to speech in seconds 😍")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(lines=5, label="Input Text", placeholder="Enter text to convert to speech")
            language = gr.Dropdown(
                choices=list(language_dict.keys()), value=default_language, label="Languages", interactive=True
            )
            speaker = gr.Dropdown(choices=[], value=default_speaker, label="Speakers", interactive=False)
            tashkeel_checkbox = gr.Checkbox(label="Tashkeel", value=False, visible=False, interactive=False)
            run_btn = gr.Button(value="Generate Audio", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
            output_audio = gr.Audio(type="filepath", label="Audio Output")

    # Choosing a language repopulates the speaker list and toggles the checkbox.
    language.change(get_speakers, inputs=[language], outputs=[speaker, tashkeel_checkbox])
    run_btn.click(
        text_to_speech_edge,
        inputs=[input_text, language, speaker, tashkeel_checkbox],
        outputs=[output_text, output_audio],
    )

if __name__ == "__main__":
    demo.queue().launch(share=False)