RafaG committed on
Commit
69966fe
·
verified ·
1 Parent(s): 7fc52c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -123
app.py CHANGED
@@ -3,43 +3,42 @@ import os
3
  import json
4
  import gradio as gr
5
  from pydub import AudioSegment
 
6
  from header import badges, description
7
  from pydub.silence import split_on_silence
8
  from get_voices import get_voices
9
- #from adjust import remove_silence, controlador_generate_audio, generate_audio
 
 
 
 
 
 
10
 
11
- # Load voices from JSON file
12
  def load_voices():
13
  with open('voices.json', 'r', encoding='utf-8') as f:
14
  return json.load(f)
15
 
16
- # Get formatted voice options for specific language
17
  def get_voice_options(language, voices_data):
18
  if language in voices_data:
19
  return [f"{voice['name']} | {voice['gender']}" for voice in voices_data[language]]
20
  return []
21
 
22
- # Extract voice name from formatted string
23
  def extract_voice_name(formatted_voice):
24
  return formatted_voice.split(" | ")[0]
25
 
26
  def update_voice_options(language):
27
  voices_data = load_voices()
28
  voice_options = get_voice_options(language, voices_data)
29
- # Retorna apenas a lista de opções e o primeiro valor
30
  if voice_options:
31
  return gr.Dropdown(choices=voice_options, value=voice_options[0])
32
  return gr.Dropdown(choices=[], value=None)
33
 
34
  def update_voices_and_refresh():
35
- # Execute get_voices to update the voices.json file
36
  get_voices()
37
- # Reload the voices data
38
  voices_data = load_voices()
39
  available_languages = list(voices_data.keys())
40
- # Get initial voices for the first language
41
  initial_voices = get_voice_options(available_languages[0], voices_data) if available_languages else []
42
-
43
  return (
44
  gr.Dropdown(choices=available_languages, value=available_languages[0] if available_languages else None),
45
  gr.Dropdown(choices=initial_voices, value=initial_voices[0] if initial_voices else None)
@@ -47,52 +46,29 @@ def update_voices_and_refresh():
47
 
48
  def remove_silence(input_file, output_file):
49
  audio = AudioSegment.from_wav(input_file)
50
-
51
- # Encontra os segmentos de áudio que não são silêncio
52
  segments = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
53
-
54
- # Concatena os segmentos de áudio não silenciosos
55
  non_silent_audio = AudioSegment.silent(duration=0)
56
  for segment in segments:
57
  non_silent_audio += segment
58
-
59
- # Salva o áudio sem as partes de silêncio
60
  non_silent_audio.export(output_file, format="wav")
61
 
62
  def controlador_generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input, checkbox_cortar_silencio):
63
- # Gerar áudio
64
  audio_file = generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input)
65
  if audio_file:
66
  print("Áudio gerado com sucesso:", audio_file)
67
- # Verificar se o checkbox de cortar silêncio está marcado
68
  if checkbox_cortar_silencio:
69
  print("Cortando silêncio...")
70
- # Remover silêncio do áudio
71
  remove_silence(audio_file, audio_file)
72
  print("Silêncio removido com sucesso!")
73
  else:
74
  print("Erro ao gerar áudio.")
75
- return audio_file # Retornar o caminho do arquivo de áudio
76
 
77
  def generate_audio(texto, modelo_de_voz, velocidade, tom, volume):
78
- # Extract actual voice name from formatted string if necessary
79
  actual_voice = extract_voice_name(modelo_de_voz)
80
-
81
- # Format parameters with proper signs
82
- if velocidade >= 0:
83
- rate_str = f"+{velocidade}%"
84
- else:
85
- rate_str = f"{velocidade}%"
86
-
87
- if tom >= 0:
88
- pitch_str = f"+{tom}Hz"
89
- else:
90
- pitch_str = f"{tom}Hz"
91
-
92
- if volume >= 0:
93
- volume_str = f"+{volume}%"
94
- else:
95
- volume_str = f"{volume}%"
96
 
97
  output_dir = "output"
98
  os.makedirs(output_dir, exist_ok=True)
@@ -119,10 +95,7 @@ def generate_audio(texto, modelo_de_voz, velocidade, tom, volume):
119
  return output_file
120
 
121
  def generate_audio_from_file(file_path, modelo_de_voz, velocidade, tom, volume):
122
- # Extrai o nome real da voz formatada, se necessário
123
  actual_voice = extract_voice_name(modelo_de_voz)
124
-
125
- # Formatação dos parâmetros com sinais adequados
126
  rate_str = f"+{velocidade}%" if velocidade >= 0 else f"{velocidade}%"
127
  pitch_str = f"+{tom}Hz" if tom >= 0 else f"{tom}Hz"
128
  volume_str = f"+{volume}%" if volume >= 0 else f"{volume}%"
@@ -131,10 +104,9 @@ def generate_audio_from_file(file_path, modelo_de_voz, velocidade, tom, volume):
131
  os.makedirs(output_dir, exist_ok=True)
132
  output_file = os.path.join(output_dir, "new_audio.mp3")
133
 
134
- # Usar -f FILE para passar o caminho do arquivo de texto
135
  cmd = [
136
  "edge-tts",
137
- "-f", file_path, # Certificar que o conteúdo do arquivo seja texto puro
138
  "--rate=" + rate_str,
139
  "--pitch=" + pitch_str,
140
  "--volume=" + volume_str,
@@ -156,10 +128,7 @@ def controlador_generate_audio_from_file(file, voice_model_input, speed_input, p
156
  if file is None:
157
  return None
158
 
159
- # Neste caso, o 'file' já é o caminho do arquivo, então não precisa reescrever
160
- temp_file_path = file # Caminho do arquivo que você recebe do Gradio
161
-
162
- # Gerar o áudio
163
  audio_file = generate_audio_from_file(temp_file_path, voice_model_input, speed_input, pitch_input, volume_input)
164
 
165
  if audio_file:
@@ -173,6 +142,131 @@ def controlador_generate_audio_from_file(file, voice_model_input, speed_input, p
173
 
174
  return audio_file
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"), title="QuickTTS") as iface:
177
  gr.Markdown(badges)
178
  gr.Markdown(description)
@@ -182,17 +276,14 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
182
 
183
  with gr.Tabs():
184
  with gr.TabItem("Edge-TTS"):
185
- gr.Markdown("É ilimitado, podendo até mesmo colocar um livro inteiro, mas claro, tem a questão de tempo, quanto maior o texto, mais demorado é, dublagem por SRT talvez um dia eu bote.")
186
 
187
  with gr.Row():
188
- # Language selection dropdown
189
  language_input = gr.Dropdown(
190
  choices=available_languages,
191
  label="Idioma",
192
  value=available_languages[52] if available_languages else None
193
  )
194
-
195
- # Voice model dropdown (will be updated based on language selection)
196
  initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
197
  voice_model_input = gr.Dropdown(
198
  choices=initial_voices,
@@ -200,7 +291,6 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
200
  value=initial_voices[0] if initial_voices else None
201
  )
202
 
203
- # Connect language selection to voice model update
204
  language_input.change(
205
  fn=update_voice_options,
206
  inputs=[language_input],
@@ -211,29 +301,11 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
211
 
212
  with gr.Row():
213
  with gr.Column():
214
- speed_input = gr.Slider(
215
- minimum=-200,
216
- maximum=200,
217
- label="Velocidade (%)",
218
- value=0,
219
- interactive=True
220
- )
221
  with gr.Column():
222
- pitch_input = gr.Slider(
223
- minimum=-100,
224
- maximum=100,
225
- label="Tom (Hz)",
226
- value=0,
227
- interactive=True
228
- )
229
  with gr.Column():
230
- volume_input = gr.Slider(
231
- minimum=-99,
232
- maximum=100,
233
- label="Volume (%)",
234
- value=0,
235
- interactive=True
236
- )
237
 
238
  checkbox_cortar_silencio = gr.Checkbox(label="Cortar Silencio", interactive=True)
239
  audio_output = gr.Audio(label="Resultado", type="filepath", interactive=False)
@@ -242,39 +314,27 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
242
  edgetts_button = gr.Button(value="Falar")
243
  edgetts_button.click(
244
  controlador_generate_audio,
245
- inputs=[
246
- audio_input,
247
- voice_model_input,
248
- speed_input,
249
- pitch_input, # New input
250
- volume_input, # New input
251
- checkbox_cortar_silencio
252
- ],
253
  outputs=[audio_output]
254
  )
255
-
256
  clear_button = gr.ClearButton(audio_input, value='Limpar')
257
 
258
- # Add update voices button at the top
259
  update_voices_btn = gr.Button(value="Atualizar Lista de Vozes")
260
- # Connect update voices button to refresh function
261
  update_voices_btn.click(
262
  fn=update_voices_and_refresh,
263
  inputs=[],
264
  outputs=[language_input, voice_model_input]
265
  )
266
  gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
267
-
268
  with gr.TabItem("Lote (Arquivo txt)"):
269
- gr.Markdown("Carregar texto de um arquivo")
270
- # Language and voice selection (same as first tab)
271
  with gr.Row():
272
  language_input_file = gr.Dropdown(
273
  choices=available_languages,
274
  label="Idioma",
275
  value=available_languages[52] if available_languages else None
276
  )
277
-
278
  initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
279
  voice_model_input_file = gr.Dropdown(
280
  choices=initial_voices,
@@ -287,39 +347,16 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
287
  inputs=[language_input_file],
288
  outputs=[voice_model_input_file]
289
  )
290
- gr.Markdown("O programa vai ler linha por linha e entregar em um único áudio")
291
- # File input
292
- file_input = gr.File(
293
- label="Arquivo de Texto",
294
- file_types=[".txt"],
295
- type="filepath"
296
- )
297
 
298
  with gr.Row():
299
  with gr.Column():
300
- speed_input_file = gr.Slider(
301
- minimum=-200,
302
- maximum=200,
303
- label="Velocidade (%)",
304
- value=0,
305
- interactive=True
306
- )
307
  with gr.Column():
308
- pitch_input_file = gr.Slider(
309
- minimum=-100,
310
- maximum=100,
311
- label="Tom (Hz)",
312
- value=0,
313
- interactive=True
314
- )
315
  with gr.Column():
316
- volume_input_file = gr.Slider(
317
- minimum=-99,
318
- maximum=100,
319
- label="Volume (%)",
320
- value=0,
321
- interactive=True
322
- )
323
 
324
  checkbox_cortar_silencio_file = gr.Checkbox(label="Cortar Silencio", interactive=True)
325
  audio_output_file = gr.Audio(label="Resultado", type="filepath", interactive=False)
@@ -327,23 +364,58 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"
327
  edgetts_button_file = gr.Button(value="Falar")
328
  edgetts_button_file.click(
329
  controlador_generate_audio_from_file,
330
- inputs=[
331
- file_input,
332
- voice_model_input_file,
333
- speed_input_file,
334
- pitch_input_file,
335
- volume_input_file,
336
- checkbox_cortar_silencio_file
337
- ],
338
  outputs=[audio_output_file]
339
  )
340
-
341
  clear_button_file = gr.ClearButton(file_input, value='Limpar')
342
 
343
  gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  gr.Markdown("""
346
  Desenvolvido por Rafael Godoy <br>
347
  Apoie o projeto pelo https://nubank.com.br/pagar/1ls6a4/0QpSSbWBSq, qualquer valor é bem vindo.
 
 
348
  """)
349
  iface.launch()
 
3
  import json
4
  import gradio as gr
5
  from pydub import AudioSegment
6
+ from pydub.playback import play
7
  from header import badges, description
8
  from pydub.silence import split_on_silence
9
  from get_voices import get_voices
10
+ import asyncio
11
+ from pathlib import Path
12
+ import pysrt
13
+ from tqdm import tqdm
14
+ import shutil
15
+
16
+ srt_temp_deleta = True
17
 
 
def load_voices():
    """Read ``voices.json`` from the current directory and return its parsed content."""
    with open('voices.json', 'r', encoding='utf-8') as voices_file:
        voices = json.load(voices_file)
    return voices
21
 
 
def get_voice_options(language, voices_data):
    """Build the "name | gender" labels for every voice of one language.

    Returns an empty list when ``language`` is not a key of ``voices_data``.
    """
    if language not in voices_data:
        return []

    labels = []
    for entry in voices_data[language]:
        labels.append(f"{entry['name']} | {entry['gender']}")
    return labels
26
 
 
def extract_voice_name(formatted_voice):
    """Return the part of a "name | gender" label that precedes the first " | "."""
    name, _separator, _rest = formatted_voice.partition(" | ")
    return name
29
 
def update_voice_options(language):
    """Return a voice dropdown populated for ``language``.

    The voice list is re-read through ``load_voices`` on every call. The first
    option is preselected; when the language has no voices the dropdown is
    returned with no choices and no value.
    """
    options = get_voice_options(language, load_voices())
    selected = options[0] if options else None
    return gr.Dropdown(choices=options, value=selected)
36
 
37
  def update_voices_and_refresh():
 
38
  get_voices()
 
39
  voices_data = load_voices()
40
  available_languages = list(voices_data.keys())
 
41
  initial_voices = get_voice_options(available_languages[0], voices_data) if available_languages else []
 
42
  return (
43
  gr.Dropdown(choices=available_languages, value=available_languages[0] if available_languages else None),
44
  gr.Dropdown(choices=initial_voices, value=initial_voices[0] if initial_voices else None)
 
46
 
def remove_silence(input_file, output_file):
    """Drop the silent stretches of a WAV file and export what remains as WAV.

    Segments are found with ``split_on_silence`` (``min_silence_len=500``,
    ``silence_thresh=-40``) and joined back to back in their original order.
    """
    source = AudioSegment.from_wav(input_file)
    voiced_parts = split_on_silence(source, min_silence_len=500, silence_thresh=-40)

    # Start from an empty segment so that "no voiced parts" still exports a file.
    joined = AudioSegment.silent(duration=0)
    for part in voiced_parts:
        joined = joined + part

    joined.export(output_file, format="wav")
54
 
def controlador_generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input, checkbox_cortar_silencio):
    """Generate speech via ``generate_audio`` and optionally strip its silence.

    Returns whatever ``generate_audio`` returned; a falsy result is reported on
    stdout and passed through unchanged.
    """
    result_path = generate_audio(audio_input, voice_model_input, speed_input, pitch_input, volume_input)

    if not result_path:
        print("Erro ao gerar áudio.")
        return result_path

    print("Áudio gerado com sucesso:", result_path)
    if checkbox_cortar_silencio:
        print("Cortando silêncio...")
        # The file is rewritten in place: same path as input and output.
        remove_silence(result_path, result_path)
        print("Silêncio removido com sucesso!")
    return result_path
66
 
67
  def generate_audio(texto, modelo_de_voz, velocidade, tom, volume):
 
68
  actual_voice = extract_voice_name(modelo_de_voz)
69
+ rate_str = f"+{velocidade}%" if velocidade >= 0 else f"{velocidade}%"
70
+ pitch_str = f"+{tom}Hz" if tom >= 0 else f"{tom}Hz"
71
+ volume_str = f"+{volume}%" if volume >= 0 else f"{volume}%"
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  output_dir = "output"
74
  os.makedirs(output_dir, exist_ok=True)
 
95
  return output_file
96
 
97
  def generate_audio_from_file(file_path, modelo_de_voz, velocidade, tom, volume):
 
98
  actual_voice = extract_voice_name(modelo_de_voz)
 
 
99
  rate_str = f"+{velocidade}%" if velocidade >= 0 else f"{velocidade}%"
100
  pitch_str = f"+{tom}Hz" if tom >= 0 else f"{tom}Hz"
101
  volume_str = f"+{volume}%" if volume >= 0 else f"{volume}%"
 
104
  os.makedirs(output_dir, exist_ok=True)
105
  output_file = os.path.join(output_dir, "new_audio.mp3")
106
 
 
107
  cmd = [
108
  "edge-tts",
109
+ "-f", file_path,
110
  "--rate=" + rate_str,
111
  "--pitch=" + pitch_str,
112
  "--volume=" + volume_str,
 
128
  if file is None:
129
  return None
130
 
131
+ temp_file_path = file
 
 
 
132
  audio_file = generate_audio_from_file(temp_file_path, voice_model_input, speed_input, pitch_input, volume_input)
133
 
134
  if audio_file:
 
142
 
143
  return audio_file
144
 
def timetoms(time_obj):
    """Convert an object with ``hours``, ``minutes``, ``seconds`` and ``milliseconds`` attributes to total milliseconds."""
    whole_seconds = (time_obj.hours * 60 + time_obj.minutes) * 60 + time_obj.seconds
    return whole_seconds * 1000 + time_obj.milliseconds
147
+
async def merge_audio_files(output_folder, srt_file):
    """Assemble the per-subtitle MP3 clips into one track that follows the SRT timeline.

    For every subtitle in ``srt_file`` the clip ``<index>.mp3`` (index padded to
    two digits) is looked up in ``output_folder``; a missing clip is replaced by
    silence lasting as long as the subtitle. Silence is inserted in front of a
    clip whenever the track is still shorter than the subtitle's start time,
    and one extra second of silence is appended after the last clip.

    Args:
        output_folder: Directory that holds the per-subtitle clips.
        srt_file: Path of the SRT file that defines order and timing.

    Returns:
        The path (as ``str``) of the exported ``<srt stem>_final.mp3``, which is
        written to the parent directory of ``output_folder``.
    """
    subs = pysrt.open(str(srt_file))
    base_name = Path(srt_file).stem
    audio_dir = Path(output_folder)
    trailing_silence_ms = 1000
    final_audio = AudioSegment.silent(duration=0)

    with tqdm(total=len(subs), desc=f"Mesclando áudios para {base_name}", unit="segmento") as pbar:
        for sub in subs:
            start_time = timetoms(sub.start)
            end_time = timetoms(sub.end)
            audio_file = audio_dir / f"{sub.index:02d}.mp3"

            if audio_file.exists():
                audio_segment = AudioSegment.from_mp3(str(audio_file))
            else:
                print(f"\nArquivo de áudio não encontrado: {audio_file}")
                # Clamp so a subtitle whose end precedes its start yields an empty clip.
                audio_segment = AudioSegment.silent(duration=max(end_time - start_time, 0))

            # Measure the gap against the audio written so far (len() is in
            # milliseconds), not against the previous subtitle's nominal end:
            # a clip whose decoded length differs from its slot would otherwise
            # shift every later subtitle by that difference.
            gap_ms = start_time - len(final_audio)
            if gap_ms > 0:
                final_audio += AudioSegment.silent(duration=gap_ms)

            final_audio += audio_segment
            pbar.update(1)

    final_audio += AudioSegment.silent(duration=trailing_silence_ms)

    output_file = audio_dir.parent / f"{base_name}_final.mp3"
    final_audio.export(str(output_file), format="mp3")
    print(f"\nÁudio final salvo em: {output_file}\n")
    return str(output_file)
190
+
async def adjust_audio_speed(input_file, output_file, target_duration_ms):
    """Fit an MP3 clip into ``target_duration_ms`` and export it to ``output_file``.

    A clip longer than the target is sped up with ``AudioSegment.speedup``; a
    shorter one is slowed down by giving the same raw samples a proportionally
    lower frame rate. The result is then cut or padded with silence so that its
    length equals the target.

    Args:
        input_file: Path of the source MP3.
        output_file: Path the fitted MP3 is written to.
        target_duration_ms: Desired length in milliseconds.

    Returns:
        The fitted ``AudioSegment``. When the source clip is empty or the target
        duration is not positive, the unmodified clip is returned and nothing
        is written to ``output_file``.
    """
    audio = AudioSegment.from_mp3(str(input_file))
    original_duration_ms = len(audio)

    if original_duration_ms == 0:
        print(f"Erro: Áudio em {input_file} tem duração zero.")
        return audio

    if target_duration_ms <= 0:
        # A zero target would divide by zero below and a negative one would
        # yield a negative frame rate; skip the clip like the empty-audio case.
        print(f"Erro: duração alvo inválida ({target_duration_ms} ms) para {input_file}.")
        return audio

    speed_factor = original_duration_ms / target_duration_ms

    if speed_factor > 1:
        adjusted_audio = audio.speedup(playback_speed=speed_factor)
    else:
        # Same samples, lower declared frame rate -> longer playback.
        slowed_rate = int(audio.frame_rate * speed_factor)
        adjusted_audio = audio._spawn(audio.raw_data, overrides={"frame_rate": slowed_rate})

    # Force the exact target length: trim any excess, pad any shortfall.
    shortfall_ms = target_duration_ms - len(adjusted_audio)
    if shortfall_ms < 0:
        adjusted_audio = adjusted_audio[:target_duration_ms]
    elif shortfall_ms > 0:
        adjusted_audio += AudioSegment.silent(duration=shortfall_ms)

    adjusted_audio.export(str(output_file), format="mp3")
    return adjusted_audio
210
+
async def process_srt_file(srt_file, voice, output_dir, pitch, volume):
    """Synthesize every subtitle of an SRT file with Edge-TTS and merge the clips.

    Each subtitle is rendered to ``<index>_temp.mp3``, fitted to the subtitle's
    duration by ``adjust_audio_speed`` (which writes ``<index>.mp3``) and the
    temporary file is deleted. Subtitles are synthesized two at a time. Finally
    ``merge_audio_files`` builds the combined track, and ``output_dir`` is
    removed when the module-level ``srt_temp_deleta`` flag is true.

    Args:
        srt_file: Path of the SRT file.
        voice: Edge-TTS voice name.
        output_dir: Working directory for the per-subtitle clips (created if missing).
        pitch: Pitch offset in Hz; formatted with an explicit sign.
        volume: Volume offset in percent; formatted with an explicit sign.

    Returns:
        The value returned by ``merge_audio_files`` (path of the final audio).
    """
    from edge_tts import Communicate as EdgeTTS

    subs = pysrt.open(srt_file)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    batch_size = 2  # number of subtitles synthesized concurrently
    total_indices = len(subs)

    pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
    volume_str = f"+{volume}%" if volume >= 0 else f"{volume}%"

    with tqdm(total=total_indices, desc="Gerando e ajustando áudios com EdgeTTS", unit="segmento") as pbar:
        for batch_start in range(0, total_indices, batch_size):
            batch_end = min(batch_start + batch_size, total_indices)
            batch = [subs[i] for i in range(batch_start, batch_end)]

            # Synthesize only subtitles without a usable clip yet.
            # NOTE(review): a non-empty <index>.mp3 already present in
            # output_dir is reused as is — confirm stale clips from another
            # SRT cannot be left there when the folder is kept.
            pending = []
            for sub in batch:
                output_file = output_dir / f"{sub.index:02d}.mp3"
                if not output_file.exists() or output_file.stat().st_size == 0:
                    temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
                    tts = EdgeTTS(text=sub.text, voice=voice, pitch=pitch_str, volume=volume_str)
                    pending.append(tts.save(str(temp_file)))

            if pending:
                await asyncio.gather(*pending)

            # Fit each freshly synthesized clip into its subtitle's time slot.
            for sub in batch:
                temp_file = output_dir / f"{sub.index:02d}_temp.mp3"
                if temp_file.exists():
                    output_file = output_dir / f"{sub.index:02d}.mp3"
                    target_duration_ms = timetoms(sub.end) - timetoms(sub.start)
                    await adjust_audio_speed(str(temp_file), str(output_file), target_duration_ms)
                    os.remove(temp_file)
                pbar.update(1)

    final_audio = await merge_audio_files(output_dir, srt_file)

    if srt_temp_deleta:
        shutil.rmtree(output_dir, ignore_errors=True)
        print(f"Pasta temporária {output_dir} apagada.")
    else:
        print(f"Pasta temporária {output_dir} mantida.")

    return final_audio
259
+
def controlador_process_srt_file(srt_file, voice_model_input, pitch_input, volume_input):
    """Run the SRT pipeline for an uploaded file and return the final audio path.

    Returns ``None`` without doing any work when ``srt_file`` is ``None``.
    The clips are staged in ``output/srt_temp``.
    """
    if srt_file is None:
        return None

    return asyncio.run(
        process_srt_file(
            srt_file,
            extract_voice_name(voice_model_input),
            "output/srt_temp",
            pitch_input,
            volume_input,
        )
    )
269
+
270
  with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="blue"), title="QuickTTS") as iface:
271
  gr.Markdown(badges)
272
  gr.Markdown(description)
 
276
 
277
  with gr.Tabs():
278
  with gr.TabItem("Edge-TTS"):
279
+ gr.Markdown("É ilimitado, podendo até mesmo colocar um livro inteiro, mas claro, tem a questão de tempo, quanto maior o texto, mais demorado é.")
280
 
281
  with gr.Row():
 
282
  language_input = gr.Dropdown(
283
  choices=available_languages,
284
  label="Idioma",
285
  value=available_languages[52] if available_languages else None
286
  )
 
 
287
  initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
288
  voice_model_input = gr.Dropdown(
289
  choices=initial_voices,
 
291
  value=initial_voices[0] if initial_voices else None
292
  )
293
 
 
294
  language_input.change(
295
  fn=update_voice_options,
296
  inputs=[language_input],
 
301
 
302
  with gr.Row():
303
  with gr.Column():
304
+ speed_input = gr.Slider(minimum=-200, maximum=200, label="Velocidade (%)", value=0, interactive=True)
 
 
 
 
 
 
305
  with gr.Column():
306
+ pitch_input = gr.Slider(minimum=-100, maximum=100, label="Tom (Hz)", value=0, interactive=True)
 
 
 
 
 
 
307
  with gr.Column():
308
+ volume_input = gr.Slider(minimum=-99, maximum=100, label="Volume (%)", value=0, interactive=True)
 
 
 
 
 
 
309
 
310
  checkbox_cortar_silencio = gr.Checkbox(label="Cortar Silencio", interactive=True)
311
  audio_output = gr.Audio(label="Resultado", type="filepath", interactive=False)
 
314
  edgetts_button = gr.Button(value="Falar")
315
  edgetts_button.click(
316
  controlador_generate_audio,
317
+ inputs=[audio_input, voice_model_input, speed_input, pitch_input, volume_input, checkbox_cortar_silencio],
 
 
 
 
 
 
 
318
  outputs=[audio_output]
319
  )
 
320
  clear_button = gr.ClearButton(audio_input, value='Limpar')
321
 
 
322
  update_voices_btn = gr.Button(value="Atualizar Lista de Vozes")
 
323
  update_voices_btn.click(
324
  fn=update_voices_and_refresh,
325
  inputs=[],
326
  outputs=[language_input, voice_model_input]
327
  )
328
  gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
329
+
330
  with gr.TabItem("Lote (Arquivo txt)"):
331
+ gr.Markdown("Carregar texto de um arquivo")
 
332
  with gr.Row():
333
  language_input_file = gr.Dropdown(
334
  choices=available_languages,
335
  label="Idioma",
336
  value=available_languages[52] if available_languages else None
337
  )
 
338
  initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
339
  voice_model_input_file = gr.Dropdown(
340
  choices=initial_voices,
 
347
  inputs=[language_input_file],
348
  outputs=[voice_model_input_file]
349
  )
350
+ gr.Markdown("O programa vai ler linha por linha e entregar em um único áudio")
351
+ file_input = gr.File(label="Arquivo de Texto", file_types=[".txt"], type="filepath")
 
 
 
 
 
352
 
353
  with gr.Row():
354
  with gr.Column():
355
+ speed_input_file = gr.Slider(minimum=-200, maximum=200, label="Velocidade (%)", value=0, interactive=True)
 
 
 
 
 
 
356
  with gr.Column():
357
+ pitch_input_file = gr.Slider(minimum=-100, maximum=100, label="Tom (Hz)", value=0, interactive=True)
 
 
 
 
 
 
358
  with gr.Column():
359
+ volume_input_file = gr.Slider(minimum=-99, maximum=100, label="Volume (%)", value=0, interactive=True)
 
 
 
 
 
 
360
 
361
  checkbox_cortar_silencio_file = gr.Checkbox(label="Cortar Silencio", interactive=True)
362
  audio_output_file = gr.Audio(label="Resultado", type="filepath", interactive=False)
 
364
  edgetts_button_file = gr.Button(value="Falar")
365
  edgetts_button_file.click(
366
  controlador_generate_audio_from_file,
367
+ inputs=[file_input, voice_model_input_file, speed_input_file, pitch_input_file, volume_input_file, checkbox_cortar_silencio_file],
 
 
 
 
 
 
 
368
  outputs=[audio_output_file]
369
  )
 
370
  clear_button_file = gr.ClearButton(file_input, value='Limpar')
371
 
372
  gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
373
+
374
+ with gr.TabItem("Ler .SRT"):
375
+ gr.Markdown("Carregar um arquivo SRT e gerar áudio sincronizado com os tempos das legendas. A velocidade é ajustada automaticamente para cada legenda.")
376
+ with gr.Row():
377
+ language_input_srt = gr.Dropdown(
378
+ choices=available_languages,
379
+ label="Idioma",
380
+ value=available_languages[52] if available_languages else None
381
+ )
382
+ initial_voices = get_voice_options(available_languages[52], voices_data) if available_languages else []
383
+ voice_model_input_srt = gr.Dropdown(
384
+ choices=initial_voices,
385
+ label="Modelo de Voz",
386
+ value=initial_voices[0] if initial_voices else None
387
+ )
388
+
389
+ language_input_srt.change(
390
+ fn=update_voice_options,
391
+ inputs=[language_input_srt],
392
+ outputs=[voice_model_input_srt]
393
+ )
394
 
395
+ srt_input = gr.File(label="Arquivo SRT", file_types=[".srt"], type="filepath")
396
+
397
+ with gr.Row():
398
+ with gr.Column():
399
+ pitch_input_srt = gr.Slider(minimum=-100, maximum=100, label="Tom (Hz)", value=0, interactive=True)
400
+ with gr.Column():
401
+ volume_input_srt = gr.Slider(minimum=-99, maximum=100, label="Volume (%)", value=0, interactive=True)
402
+
403
+ audio_output_srt = gr.Audio(label="Resultado", type="filepath", interactive=False)
404
+ with gr.Row():
405
+ srt_button = gr.Button(value="Gerar Áudio")
406
+ srt_button.click(
407
+ controlador_process_srt_file,
408
+ inputs=[srt_input, voice_model_input_srt, pitch_input_srt, volume_input_srt],
409
+ outputs=[audio_output_srt]
410
+ )
411
+ clear_button_srt = gr.ClearButton(srt_input, value='Limpar')
412
+
413
+ gr.Markdown("Agradecimentos a rany2 pelo Edge-TTS")
414
+
415
  gr.Markdown("""
416
  Desenvolvido por Rafael Godoy <br>
417
  Apoie o projeto pelo https://nubank.com.br/pagar/1ls6a4/0QpSSbWBSq, qualquer valor é bem vindo.
418
+
419
+ Se você precisa de dublagem para seus vídeos Youtube/Cursos e afins, entre em contato comigo https://www.instagram.com/rafael.godoy.ebert/
420
  """)
421
  iface.launch()