hivecorp committed on
Commit
7ad2a01
·
verified ·
1 Parent(s): 2b5d6f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -30
app.py CHANGED
@@ -2,16 +2,13 @@ import gradio as gr
2
  import edge_tts
3
  import asyncio
4
  import tempfile
5
- import os
6
  from moviepy.editor import AudioFileClip
7
  import re
8
 
9
- # Get all available voices
10
  async def get_voices():
11
  voices = await edge_tts.list_voices()
12
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
13
 
14
- # Text to speech function
15
  async def text_to_speech(text, voice, rate, pitch):
16
  if not text.strip():
17
  return None, gr.Warning("Please enter the text to convert.")
@@ -22,57 +19,51 @@ async def text_to_speech(text, voice, rate, pitch):
22
  rate_str = f"{rate:+d}%"
23
  pitch_str = f"{pitch:+d}Hz"
24
  communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
 
25
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
26
  tmp_path = tmp_file.name
27
  await communicate.save(tmp_path)
 
28
  return tmp_path, None
29
 
30
- # Generate SRT based on estimated timing
31
- def generate_srt(text, speech_rate, max_words_per_line):
32
- # Clean up input text
33
- text = re.sub(r'\s+', ' ', text.strip()) # Remove excessive whitespace
34
-
35
- # Split into words
36
  words = text.split()
37
-
38
- # Calculate timing for each line
39
  srt_lines = []
40
  current_line = []
41
- current_time = 0.0 # Start time in seconds
42
  total_words = len(words)
43
-
44
  for i, word in enumerate(words):
45
  current_line.append(word)
46
-
47
- # Calculate current line length
48
  if len(current_line) >= max_words_per_line or i == total_words - 1:
49
- # Create SRT entry
50
  line_text = ' '.join(current_line)
51
- duration = len(line_text.split()) / speech_rate # Estimate duration based on speech rate
 
 
52
 
53
- # Format timing
54
- start_time = current_time
55
- end_time = current_time + duration
56
 
 
57
  start_time_str = f"{int(start_time // 3600):02}:{int((start_time % 3600) // 60):02}:{int(start_time % 60):02},{int((start_time % 1) * 1000):03}"
58
  end_time_str = f"{int(end_time // 3600):02}:{int((end_time % 3600) // 60):02}:{int(end_time % 60):02},{int((end_time % 1) * 1000):03}"
59
 
60
  srt_lines.append(f"{len(srt_lines) + 1}\n{start_time_str} --> {end_time_str}\n{line_text}\n")
61
-
62
- # Move to the next line
63
  current_line = []
64
- current_time += duration # Update current time
65
-
66
  return ''.join(srt_lines)
67
 
68
- # Gradio interface function
69
- def tts_interface(text, voice, rate, pitch, speech_rate, max_words_per_line):
70
  audio_path, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
71
  if warning:
72
  return None, None, warning
73
 
 
 
 
74
  # Generate SRT file
75
- srt_content = generate_srt(text, speech_rate, max_words_per_line)
76
  srt_path = audio_path.replace('.mp3', '_subtitle.srt')
77
 
78
  with open(srt_path, 'w') as f:
@@ -80,7 +71,6 @@ def tts_interface(text, voice, rate, pitch, speech_rate, max_words_per_line):
80
 
81
  return audio_path, srt_path, None
82
 
83
- # Create Gradio app
84
  async def create_demo():
85
  voices = await get_voices()
86
 
@@ -91,7 +81,6 @@ async def create_demo():
91
  gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
92
  gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
93
  gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
94
- gr.Slider(minimum=100, maximum=300, value=150, label="Speech Rate (words per minute)", step=1),
95
  gr.Slider(minimum=3, maximum=8, value=5, label="Max Words per Line", step=1),
96
  ],
97
  outputs=[
@@ -107,7 +96,6 @@ async def create_demo():
107
 
108
  return demo
109
 
110
- # Run the app
111
  if __name__ == "__main__":
112
  demo = asyncio.run(create_demo())
113
  demo.launch()
 
2
  import edge_tts
3
  import asyncio
4
  import tempfile
 
5
  from moviepy.editor import AudioFileClip
6
  import re
7
 
 
8
  async def get_voices():
9
  voices = await edge_tts.list_voices()
10
  return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
11
 
 
12
  async def text_to_speech(text, voice, rate, pitch):
13
  if not text.strip():
14
  return None, gr.Warning("Please enter the text to convert.")
 
19
  rate_str = f"{rate:+d}%"
20
  pitch_str = f"{pitch:+d}Hz"
21
  communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
22
+
23
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
24
  tmp_path = tmp_file.name
25
  await communicate.save(tmp_path)
26
+
27
  return tmp_path, None
28
 
29
def _format_srt_timestamp(seconds):
    """Render a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in whole milliseconds so float error cannot truncate the fraction
    # (e.g. 1.001 s must render as ",001", not ",000").
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(text, audio_duration, max_words_per_line):
    """Build SRT subtitle content for ``text`` spread over ``audio_duration``.

    The text is split on whitespace and grouped into cues of at most
    ``max_words_per_line`` words. Each cue receives a share of the audio
    duration proportional to its word count, so cues are contiguous: the
    first starts at 0 and the last ends at ``audio_duration``.

    Args:
        text: The text that was spoken.
        audio_duration: Total audio length in seconds.
        max_words_per_line: Maximum number of words per subtitle cue.

    Returns:
        The SRT document as a string, with cues separated by a blank line,
        or an empty string when ``text`` contains no words.
    """
    # str.split() with no argument already collapses runs of whitespace and
    # ignores leading/trailing whitespace, so no regex normalisation is needed.
    words = text.split()
    total_words = len(words)
    if total_words == 0:
        return ''

    srt_entries = []
    current_line = []
    words_before = 0  # number of words already emitted in earlier cues

    for i, word in enumerate(words):
        current_line.append(word)
        # Emit a cue when the line is full or the text is exhausted.
        if len(current_line) >= max_words_per_line or i == total_words - 1:
            # Derive both boundaries from the cumulative word count so that
            # each cue starts exactly where the previous one ended.
            start_time = audio_duration * words_before / total_words
            words_before += len(current_line)
            end_time = audio_duration * words_before / total_words

            line_text = ' '.join(current_line)
            srt_entries.append(
                f"{len(srt_entries) + 1}\n"
                f"{_format_srt_timestamp(start_time)} --> {_format_srt_timestamp(end_time)}\n"
                f"{line_text}\n"
            )
            current_line = []

    # SRT cues must be separated by a blank line.
    return '\n'.join(srt_entries)
56
 
57
+ def tts_interface(text, voice, rate, pitch, max_words_per_line):
 
58
  audio_path, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
59
  if warning:
60
  return None, None, warning
61
 
62
+ # Calculate audio duration
63
+ audio_duration = AudioFileClip(audio_path).duration # Get duration in seconds
64
+
65
  # Generate SRT file
66
+ srt_content = generate_srt(text, audio_duration, max_words_per_line)
67
  srt_path = audio_path.replace('.mp3', '_subtitle.srt')
68
 
69
  with open(srt_path, 'w') as f:
 
71
 
72
  return audio_path, srt_path, None
73
 
 
74
  async def create_demo():
75
  voices = await get_voices()
76
 
 
81
  gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
82
  gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
83
  gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
 
84
  gr.Slider(minimum=3, maximum=8, value=5, label="Max Words per Line", step=1),
85
  ],
86
  outputs=[
 
96
 
97
  return demo
98
 
 
99
  if __name__ == "__main__":
100
  demo = asyncio.run(create_demo())
101
  demo.launch()