Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,16 +2,13 @@ import gradio as gr
|
|
2 |
import edge_tts
|
3 |
import asyncio
|
4 |
import tempfile
|
5 |
-
import os
|
6 |
from moviepy.editor import AudioFileClip
|
7 |
import re
|
8 |
|
9 |
-
# Get all available voices
|
10 |
async def get_voices():
    """Fetch the edge-tts voice catalogue as a label -> short-name mapping.

    Awaits ``edge_tts.list_voices()`` and, for every voice entry returned,
    builds a display label of the form ``"<ShortName> - <Locale> (<Gender>)"``
    that maps to the entry's ``ShortName``.

    Returns:
        dict: display label -> voice ``ShortName``.
    """
    catalogue = {}
    for entry in await edge_tts.list_voices():
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        catalogue[label] = short_name
    return catalogue
|
13 |
|
14 |
-
# Text to speech function
|
15 |
async def text_to_speech(text, voice, rate, pitch):
|
16 |
if not text.strip():
|
17 |
return None, gr.Warning("Please enter the text to convert.")
|
@@ -22,57 +19,51 @@ async def text_to_speech(text, voice, rate, pitch):
|
|
22 |
rate_str = f"{rate:+d}%"
|
23 |
pitch_str = f"{pitch:+d}Hz"
|
24 |
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
|
|
|
25 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
26 |
tmp_path = tmp_file.name
|
27 |
await communicate.save(tmp_path)
|
|
|
28 |
return tmp_path, None
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
text = re.sub(r'\s+', ' ', text.strip()) # Remove excessive whitespace
|
34 |
-
|
35 |
-
# Split into words
|
36 |
words = text.split()
|
37 |
-
|
38 |
-
# Calculate timing for each line
|
39 |
srt_lines = []
|
40 |
current_line = []
|
41 |
-
current_time = 0.0 # Start time in seconds
|
42 |
total_words = len(words)
|
43 |
-
|
44 |
for i, word in enumerate(words):
|
45 |
current_line.append(word)
|
46 |
-
|
47 |
-
# Calculate current line length
|
48 |
if len(current_line) >= max_words_per_line or i == total_words - 1:
|
49 |
-
# Create SRT entry
|
50 |
line_text = ' '.join(current_line)
|
51 |
-
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
start_time = current_time
|
55 |
-
end_time = current_time + duration
|
56 |
|
|
|
57 |
start_time_str = f"{int(start_time // 3600):02}:{int((start_time % 3600) // 60):02}:{int(start_time % 60):02},{int((start_time % 1) * 1000):03}"
|
58 |
end_time_str = f"{int(end_time // 3600):02}:{int((end_time % 3600) // 60):02}:{int(end_time % 60):02},{int((end_time % 1) * 1000):03}"
|
59 |
|
60 |
srt_lines.append(f"{len(srt_lines) + 1}\n{start_time_str} --> {end_time_str}\n{line_text}\n")
|
61 |
-
|
62 |
-
# Move to the next line
|
63 |
current_line = []
|
64 |
-
|
65 |
-
|
66 |
return ''.join(srt_lines)
|
67 |
|
68 |
-
|
69 |
-
def tts_interface(text, voice, rate, pitch, speech_rate, max_words_per_line):
|
70 |
audio_path, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
|
71 |
if warning:
|
72 |
return None, None, warning
|
73 |
|
|
|
|
|
|
|
74 |
# Generate SRT file
|
75 |
-
srt_content = generate_srt(text,
|
76 |
srt_path = audio_path.replace('.mp3', '_subtitle.srt')
|
77 |
|
78 |
with open(srt_path, 'w') as f:
|
@@ -80,7 +71,6 @@ def tts_interface(text, voice, rate, pitch, speech_rate, max_words_per_line):
|
|
80 |
|
81 |
return audio_path, srt_path, None
|
82 |
|
83 |
-
# Create Gradio app
|
84 |
async def create_demo():
|
85 |
voices = await get_voices()
|
86 |
|
@@ -91,7 +81,6 @@ async def create_demo():
|
|
91 |
gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
|
92 |
gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
|
93 |
gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
|
94 |
-
gr.Slider(minimum=100, maximum=300, value=150, label="Speech Rate (words per minute)", step=1),
|
95 |
gr.Slider(minimum=3, maximum=8, value=5, label="Max Words per Line", step=1),
|
96 |
],
|
97 |
outputs=[
|
@@ -107,7 +96,6 @@ async def create_demo():
|
|
107 |
|
108 |
return demo
|
109 |
|
110 |
-
# Run the app
|
111 |
if __name__ == "__main__":
|
112 |
demo = asyncio.run(create_demo())
|
113 |
demo.launch()
|
|
|
2 |
import edge_tts
|
3 |
import asyncio
|
4 |
import tempfile
|
|
|
5 |
from moviepy.editor import AudioFileClip
|
6 |
import re
|
7 |
|
|
|
8 |
async def get_voices():
    """Fetch the edge-tts voice catalogue as a label -> short-name mapping.

    Awaits ``edge_tts.list_voices()`` and, for every voice entry returned,
    builds a display label of the form ``"<ShortName> - <Locale> (<Gender>)"``
    that maps to the entry's ``ShortName``.

    Returns:
        dict: display label -> voice ``ShortName``.
    """
    catalogue = {}
    for entry in await edge_tts.list_voices():
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        catalogue[label] = short_name
    return catalogue
|
11 |
|
|
|
12 |
async def text_to_speech(text, voice, rate, pitch):
|
13 |
if not text.strip():
|
14 |
return None, gr.Warning("Please enter the text to convert.")
|
|
|
19 |
rate_str = f"{rate:+d}%"
|
20 |
pitch_str = f"{pitch:+d}Hz"
|
21 |
communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
|
22 |
+
|
23 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
24 |
tmp_path = tmp_file.name
|
25 |
await communicate.save(tmp_path)
|
26 |
+
|
27 |
return tmp_path, None
|
28 |
|
29 |
+
def _format_srt_timestamp(seconds):
    """Render a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Work in whole milliseconds so float error cannot truncate a value such
    # as 1.001 s down to ",000", and so rounding carries into the seconds.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(text, audio_duration, max_words_per_line):
    """Build SRT subtitle text by spreading *text* evenly over the audio.

    The text is split on whitespace into words, grouped into cues of at most
    ``max_words_per_line`` words, and every cue is given a share of
    ``audio_duration`` proportional to the number of words it contains.
    Cue boundaries are derived from the cumulative word count, so each cue
    starts exactly where the previous one ends and the last cue ends at
    ``audio_duration``.

    Args:
        text: The spoken text. Runs of whitespace are treated as one separator.
        audio_duration: Length of the audio in seconds.
        max_words_per_line: Maximum number of words per subtitle cue.

    Returns:
        The SRT document as a string: numbered cues separated by a blank
        line, or an empty string when *text* contains no words.
    """
    # str.split() with no argument already collapses any run of whitespace
    # and ignores leading/trailing whitespace.
    words = text.split()
    total_words = len(words)

    entries = []
    current_line = []
    words_done = 0  # words already placed in earlier cues

    for i, word in enumerate(words):
        current_line.append(word)

        # Close the cue once it is full or the text is exhausted.
        if len(current_line) >= max_words_per_line or i == total_words - 1:
            # Timing is based on word counts, not on the rendered SRT text.
            start_time = audio_duration * words_done / total_words
            words_done += len(current_line)
            end_time = audio_duration * words_done / total_words

            entries.append(
                f"{len(entries) + 1}\n"
                f"{_format_srt_timestamp(start_time)} --> "
                f"{_format_srt_timestamp(end_time)}\n"
                f"{' '.join(current_line)}\n"
            )
            current_line = []

    # SRT requires a blank line between consecutive cues.
    return "\n".join(entries)
|
56 |
|
57 |
+
def tts_interface(text, voice, rate, pitch, max_words_per_line):
|
|
|
58 |
audio_path, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
|
59 |
if warning:
|
60 |
return None, None, warning
|
61 |
|
62 |
+
# Calculate audio duration
|
63 |
+
audio_duration = AudioFileClip(audio_path).duration # Get duration in seconds
|
64 |
+
|
65 |
# Generate SRT file
|
66 |
+
srt_content = generate_srt(text, audio_duration, max_words_per_line)
|
67 |
srt_path = audio_path.replace('.mp3', '_subtitle.srt')
|
68 |
|
69 |
with open(srt_path, 'w') as f:
|
|
|
71 |
|
72 |
return audio_path, srt_path, None
|
73 |
|
|
|
74 |
async def create_demo():
|
75 |
voices = await get_voices()
|
76 |
|
|
|
81 |
gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
|
82 |
gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1),
|
83 |
gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
|
|
|
84 |
gr.Slider(minimum=3, maximum=8, value=5, label="Max Words per Line", step=1),
|
85 |
],
|
86 |
outputs=[
|
|
|
96 |
|
97 |
return demo
|
98 |
|
|
|
99 |
if __name__ == "__main__":
|
100 |
demo = asyncio.run(create_demo())
|
101 |
demo.launch()
|