Update app.py
Browse files
app.py
CHANGED
@@ -5,18 +5,7 @@ from scipy.io import wavfile
|
|
5 |
from voice_processing import tts, get_model_names, voice_mapping
|
6 |
from io import BytesIO
|
7 |
import asyncio
|
8 |
-
|
9 |
-
import multiprocessing
|
10 |
-
|
11 |
-
# Determine the optimal number of threads
|
12 |
-
def get_optimal_threads():
|
13 |
-
cpu_count = multiprocessing.cpu_count()
|
14 |
-
# Assuming you want to use 75% of the available cores
|
15 |
-
optimal_threads = int(cpu_count * 0.75)
|
16 |
-
return optimal_threads
|
17 |
-
|
18 |
-
# Initialize the ThreadPoolExecutor with the optimal number of threads
|
19 |
-
executor = ThreadPoolExecutor(max_workers=get_optimal_threads())
|
20 |
|
21 |
async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
|
22 |
edge_tts_voice = voice_mapping.get(selected_voice)
|
@@ -28,14 +17,12 @@ async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uplo
|
|
28 |
with open(voice_upload.name, 'rb') as f:
|
29 |
voice_upload_file = f.read()
|
30 |
|
31 |
-
# Process the text input or uploaded voice
|
32 |
info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
|
33 |
model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
|
34 |
)
|
35 |
|
36 |
_, audio_output = tts_output_data
|
37 |
|
38 |
-
# Return audio data as bytes
|
39 |
audio_bytes = None
|
40 |
if isinstance(audio_output, np.ndarray):
|
41 |
byte_io = BytesIO()
|
@@ -48,39 +35,68 @@ async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uplo
|
|
48 |
audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
|
49 |
return {"info": info}, audio_data_uri
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def get_models():
|
52 |
return get_model_names()
|
53 |
|
54 |
def get_voices():
|
55 |
return list(voice_mapping.keys())
|
56 |
|
57 |
-
def parallel_convert_tts(input_data):
|
58 |
-
model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload = input_data
|
59 |
-
loop = asyncio.new_event_loop()
|
60 |
-
asyncio.set_event_loop(loop)
|
61 |
-
return loop.run_until_complete(
|
62 |
-
convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload)
|
63 |
-
)
|
64 |
-
|
65 |
-
def run_parallel_conversion(inputs):
|
66 |
-
futures = [
|
67 |
-
executor.submit(parallel_convert_tts, input_data) for input_data in inputs
|
68 |
-
]
|
69 |
-
results = [future.result() for future in futures]
|
70 |
-
return results
|
71 |
-
|
72 |
iface = gr.Interface(
|
73 |
-
fn=
|
74 |
inputs=[
|
75 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
76 |
],
|
77 |
outputs=[
|
78 |
gr.JSON(label="Info"),
|
79 |
-
gr.
|
80 |
],
|
81 |
-
title="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
)
|
83 |
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
|
|
|
5 |
from voice_processing import tts, get_model_names, voice_mapping
|
6 |
from io import BytesIO
|
7 |
import asyncio
|
8 |
+
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
|
11 |
edge_tts_voice = voice_mapping.get(selected_voice)
|
|
|
17 |
with open(voice_upload.name, 'rb') as f:
|
18 |
voice_upload_file = f.read()
|
19 |
|
|
|
20 |
info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
|
21 |
model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
|
22 |
)
|
23 |
|
24 |
_, audio_output = tts_output_data
|
25 |
|
|
|
26 |
audio_bytes = None
|
27 |
if isinstance(audio_output, np.ndarray):
|
28 |
byte_io = BytesIO()
|
|
|
35 |
audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
|
36 |
return {"info": info}, audio_data_uri
|
37 |
|
38 |
+
async def batch_convert_tts(json_file):
    """Run TTS conversion for every entry of an uploaded JSON batch file.

    The file must contain a JSON array of objects; each object may carry:
      - "model_name": model identifier passed through to convert_tts
      - "text": the text to synthesize
      - "voice": a key of voice_mapping
      - "slang_rate": float in [0, 1] (defaults to 0.5)
      - "use_uploaded_voice": bool (defaults to False)
      - "voice_upload": optional uploaded-voice reference (defaults to None)

    All entries are converted concurrently via asyncio.gather; the relative
    order of results matches the order of entries in the file.

    Returns:
        list[dict]: one {"info": ..., "audio_uri": ...} dict per entry.
    """
    # json_file is a Gradio file object; .name is the temp path on disk.
    with open(json_file.name, 'r', encoding='utf-8') as file:
        batch_data = json.load(file)

    # Build one coroutine per entry so gather() can run them concurrently.
    tasks = [
        convert_tts(
            entry.get("model_name"),
            entry.get("text"),
            entry.get("voice"),
            entry.get("slang_rate", 0.5),
            entry.get("use_uploaded_voice", False),
            entry.get("voice_upload", None),
        )
        for entry in batch_data
    ]

    responses = await asyncio.gather(*tasks)

    # convert_tts returns an (info, audio_data_uri) pair per call.
    return [
        {"info": info, "audio_uri": audio_uri}
        for info, audio_uri in responses
    ]
|
61 |
+
|
62 |
def get_models():
    """Return the available TTS model names for the UI dropdown."""
    model_names = get_model_names()
    return model_names
|
64 |
|
65 |
def get_voices():
    """Return the selectable voice names (the keys of voice_mapping)."""
    return [voice_name for voice_name in voice_mapping]
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# --- Gradio UI -------------------------------------------------------------

# Widgets for the single-conversion tab: one text snippet -> one audio URI.
_single_inputs = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]
_single_outputs = [
    gr.JSON(label="Info"),
    gr.Textbox(label="Audio URI"),
]

iface = gr.Interface(
    fn=convert_tts,
    inputs=_single_inputs,
    outputs=_single_outputs,
    title="Text-to-Speech Conversion",
    allow_flagging="never",
)

# Batch tab: a JSON file describing many conversions -> list of results.
batch_iface = gr.Interface(
    fn=batch_convert_tts,
    inputs=gr.File(label="JSON File"),
    outputs=gr.JSON(label="Batch Results"),
    title="Batch Text-to-Speech Conversion",
    allow_flagging="never",
)

# Present both modes as tabs of a single app and start the server.
app = gr.TabbedInterface(
    interface_list=[iface, batch_iface],
    tab_names=["Single Conversion", "Batch Conversion"],
)

app.launch()
|
100 |
+
|
101 |
|
102 |
|