Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import base64
|
3 |
+
import numpy as np
|
4 |
+
from scipy.io import wavfile
|
5 |
+
from voice_processing import parallel_tts, get_model_names
|
6 |
+
from io import BytesIO
|
7 |
+
import asyncio # Import asyncio
|
8 |
+
|
9 |
+
# Define an asynchronous function for the Gradio interface
|
10 |
+
async def convert_tts(model_name, audio_file, slang_rate):
    """Run voice conversion on an uploaded audio file for the Gradio UI.

    The blocking ``parallel_tts`` call is dispatched to the running event
    loop's default executor so this coroutine does not block the loop while
    the conversion runs.

    Args:
        model_name: Name of the model to use, forwarded to ``parallel_tts``.
        audio_file: The uploaded audio as handed over by Gradio, or ``None``
            when nothing was uploaded.
        slang_rate: Numeric rate forwarded unchanged to ``parallel_tts``.

    Returns:
        A ``(payload, audio)`` tuple. On success ``payload`` is
        ``{"info": info}`` and ``audio`` is a ``data:audio/wav;base64,...``
        string. On any failure ``payload`` is ``{"error": message}`` and
        ``audio`` is ``None``; exceptions are reported this way rather than
        propagated.
    """
    if audio_file is None:
        return {"error": "No audio file uploaded."}, None

    try:
        # Single task for parallel_tts. The meaning of each position is
        # defined by voice_processing.parallel_tts (not visible here).
        task = (model_name, None, None, slang_rate, True, audio_file)

        # get_running_loop() is the recommended call inside a coroutine;
        # get_event_loop() has deprecated fallback behaviour.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])

        # Guard against a missing/empty result list before indexing so the
        # caller gets "Processing failed" rather than a raw IndexError text.
        has_result = result is not None and len(result) > 0
        first = result[0] if has_result else None
        if first is None or len(first) != 3:
            return {"error": "Processing failed"}, None

        info, _, (tgt_sr, audio_output) = first

        if audio_output is None:
            return {"error": "No audio output generated"}, None

        if isinstance(audio_output, np.ndarray):
            # Serialise the samples to an in-memory WAV container.
            with BytesIO() as wav_buffer:
                wavfile.write(wav_buffer, tgt_sr, audio_output)
                audio_bytes = wav_buffer.getvalue()
        else:
            # Presumably already encoded bytes -- TODO confirm against
            # parallel_tts; anything else fails in b64encode below and is
            # reported through the except branch.
            audio_bytes = audio_output

        encoded = base64.b64encode(audio_bytes).decode('utf-8')
        # NOTE(review): the second return value feeds a gr.Audio output;
        # confirm the installed Gradio version accepts a data URI string.
        audio_data_uri = f"data:audio/wav;base64,{encoded}"
        return {"info": info}, audio_data_uri

    except Exception as e:
        # UI boundary: surface the failure in the JSON output, not a crash.
        print(f"Error in convert_tts: {str(e)}")
        return {"error": str(e)}, None
|
45 |
+
|
46 |
+
def get_models():
    """Return whatever ``get_model_names()`` reports as the available models."""
    available_models = get_model_names()
    return available_models
|
48 |
+
|
49 |
+
# Initialize the Gradio interface
|
50 |
+
# Components are created in the same order as before: inputs, then outputs.
_input_components = [
    gr.Dropdown(label="Model", choices=get_models(), interactive=True),
    gr.Audio(type="filepath", label="Upload Audio"),
    gr.Slider(label="Slang Rate", minimum=0, maximum=1, step=0.01),
]
_output_components = [
    gr.JSON(label="Info"),
    gr.Audio(label="Converted Audio"),
]

iface = gr.Interface(
    title="Voice Conversion",
    fn=convert_tts,
    inputs=_input_components,
    outputs=_output_components,
)
# Enable request queuing; keep the value queue() returns, as the original did.
iface = iface.queue()

# Start the app in debug mode with max_threads set to 10.
iface.launch(max_threads=10, debug=True)
|