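"""Gradio app for voice conversion: wraps the parallel_tts pipeline from
voice_processing in a simple upload-and-convert web UI."""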
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names
from io import BytesIO
import asyncio
import os
import tempfile
from pathlib import Path
from gradio.data_classes import FileData
# Create necessary directories
OUTPUTS_DIR = Path(__file__).parent / "outputs"
TEMP_DIR = Path(__file__).parent / "temp"
UPLOAD_DIR = Path(__file__).parent / "uploaded"

# Ensure directories exist
for directory in [OUTPUTS_DIR, TEMP_DIR, UPLOAD_DIR]:
    directory.mkdir(exist_ok=True)
async def convert_tts(model_name, audio_file, slang_rate):
    if audio_file is None:
        return {"error": "No audio file uploaded."}, None

    try:
        print(f"Processing audio file: {audio_file}")

        # Create task for parallel processing
        task = (model_name, None, None, slang_rate, True, audio_file)

        # Run the blocking conversion in a worker thread so the event loop stays responsive
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])

        if not result or result[0] is None or len(result[0]) != 3:
            return {"error": "Processing failed"}, None

        info, _, (tgt_sr, audio_output) = result[0]
        if audio_output is None:
            return {"error": "No audio output generated"}, None

        # Save output to a file in the outputs directory
        output_filename = f"output_{os.urandom(4).hex()}.wav"
        output_path = str(OUTPUTS_DIR / output_filename)

        try:
            if isinstance(audio_output, np.ndarray):
                wavfile.write(output_path, tgt_sr, audio_output)
            else:
                # Already-encoded audio bytes: write them out as-is
                with open(output_path, "wb") as f:
                    f.write(audio_output)
            print(f"Saved output to: {output_path}")
            return {"info": info}, output_path
        except Exception as save_error:
            print(f"Error saving output: {save_error}")
            return {"error": f"Error saving output: {str(save_error)}"}, None

    except Exception as e:
        print(f"Error in convert_tts: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e)}, None
def get_models():
    return get_model_names()
# Initialize the Gradio interface
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Audio(label="Upload Audio", source="upload"),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
    analytics_enabled=False,
    cache_examples=False,
).queue()
# Launch the interface
if __name__ == "__main__":
    iface.launch(
        debug=True,
        show_error=True,
        max_threads=10,
        share=False,  # Set to True if you want to create a public link
    )
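# Usage: `python app.py` starts the local Gradio server; converted audio files
# are written to the ./outputs directory created above.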