tts / app.py
MAZALA2024's picture
Update app.py
6c8d2ca verified
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names
import os
import logging
# Set up logging
# Configure the root logger at DEBUG level so that the logger.debug(...)
# calls made while handling a request are emitted rather than filtered out.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _save_converted_audio(tgt_sr, audio_output):
    """Write converted audio into ``outputs/`` and return the file path.

    A NumPy array is encoded with ``scipy.io.wavfile.write`` at ``tgt_sr``;
    any other payload is written to the file verbatim in binary mode.
    """
    output_filename = f"output_{os.urandom(4).hex()}.wav"
    output_path = os.path.join("outputs", output_filename)
    os.makedirs("outputs", exist_ok=True)

    if isinstance(audio_output, np.ndarray):
        logger.debug(f"Saving numpy array with shape {audio_output.shape}")
        wavfile.write(output_path, tgt_sr, audio_output)
    else:
        logger.debug(f"Saving raw audio data of type {type(audio_output)}")
        with open(output_path, "wb") as f:
            f.write(audio_output)

    logger.debug(f"Successfully saved to {output_path}")
    return output_path


async def convert_tts(model_name, audio_file, slang_rate):
    """Convert an uploaded audio clip with the selected voice model.

    Args:
        model_name: Name of the model chosen in the UI dropdown.
        audio_file: The uploaded audio (the UI passes a file path); ``None``
            when nothing was uploaded.
        slang_rate: Value of the "Slang Rate" slider.

    Returns:
        A ``(payload, audio_path)`` pair. On success ``payload`` is
        ``{"info": info}`` and ``audio_path`` points at the saved file under
        ``outputs/``. On any failure ``payload`` is ``{"error": message}``
        and ``audio_path`` is ``None``; this function does not raise.
    """
    # Local import keeps this block self-contained; asyncio is stdlib.
    import asyncio

    try:
        logger.debug(f"Received request - model: {model_name}, audio: {type(audio_file)}, slang: {slang_rate}")

        if audio_file is None:
            logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None

        # Log the audio file details
        if hasattr(audio_file, 'name'):
            logger.debug(f"Audio file name: {audio_file.name}")
        logger.debug(f"Audio file type: {type(audio_file)}")

        # Single-item batch for parallel_tts. The positional layout is
        # dictated by voice_processing.parallel_tts (not visible here).
        task = (model_name, None, None, slang_rate, True, audio_file)

        # parallel_tts is a blocking call. Running it directly inside this
        # coroutine would stall the event loop (and every other request)
        # for the whole conversion, so hand it to the default executor.
        logger.debug("Starting audio processing")
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])
        logger.debug(f"Processing result: {type(result)}")

        if not result or result[0] is None:
            logger.error("Processing failed - no result")
            return {"error": "Processing failed"}, None

        # Unpack in two steps so a missing audio payload yields the intended
        # error instead of a confusing "cannot unpack NoneType" message.
        info, _, audio_payload = result[0]
        logger.debug(f"Processing complete - info: {info}")
        if audio_payload is None:
            logger.error("No audio output generated")
            return {"error": "No audio output generated"}, None

        tgt_sr, audio_output = audio_payload
        if audio_output is None:
            logger.error("No audio output generated")
            return {"error": "No audio output generated"}, None

        # Save the output
        try:
            output_path = _save_converted_audio(tgt_sr, audio_output)
        except Exception as save_error:
            logger.exception(f"Error saving output: {save_error}")
            return {"error": f"Error saving output: {str(save_error)}"}, None
        return {"info": info}, output_path

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure in the
        # JSON output rather than letting the exception escape.
        logger.exception(f"Error in convert_tts: {str(e)}")
        return {"error": str(e)}, None
# Interface definition
# The UI is built first and request queuing is enabled as a separate step.
iface = gr.Interface(
    title="Voice Conversion",
    fn=convert_tts,
    # Passed to convert_tts positionally: model name, audio file, slang rate.
    inputs=[
        gr.Dropdown(label="Model", choices=get_model_names(), interactive=True),
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Slider(label="Slang Rate", minimum=0, maximum=1, step=0.01),
    ],
    # Receives convert_tts's two return values: the JSON payload and the audio.
    outputs=[gr.JSON(label="Info"), gr.Audio(label="Converted Audio")],
)
iface = iface.queue()
if __name__ == "__main__":
    # Start the app only when this file is executed directly, not on import.
    iface.launch(max_threads=10, show_error=True, debug=True)