tts / app.py
MAZALA2024's picture
Update app.py
251af67 verified
raw
history blame
4.54 kB
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names
import os
import logging
from rvc_service import RVCService # Our new service
import asyncio
from voice_processing import parallel_tts, get_model_names
import sys
from datetime import datetime # Add this import
import traceback
import json
# Process-wide logging: the root logger accepts everything from DEBUG upward
# and fans each record out to a log file in the working directory and to stdout.
logging.basicConfig(
    handlers=[
        logging.FileHandler('rvc_server.log'),
        logging.StreamHandler(sys.stdout),
    ],
    format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    level=logging.DEBUG,
)

# Module-level logger used for server lifecycle messages.
logger = logging.getLogger('rvc_server')

# The RVC service is constructed once, at import time.
# NOTE(review): nothing in the visible part of this file references
# rvc_service afterwards -- confirm whether it is still needed.
rvc_service = RVCService()
class _RequestLogAdapter(logging.LoggerAdapter):
    """Logger adapter that prefixes every message with its request ID."""

    def process(self, msg, kwargs):
        # ``self.extra`` is the dict handed to the constructor.
        return f"[{self.extra['request_id']}] {msg}", kwargs


def setup_request_logging():
    """Create a request-scoped logger and a fresh request ID.

    The request ID is the current local time formatted as
    ``YYYYmmdd_HHMMSS_ffffff``.

    A single shared logger (``rvc_server.request``) is wrapped in a
    lightweight adapter instead of calling ``logging.getLogger`` with a new
    name per request: the logging module keeps every named logger alive for
    the lifetime of the process, so per-request logger names grow without
    bound on a long-running server.

    Returns:
        tuple: ``(req_logger, request_id)`` where ``req_logger`` exposes the
        usual ``debug``/``info``/``error``/``exception`` methods and tags
        each message with ``request_id``.
    """
    request_id = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    shared_logger = logging.getLogger('rvc_server.request')
    req_logger = _RequestLogAdapter(shared_logger, {'request_id': request_id})
    return req_logger, request_id
def _save_converted_audio(audio_output, tgt_sr, request_id, req_logger):
    """Write converted audio to ``outputs/output_<request_id>.wav``; return the path."""
    os.makedirs("outputs", exist_ok=True)
    output_path = os.path.join("outputs", f"output_{request_id}.wav")
    if isinstance(audio_output, np.ndarray):
        req_logger.info(f"Saving numpy array output: shape={audio_output.shape}")
        wavfile.write(output_path, tgt_sr, audio_output)
    else:
        # Anything that is not an array is written verbatim.
        # NOTE(review): presumably already-encoded audio bytes -- confirm
        # against what parallel_tts can return.
        req_logger.info("Saving raw audio output")
        with open(output_path, "wb") as f:
            f.write(audio_output)
    req_logger.info(f"Successfully saved to {output_path}")
    return output_path


def _run_conversion(model_name, audio_file, slang_rate, request_id, req_logger):
    """Run one conversion task and translate its result into the endpoint's return pair."""
    req_logger.info(f"Processing audio file: {audio_file}")
    # The input is read here only to log its sample rate and shape; a file
    # scipy cannot parse raises and is reported by the caller.
    sr, audio = wavfile.read(audio_file)
    req_logger.info(f"Audio loaded: sr={sr}Hz, shape={audio.shape}")

    # Field order is dictated by parallel_tts, which is defined outside this file.
    task = (model_name, None, None, slang_rate, True, audio_file)
    req_logger.info("Running parallel processing")
    result = parallel_tts([task])
    if not result or result[0] is None:
        req_logger.error("Processing failed - no result")
        return {"error": "Processing failed"}, None

    result_tuple = result[0]
    if not (isinstance(result_tuple, tuple) and len(result_tuple) == 3):
        req_logger.error(f"Invalid result format: {result_tuple}")
        return {"error": "Invalid result format"}, None

    info, _, audio_payload = result_tuple
    # A missing payload is reported explicitly rather than surfacing as an
    # opaque "cannot unpack" TypeError.
    if audio_payload is None:
        req_logger.error("No audio output generated")
        return {"error": "No audio output generated"}, None
    tgt_sr, audio_output = audio_payload
    if audio_output is None:
        req_logger.error("No audio output generated")
        return {"error": "No audio output generated"}, None

    output_path = _save_converted_audio(audio_output, tgt_sr, request_id, req_logger)
    return {"info": info}, output_path


def convert_tts(model_name, audio_file, slang_rate):
    """Voice conversion endpoint.

    Args:
        model_name: Model identifier, forwarded as the first field of the
            task handed to ``parallel_tts``.
        audio_file: Path of the input audio file, or ``None`` when nothing
            was uploaded.
        slang_rate: Value forwarded unchanged to ``parallel_tts``.

    Returns:
        tuple: ``({"info": info}, output_path)`` on success, or
        ``({"error": message}, None)`` on any failure. Exceptions are never
        propagated to the caller; they are logged with their traceback and
        converted into the error form.
    """
    req_logger, request_id = setup_request_logging()
    try:
        req_logger.info(f"New request received - ID: {request_id}")
        req_logger.info(f"Parameters: model={model_name}, slang_rate={slang_rate}")
        if audio_file is None:
            req_logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None
        try:
            return _run_conversion(
                model_name, audio_file, slang_rate, request_id, req_logger
            )
        except Exception as e:
            # .exception() records the traceback, which .error(str(e)) dropped.
            req_logger.exception(f"Error processing audio: {str(e)}")
            return {"error": f"Processing error: {str(e)}"}, None
    except Exception as e:
        req_logger.exception(f"Unexpected error: {str(e)}")
        return {"error": str(e)}, None
# Build the Gradio UI around convert_tts (a plain synchronous function):
# three inputs (model choice, uploaded audio as a file path, slang rate)
# and two outputs (a JSON status payload and the converted audio).
iface = gr.Interface(
    fn=convert_tts,
    title="Voice Conversion",
    inputs=[
        gr.Dropdown(choices=get_model_names(), label="Model", interactive=True),
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
)
# Turn on request queuing and keep whatever queue() hands back.
iface = iface.queue()
if __name__ == "__main__":
    # Script entry point: start the Gradio server and log any launch failure
    # together with its traceback instead of letting it propagate.
    logger.info("Starting RVC server")
    try:
        iface.launch(max_threads=10, show_error=True, debug=True)
    except Exception as e:
        logger.exception(f"Error launching server: {e}")