|
import gradio as gr |
|
import base64 |
|
import numpy as np |
|
from scipy.io import wavfile |
|
from voice_processing import parallel_tts, get_model_names |
|
import os |
|
import logging |
|
from rvc_service import RVCService |
|
import asyncio |
|
from voice_processing import parallel_tts, get_model_names |
|
import sys |
|
from datetime import datetime |
|
import traceback |
|
import json |
|
|
|
|
|
|
|
# Process-wide logging: every record at DEBUG and above is sent both to a
# log file in the working directory and to stdout.
_log_handlers = [
    logging.FileHandler('rvc_server.log'),
    logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
    handlers=_log_handlers,
    format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    level=logging.DEBUG,
)

# Logger for server-level events (start-up, launch failures).
logger = logging.getLogger('rvc_server')

# Service instance created once at import time.
# NOTE(review): not referenced in the visible part of this file -- confirm
# whether other code relies on it before removing.
rvc_service = RVCService()
|
|
|
class _RequestLogAdapter(logging.LoggerAdapter):
    """Prefix every message with the ID of the request it belongs to."""

    def process(self, msg, kwargs):
        return f"[request_{self.extra['request_id']}] {msg}", kwargs


def setup_request_logging():
    """Create a request-scoped logger and a fresh request ID.

    The request ID is the current local time formatted as
    ``YYYYMMDD_HHMMSS_ffffff``.

    A single shared logger is wrapped in a ``LoggerAdapter`` instead of
    calling ``logging.getLogger`` with a new name per request: named loggers
    are kept by the logging manager for the life of the process, so creating
    one per request grows memory without bound on a long-running server.

    Returns:
        tuple: ``(request_logger, request_id)`` where ``request_logger``
        exposes the usual ``debug``/``info``/``warning``/``error``/
        ``exception`` methods and tags each message with the request ID.
    """
    request_id = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    shared_logger = logging.getLogger('rvc_server.request')
    request_logger = _RequestLogAdapter(shared_logger, {'request_id': request_id})
    return request_logger, request_id
|
|
|
def _save_converted_audio(audio_output, tgt_sr, request_id, req_logger):
    """Write the converted audio to ``outputs/output_<request_id>.wav`` and return the path."""
    os.makedirs("outputs", exist_ok=True)
    output_path = os.path.join("outputs", f"output_{request_id}.wav")

    if isinstance(audio_output, np.ndarray):
        req_logger.info("Saving numpy array output: shape=%s", audio_output.shape)
        wavfile.write(output_path, tgt_sr, audio_output)
    else:
        # Anything that is not an ndarray is written verbatim.
        # NOTE(review): presumably already-encoded audio bytes -- confirm.
        req_logger.info("Saving raw audio output")
        with open(output_path, "wb") as f:
            f.write(audio_output)

    req_logger.info("Successfully saved to %s", output_path)
    return output_path


def convert_tts(model_name, audio_file, slang_rate):
    """Voice conversion endpoint.

    Runs the uploaded audio file through ``parallel_tts`` with the selected
    model and stores the converted audio under ``outputs/``.

    Args:
        model_name: Model selected in the UI dropdown.
        audio_file: Path of the uploaded audio file, or ``None`` when nothing
            was uploaded.
        slang_rate: Value of the "Slang Rate" slider, forwarded unchanged to
            ``parallel_tts``.

    Returns:
        tuple: ``(payload, output_path)``. On success ``payload`` is
        ``{"info": ...}`` and ``output_path`` is the written WAV file. On any
        failure ``payload`` is ``{"error": <message>}`` and ``output_path`` is
        ``None``. Exceptions are never propagated to the caller.
    """
    req_logger, request_id = setup_request_logging()
    req_logger.info("New request received - ID: %s", request_id)
    req_logger.info("Parameters: model=%s, slang_rate=%s", model_name, slang_rate)

    if audio_file is None:
        req_logger.error("No audio file provided")
        return {"error": "No audio file uploaded."}, None

    try:
        req_logger.info("Processing audio file: %s", audio_file)
        # Read only to log the input's properties; this also rejects files
        # that scipy cannot parse as WAV before any processing starts.
        sr, audio = wavfile.read(audio_file)
        req_logger.info("Audio loaded: sr=%sHz, shape=%s", sr, audio.shape)

        # NOTE(review): positional layout is dictated by parallel_tts; the
        # meaning of the two None slots and the True flag is not visible
        # here -- confirm against voice_processing.
        task = (model_name, None, None, slang_rate, True, audio_file)

        req_logger.info("Running parallel processing")
        result = parallel_tts([task])

        if not result or result[0] is None:
            req_logger.error("Processing failed - no result")
            return {"error": "Processing failed"}, None

        result_tuple = result[0]
        if not (isinstance(result_tuple, tuple) and len(result_tuple) == 3):
            req_logger.error("Invalid result format: %s", result_tuple)
            return {"error": "Invalid result format"}, None

        info, _, audio_payload = result_tuple

        # A missing audio payload used to surface as an opaque unpacking
        # TypeError; report it as "no output" and keep the info for diagnosis.
        if audio_payload is None:
            req_logger.error("No audio output generated (info=%s)", info)
            return {"error": "No audio output generated"}, None

        try:
            tgt_sr, audio_output = audio_payload
        except (TypeError, ValueError):
            req_logger.error("Invalid result format: %s", result_tuple)
            return {"error": "Invalid result format"}, None

        if audio_output is None:
            req_logger.error("No audio output generated")
            return {"error": "No audio output generated"}, None

        output_path = _save_converted_audio(
            audio_output, tgt_sr, request_id, req_logger
        )
        return {"info": info}, output_path

    except Exception as e:
        # Endpoint boundary: log the full traceback, hand the UI a message.
        req_logger.exception("Error processing audio: %s", e)
        return {"error": f"Processing error: {e}"}, None
|
|
|
|
|
# Input widgets, in the positional order expected by convert_tts.
_input_components = [
    gr.Dropdown(choices=get_model_names(), label="Model", interactive=True),
    gr.Audio(label="Upload Audio", type="filepath"),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
]

# Output widgets: the JSON payload and the converted audio returned by
# convert_tts.
_output_components = [
    gr.JSON(label="Info"),
    gr.Audio(label="Converted Audio"),
]

# Gradio UI wired to convert_tts, with request queuing enabled.
iface = gr.Interface(
    fn=convert_tts,
    inputs=_input_components,
    outputs=_output_components,
    title="Voice Conversion",
).queue()
|
|
|
if __name__ == "__main__":
    # Script entry point: start the Gradio server and log any launch failure
    # together with its traceback.
    logger.info("Starting RVC server")
    launch_options = {
        "debug": True,
        "show_error": True,
        "max_threads": 10,
    }
    try:
        iface.launch(**launch_options)
    except Exception as exc:
        logger.exception("Error launching server: %s", exc)