File size: 4,537 Bytes
06dd398 49e8fad 6dc7b04 555564b 516b487 10ee224 8290230 516b487 10ee224 516b487 93f33db d60199c 516b487 06dd398 516b487 6dc7b04 516b487 6dc7b04 b853861 461e4d3 d60199c 06dd398 b853861 461e4d3 b853861 23e22b5 d60199c 23e22b5 251af67 461e4d3 251af67 461e4d3 d60199c 461e4d3 23e22b5 251af67 d60199c 461e4d3 d60199c 461e4d3 d60199c 251af67 d60199c 461e4d3 d60199c 461e4d3 23e22b5 06dd398 461e4d3 516b487 b853861 516b487 06dd398 2e30408 06dd398 6dc7b04 b39fefc 06dd398 1b123c7 06dd398 516b487 06dd398 49e8fad 516b487 b4bd731 516b487 b4bd731 516b487 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names
import os
import logging
from rvc_service import RVCService # Our new service
import asyncio
from voice_processing import parallel_tts, get_model_names
import sys
from datetime import datetime # Add this import
import traceback
import json
# Root logging configuration: every record from DEBUG upward is written both
# to the 'rvc_server.log' file and to standard output, using one shared format.
logging.basicConfig(
    handlers=[
        logging.FileHandler('rvc_server.log'),
        logging.StreamHandler(sys.stdout),
    ],
    format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    level=logging.DEBUG,
)

# Module-wide logger for server lifecycle messages.
logger = logging.getLogger('rvc_server')

# Single RVCService instance, constructed once when the module is imported.
rvc_service = RVCService()
def setup_request_logging():
    """Build a logger dedicated to one request, plus the id that names it.

    Returns:
        A ``(logger, request_id)`` tuple. ``request_id`` is the current local
        time rendered as ``YYYYmmdd_HHMMSS_ffffff``; the logger is the one
        registered under the name ``request_<request_id>``.
    """
    # The microsecond-resolution timestamp doubles as the request identifier.
    request_id = f"{datetime.now():%Y%m%d_%H%M%S_%f}"
    # NOTE(review): logging.getLogger() keeps returning the same object for a
    # given name, so each distinct request name stays registered for the life
    # of the process -- confirm this growth is acceptable, or consider a
    # LoggerAdapter around one shared logger.
    return logging.getLogger('request_' + request_id), request_id
def convert_tts(model_name, audio_file, slang_rate):
    """Voice conversion endpoint used as the Gradio handler.

    Args:
        model_name: Model identifier, forwarded unchanged to ``parallel_tts``.
        audio_file: Filesystem path of the uploaded audio, or ``None`` when
            nothing was uploaded.
        slang_rate: Numeric setting forwarded unchanged to ``parallel_tts``.

    Returns:
        A ``(payload, output_path)`` pair. On success ``payload`` is
        ``{"info": info}`` and ``output_path`` is the file written under
        ``outputs/``. On any failure ``payload`` is ``{"error": message}`` and
        ``output_path`` is ``None``. Exceptions are never propagated to the
        caller; they are logged (with traceback) and turned into an error
        payload.
    """
    req_logger, request_id = setup_request_logging()

    try:
        req_logger.info(f"New request received - ID: {request_id}")
        req_logger.info(f"Parameters: model={model_name}, slang_rate={slang_rate}")

        if audio_file is None:
            req_logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None

        try:
            req_logger.info(f"Processing audio file: {audio_file}")
            # Reading the file here only feeds the log line below, but it also
            # means an input that scipy cannot parse as WAV fails fast.
            sr, audio = wavfile.read(audio_file)
            req_logger.info(f"Audio loaded: sr={sr}Hz, shape={audio.shape}")

            # Task tuple layout is dictated by parallel_tts.
            # NOTE(review): meaning of the None/None/True slots is not visible
            # in this file -- confirm against voice_processing.
            task = (model_name, None, None, slang_rate, True, audio_file)
            req_logger.info("Running parallel processing")
            result = parallel_tts([task])

            if not result or result[0] is None:
                req_logger.error("Processing failed - no result")
                return {"error": "Processing failed"}, None

            result_tuple = result[0]

            # Anything other than a 3-tuple cannot be unpacked below.
            if not (isinstance(result_tuple, tuple) and len(result_tuple) == 3):
                req_logger.error(f"Invalid result format: {result_tuple}")
                return {"error": "Invalid result format"}, None

            info, _, (tgt_sr, audio_output) = result_tuple
            if audio_output is None:
                req_logger.error("No audio output generated")
                return {"error": "No audio output generated"}, None

            # The request id keeps output file names distinct per request.
            output_path = os.path.join("outputs", f"output_{request_id}.wav")
            os.makedirs("outputs", exist_ok=True)

            if isinstance(audio_output, np.ndarray):
                req_logger.info(f"Saving numpy array output: shape={audio_output.shape}")
                wavfile.write(output_path, tgt_sr, audio_output)
            else:
                # Non-array output is written to disk as-is.
                req_logger.info("Saving raw audio output")
                with open(output_path, "wb") as f:
                    f.write(audio_output)

            req_logger.info(f"Successfully saved to {output_path}")
            return {"info": info}, output_path

        except Exception as e:
            # .exception() records the traceback, which plain .error() dropped.
            req_logger.exception(f"Error processing audio: {e}")
            return {"error": f"Processing error: {e}"}, None

    except Exception as e:
        req_logger.exception(f"Unexpected error: {e}")
        return {"error": str(e)}, None
# Gradio front end with request queueing enabled. Inputs map positionally to
# convert_tts(model_name, audio_file, slang_rate); outputs map to the
# (payload, output_path) pair it returns.
iface = gr.Interface(
    title="Voice Conversion",
    fn=convert_tts,  # plain synchronous handler
    inputs=[
        gr.Dropdown(label="Model", choices=get_model_names(), interactive=True),
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Slider(label="Slang Rate", minimum=0, maximum=1, step=0.01),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
).queue()
if __name__ == "__main__":
    # Script entry point: start the Gradio server and log any launch failure
    # together with its traceback instead of letting it propagate.
    logger.info("Starting RVC server")
    launch_options = {
        "debug": True,
        "show_error": True,
        "max_threads": 10,
    }
    try:
        iface.launch(**launch_options)
    except Exception as launch_error:
        logger.exception(f"Error launching server: {launch_error}")