File size: 4,537 Bytes
06dd398
 
 
 
 
49e8fad
6dc7b04
555564b
 
516b487
10ee224
 
 
 
8290230
516b487
 
 
 
 
 
 
 
 
 
 
 
10ee224
 
 
516b487
 
 
 
 
93f33db
d60199c
 
516b487
 
06dd398
516b487
 
6dc7b04
 
516b487
6dc7b04
 
b853861
461e4d3
 
d60199c
06dd398
b853861
461e4d3
b853861
 
23e22b5
d60199c
23e22b5
 
 
251af67
461e4d3
 
 
251af67
461e4d3
 
d60199c
461e4d3
 
 
23e22b5
251af67
d60199c
 
 
 
 
461e4d3
d60199c
 
461e4d3
d60199c
 
251af67
d60199c
 
461e4d3
 
 
d60199c
461e4d3
 
 
23e22b5
06dd398
461e4d3
516b487
b853861
516b487
06dd398
2e30408
06dd398
6dc7b04
b39fefc
06dd398
 
 
 
1b123c7
06dd398
516b487
06dd398
 
49e8fad
516b487
b4bd731
 
 
 
516b487
b4bd731
516b487
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names
import os
import logging
from rvc_service import RVCService  # Our new service
import asyncio
from voice_processing import parallel_tts, get_model_names
import sys
from datetime import datetime  # Add this import
import traceback
import json


# Root logging configuration: every record at DEBUG and above is written both
# to the file ``rvc_server.log`` and to standard output, using a
# pipe-separated "time | level | logger name | message" layout.
logging.basicConfig(
    handlers=[
        logging.FileHandler("rvc_server.log"),
        logging.StreamHandler(sys.stdout),
    ],
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    level=logging.DEBUG,
)

# Module-wide logger used for server lifecycle messages.
logger = logging.getLogger("rvc_server")

# Single RVCService instance created at import time.
rvc_service = RVCService()

def setup_request_logging():
    """Build a logger dedicated to a single request.

    The request id is the current local time rendered as
    ``YYYYMMDD_HHMMSS_microseconds``; the logger is named
    ``request_<request id>``.

    Returns:
        tuple: ``(logger, request_id)``.
    """
    now = datetime.now()
    request_id = f"{now:%Y%m%d_%H%M%S_%f}"
    return logging.getLogger("request_" + request_id), request_id

def _save_conversion_output(audio_output, tgt_sr, request_id, req_logger):
    """Write converted audio to ``outputs/output_<request_id>.wav``.

    A NumPy array is encoded with ``scipy.io.wavfile.write`` at ``tgt_sr``;
    any other value is written as-is through a binary file handle
    (presumably already-encoded audio bytes -- confirm against
    ``parallel_tts``).

    Returns:
        str: Path of the file that was written.
    """
    output_dir = "outputs"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"output_{request_id}.wav")

    if isinstance(audio_output, np.ndarray):
        req_logger.info(f"Saving numpy array output: shape={audio_output.shape}")
        wavfile.write(output_path, tgt_sr, audio_output)
    else:
        req_logger.info("Saving raw audio output")
        with open(output_path, "wb") as f:
            f.write(audio_output)

    return output_path


def convert_tts(model_name, audio_file, slang_rate):
    """Voice conversion endpoint.

    Runs the uploaded audio through ``parallel_tts`` and stores the result
    under ``outputs/``.

    Args:
        model_name: Model identifier, forwarded to ``parallel_tts``.
        audio_file: Path to the uploaded audio file, or ``None`` when nothing
            was uploaded.
        slang_rate: Forwarded to ``parallel_tts`` as part of the task tuple.

    Returns:
        tuple: ``({"info": info}, output_path)`` on success, or
        ``({"error": message}, None)`` on any failure. Errors raised while
        processing are reported through the payload rather than propagated.
    """
    req_logger, request_id = setup_request_logging()

    try:
        req_logger.info(f"New request received - ID: {request_id}")
        req_logger.info(f"Parameters: model={model_name}, slang_rate={slang_rate}")

        if audio_file is None:
            req_logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None

        try:
            req_logger.info(f"Processing audio file: {audio_file}")
            # The decoded samples are only used for logging; reading the file
            # up front also makes unreadable uploads fail before processing.
            sr, audio = wavfile.read(audio_file)
            req_logger.info(f"Audio loaded: sr={sr}Hz, shape={audio.shape}")

            # Task layout is dictated by parallel_tts.
            # NOTE(review): the meaning of the two None slots and the True
            # flag is not visible here -- confirm against voice_processing.
            task = (model_name, None, None, slang_rate, True, audio_file)

            req_logger.info("Running parallel processing")
            result = parallel_tts([task])

            if not result or result[0] is None:
                req_logger.error("Processing failed - no result")
                return {"error": "Processing failed"}, None

            result_tuple = result[0]

            # Expected shape: (info, <ignored>, (target_sample_rate, audio)).
            if not (isinstance(result_tuple, tuple) and len(result_tuple) == 3):
                req_logger.error(f"Invalid result format: {result_tuple}")
                return {"error": "Invalid result format"}, None

            info, _, (tgt_sr, audio_output) = result_tuple

            if audio_output is None:
                req_logger.error("No audio output generated")
                return {"error": "No audio output generated"}, None

            output_path = _save_conversion_output(
                audio_output, tgt_sr, request_id, req_logger
            )
            req_logger.info(f"Successfully saved to {output_path}")
            return {"info": info}, output_path

        except Exception as e:
            # .exception() logs at ERROR level and appends the traceback,
            # which a plain .error(str(e)) would drop.
            req_logger.exception(f"Error processing audio: {str(e)}")
            return {"error": f"Processing error: {str(e)}"}, None

    except Exception as e:
        req_logger.exception(f"Unexpected error: {str(e)}")
        return {"error": str(e)}, None
        
# Gradio UI: a model dropdown, an audio upload (handed to the handler as a
# file path) and a slang-rate slider feed convert_tts; its two return values
# are shown as a JSON panel and an audio player.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(
            choices=get_model_names(),
            label="Model",
            interactive=True,
        ),
        gr.Audio(
            label="Upload Audio",
            type="filepath",
        ),
        gr.Slider(
            minimum=0,
            maximum=1,
            step=0.01,
            label="Slang Rate",
        ),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
)
# Enable Gradio's request queue on the interface.
iface = iface.queue()

if __name__ == "__main__":
    # Script entry point: start the Gradio server and log any launch failure
    # together with its traceback.
    logger.info("Starting RVC server")
    launch_options = {
        "debug": True,
        "show_error": True,
        "max_threads": 10,
    }
    try:
        iface.launch(**launch_options)
    except Exception as e:
        logger.exception(f"Error launching server: {e}")