Update voice_processing.py
Browse files
- voice_processing.py +9 -8
voice_processing.py
CHANGED
@@ -92,21 +92,25 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
|
|
92 |
try:
|
93 |
logger.info("Starting audio processing")
|
94 |
|
95 |
-
#
|
|
|
|
|
|
|
|
|
96 |
audio, sr = librosa.load(audio_file, sr=16000, mono=True)
|
97 |
logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
|
98 |
|
99 |
-
# Get model data
|
100 |
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
101 |
|
102 |
# Set RMVPE
|
103 |
vc.model_rmvpe = rmvpe_model
|
104 |
|
105 |
-
# Process using
|
106 |
times = [0, 0, 0]
|
107 |
audio_opt = vc.pipeline(
|
108 |
-
hubert_model,
|
109 |
-
net_g,
|
110 |
0, # speaker id
|
111 |
audio,
|
112 |
audio_file,
|
@@ -125,9 +129,6 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
|
|
125 |
f0_file=None
|
126 |
)
|
127 |
|
128 |
-
if tgt_sr != 0 and tgt_sr >= 16000:
|
129 |
-
tgt_sr = resample_sr
|
130 |
-
|
131 |
info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
|
132 |
logger.info(info)
|
133 |
return (info, None, (tgt_sr, audio_opt))
|
|
|
92 |
try:
|
93 |
logger.info("Starting audio processing")
|
94 |
|
95 |
+
# Get model name from audio file path
|
96 |
+
model_name = os.path.basename(os.path.dirname(audio_file))
|
97 |
+
logger.info(f"Processing for model: {model_name}")
|
98 |
+
|
99 |
+
# Load audio using librosa directly
|
100 |
audio, sr = librosa.load(audio_file, sr=16000, mono=True)
|
101 |
logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
|
102 |
|
103 |
+
# Get model data
|
104 |
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
105 |
|
106 |
# Set RMVPE
|
107 |
vc.model_rmvpe = rmvpe_model
|
108 |
|
109 |
+
# Process using VC pipeline
|
110 |
times = [0, 0, 0]
|
111 |
audio_opt = vc.pipeline(
|
112 |
+
hubert_model,
|
113 |
+
net_g,
|
114 |
0, # speaker id
|
115 |
audio,
|
116 |
audio_file,
|
|
|
129 |
f0_file=None
|
130 |
)
|
131 |
|
|
|
|
|
|
|
132 |
info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
|
133 |
logger.info(info)
|
134 |
return (info, None, (tgt_sr, audio_opt))
|