Update voice_processing.py
Browse files- voice_processing.py +13 -15
voice_processing.py
CHANGED
@@ -92,25 +92,30 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
|
|
92 |
try:
|
93 |
logger.info("Starting audio processing")
|
94 |
|
95 |
-
#
|
96 |
-
model_name = os.
|
|
|
|
|
|
|
|
|
|
|
97 |
logger.info(f"Processing for model: {model_name}")
|
98 |
|
99 |
# Load audio using librosa directly
|
100 |
audio, sr = librosa.load(audio_file, sr=16000, mono=True)
|
101 |
logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
|
102 |
|
103 |
-
# Get model data
|
104 |
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
105 |
|
106 |
# Set RMVPE
|
107 |
vc.model_rmvpe = rmvpe_model
|
108 |
|
109 |
-
# Process using VC pipeline
|
110 |
times = [0, 0, 0]
|
111 |
audio_opt = vc.pipeline(
|
112 |
-
hubert_model,
|
113 |
-
net_g,
|
114 |
0, # speaker id
|
115 |
audio,
|
116 |
audio_file,
|
@@ -243,7 +248,7 @@ def run_async_in_thread(fn, *args):
|
|
243 |
loop.close()
|
244 |
return result
|
245 |
|
246 |
-
def parallel_tts(tasks):
|
247 |
"""Process multiple TTS tasks"""
|
248 |
logger.info(f"Received {len(tasks)} tasks for processing")
|
249 |
results = []
|
@@ -261,14 +266,7 @@ def parallel_tts(tasks): # Remove any async here
|
|
261 |
results.append(None)
|
262 |
continue
|
263 |
|
264 |
-
result = process_audio(
|
265 |
-
model=model,
|
266 |
-
audio_file=audio_file,
|
267 |
-
logger=logger,
|
268 |
-
index_rate=0,
|
269 |
-
use_uploaded_voice=use_uploaded_voice,
|
270 |
-
uploaded_voice=None
|
271 |
-
)
|
272 |
logger.info(f"Processing completed for task {i+1}")
|
273 |
|
274 |
results.append(result)
|
|
|
92 |
try:
|
93 |
logger.info("Starting audio processing")
|
94 |
|
95 |
+
# The model name should come from parallel_tts task parameters
|
96 |
+
model_name = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")][0]
|
97 |
+
for m in os.listdir(model_root):
|
98 |
+
if os.path.isdir(f"{model_root}/{m}") and "mongolian7-male" in m:
|
99 |
+
model_name = m
|
100 |
+
break
|
101 |
+
|
102 |
logger.info(f"Processing for model: {model_name}")
|
103 |
|
104 |
# Load audio using librosa directly
|
105 |
audio, sr = librosa.load(audio_file, sr=16000, mono=True)
|
106 |
logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
|
107 |
|
108 |
+
# Get model data using existing function
|
109 |
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
110 |
|
111 |
# Set RMVPE
|
112 |
vc.model_rmvpe = rmvpe_model
|
113 |
|
114 |
+
# Process using the VC pipeline
|
115 |
times = [0, 0, 0]
|
116 |
audio_opt = vc.pipeline(
|
117 |
+
hubert_model, # Use global hubert model
|
118 |
+
net_g, # Use the generator from model_data
|
119 |
0, # speaker id
|
120 |
audio,
|
121 |
audio_file,
|
|
|
248 |
loop.close()
|
249 |
return result
|
250 |
|
251 |
+
def parallel_tts(tasks):
|
252 |
"""Process multiple TTS tasks"""
|
253 |
logger.info(f"Received {len(tasks)} tasks for processing")
|
254 |
results = []
|
|
|
266 |
results.append(None)
|
267 |
continue
|
268 |
|
269 |
+
result = process_audio(model, audio_file, logger)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
logger.info(f"Processing completed for task {i+1}")
|
271 |
|
272 |
results.append(result)
|