Spaces:

mazalaai
/

tts

Sleeping

MAZALA2024 committed on Dec 14, 2024

Commit

be4df6b

verified ·

1 Parent(s): 13a3616

Update voice_processing.py

Files changed (1) hide show

voice_processing.py CHANGED Viewed

@@ -92,21 +92,25 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
     try:
         logger.info("Starting audio processing")
-        # Load audio using librosa directly (matching original working code)
         audio, sr = librosa.load(audio_file, sr=16000, mono=True)
         logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
-        # Get model data using existing function
         tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
         # Set RMVPE
         vc.model_rmvpe = rmvpe_model
-        # Process using the VC pipeline that we know works
         times = [0, 0, 0]
         audio_opt = vc.pipeline(
-            hubert_model,  # Use global hubert model
-            net_g,  # Use the generator from model_data
             0,  # speaker id
             audio,
             audio_file,
@@ -125,9 +129,6 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
             f0_file=None
         )
-        if tgt_sr != 0 and tgt_sr >= 16000:
-            tgt_sr = resample_sr
         info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
         logger.info(info)
         return (info, None, (tgt_sr, audio_opt))

     try:
         logger.info("Starting audio processing")
+        # Get model name from audio file path
+        model_name = os.path.basename(os.path.dirname(audio_file))
+        logger.info(f"Processing for model: {model_name}")
+        # Load audio using librosa directly
         audio, sr = librosa.load(audio_file, sr=16000, mono=True)
         logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
+        # Get model data
         tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
         # Set RMVPE
         vc.model_rmvpe = rmvpe_model
+        # Process using VC pipeline
         times = [0, 0, 0]
         audio_opt = vc.pipeline(
+            hubert_model,
+            net_g,
             0,  # speaker id
             audio,
             audio_file,
             f0_file=None
         )
         info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
         logger.info(info)
         return (info, None, (tgt_sr, audio_opt))