MAZALA2024 committed on
Commit
be4df6b
·
verified ·
1 Parent(s): 13a3616

Update voice_processing.py

Browse files
Files changed (1) hide show
  1. voice_processing.py +9 -8
voice_processing.py CHANGED
@@ -92,21 +92,25 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
92
  try:
93
  logger.info("Starting audio processing")
94
 
95
- # Load audio using librosa directly (matching original working code)
 
 
 
 
96
  audio, sr = librosa.load(audio_file, sr=16000, mono=True)
97
  logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
98
 
99
- # Get model data using existing function
100
  tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
101
 
102
  # Set RMVPE
103
  vc.model_rmvpe = rmvpe_model
104
 
105
- # Process using the VC pipeline that we know works
106
  times = [0, 0, 0]
107
  audio_opt = vc.pipeline(
108
- hubert_model, # Use global hubert model
109
- net_g, # Use the generator from model_data
110
  0, # speaker id
111
  audio,
112
  audio_file,
@@ -125,9 +129,6 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
125
  f0_file=None
126
  )
127
 
128
- if tgt_sr != 0 and tgt_sr >= 16000:
129
- tgt_sr = resample_sr
130
-
131
  info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
132
  logger.info(info)
133
  return (info, None, (tgt_sr, audio_opt))
 
92
  try:
93
  logger.info("Starting audio processing")
94
 
95
+ # Get model name from audio file path
96
+ model_name = os.path.basename(os.path.dirname(audio_file))
97
+ logger.info(f"Processing for model: {model_name}")
98
+
99
+ # Load audio using librosa directly
100
  audio, sr = librosa.load(audio_file, sr=16000, mono=True)
101
  logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
102
 
103
+ # Get model data
104
  tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
105
 
106
  # Set RMVPE
107
  vc.model_rmvpe = rmvpe_model
108
 
109
+ # Process using VC pipeline
110
  times = [0, 0, 0]
111
  audio_opt = vc.pipeline(
112
+ hubert_model,
113
+ net_g,
114
  0, # speaker id
115
  audio,
116
  audio_file,
 
129
  f0_file=None
130
  )
131
 
 
 
 
132
  info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
133
  logger.info(info)
134
  return (info, None, (tgt_sr, audio_opt))