MAZALA2024 committed on
Commit
13a3616
·
verified ·
1 Parent(s): 8ebe7fa

Update voice_processing.py

Browse files
Files changed (1) hide show
  1. voice_processing.py +14 -9
voice_processing.py CHANGED
@@ -92,20 +92,22 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
92
  try:
93
  logger.info("Starting audio processing")
94
 
95
- if model is None:
96
- logger.error("No model provided for processing")
97
- return None
98
 
99
- # Load and process audio
100
  tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
101
- if f0_method == "rmvpe":
102
- vc.model_rmvpe = rmvpe_model
 
103
 
 
104
  times = [0, 0, 0]
105
  audio_opt = vc.pipeline(
106
- hubert_model,
107
- net_g,
108
- 0, # sid
109
  audio,
110
  audio_file,
111
  times,
@@ -123,6 +125,9 @@ def process_audio(model, audio_file, logger, index_rate=0, use_uploaded_voice=Tr
123
  f0_file=None
124
  )
125
 
 
 
 
126
  info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
127
  logger.info(info)
128
  return (info, None, (tgt_sr, audio_opt))
 
92
  try:
93
  logger.info("Starting audio processing")
94
 
95
+ # Load audio using librosa directly (matching original working code)
96
+ audio, sr = librosa.load(audio_file, sr=16000, mono=True)
97
+ logger.info(f"Loaded audio: sr={sr}Hz, shape={audio.shape}")
98
 
99
+ # Get model data using existing function
100
  tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
101
+
102
+ # Set RMVPE
103
+ vc.model_rmvpe = rmvpe_model
104
 
105
+ # Process using the VC pipeline that we know works
106
  times = [0, 0, 0]
107
  audio_opt = vc.pipeline(
108
+ hubert_model, # Use global hubert model
109
+ net_g, # Use the generator from model_data
110
+ 0, # speaker id
111
  audio,
112
  audio_file,
113
  times,
 
125
  f0_file=None
126
  )
127
 
128
+ if tgt_sr != 0 and tgt_sr >= 16000:
129
+ tgt_sr = resample_sr
130
+
131
  info = f"Success. Time: npy: {times[0]}s, f0: {times[1]}s, infer: {times[2]}s"
132
  logger.info(info)
133
  return (info, None, (tgt_sr, audio_opt))