Kit-Lemonfoot committed on
Commit
cf9bca8
·
1 Parent(s): 73e6a9f

Added some experimental preloading of the RMVPE and VC models to hopefully ease inference time. May break stuff.

Browse files
Files changed (2) hide show
  1. app.py +19 -8
  2. vc_infer_pipeline.py +17 -9
app.py CHANGED
@@ -33,15 +33,26 @@ limitation = os.getenv("SYSTEM") == "spaces"
33
  #limitation=True
34
 
35
  audio_mode = []
36
- f0method_mode = ["pm", "crepe", "harvest"]
37
- f0method_info = "PM is fast but low quality, crepe and harvest are slow but good quality, RMVPE is the best of both worlds. (Default: RMVPE))"
38
  if limitation is True:
 
39
  audio_mode = ["TTS Audio", "Upload audio"]
 
40
  else:
 
41
  audio_mode = ["TTS Audio", "Youtube", "Upload audio"]
 
42
 
43
- if os.path.isfile("rmvpe.pt"):
44
- f0method_mode.append("rmvpe")
 
 
 
 
 
 
 
 
45
 
46
  def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect):
47
  try:
@@ -99,10 +110,10 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
99
  net_g = net_g.half()
100
  else:
101
  net_g = net_g.float()
102
- vc = VC(tgt_sr, config)
103
 
104
  #Gen audio
105
- audio_opt = vc.pipeline(
106
  hubert_model,
107
  net_g,
108
  0,
@@ -125,7 +136,7 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
125
  )
126
  info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
127
  print(f"Successful inference with model {name} | {tts_text} | {info}")
128
- del net_g, vc, cpt
129
  return info, (tgt_sr, audio_opt)
130
  except:
131
  info = traceback.format_exc()
@@ -516,7 +527,7 @@ if __name__ == '__main__':
516
  "#### <center>Original devs:\n"
517
  "<center>the RVC Project, lj1995, zomehwh \n\n"
518
  "#### <center>Model creators:\n"
519
- "<center>dacoolkid44, Hijack, Maki Ligon, megaaziib, KitLemonfoot, yeey5, Sui, MahdeenSky, Itaxhix, Acato, Kyuubical, MartinFLL, Listra92, IshimaIshimsky, ZomballTH, Jotape91, RigidSpinner, RandomAssBettel, Mimizukari, Oida, Shu-Kun, Nhat Minh, Ardha27, Legitdark, TempoHawk, 0x3e9, Kaiaya, Skeetawn, Sonphantrung, Pianissimo, Gloomwastragic, Sunesu, Aimbo, Act8113, Blyxeen\n"
520
  )
521
  if limitation is True:
522
  app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
 
33
  #limitation=True
34
 
35
  audio_mode = []
36
+ f0method_mode = []
 
37
  if limitation is True:
38
+ f0method_info = "PM is better for testing, RMVPE is better for finalized generations. (Default: RMVPE)"
39
  audio_mode = ["TTS Audio", "Upload audio"]
40
+ f0method_mode = ["pm", "rmvpe"]
41
  else:
42
+ f0method_info = "PM is fast but low quality, crepe and harvest are slow but good quality, RMVPE is the best of both worlds. (Default: RMVPE)"
43
  audio_mode = ["TTS Audio", "Youtube", "Upload audio"]
44
+ f0method_mode = ["pm", "crepe", "harvest", "rmvpe"]
45
 
46
+ #if os.path.isfile("rmvpe.pt"):
47
+ # f0method_mode.append("rmvpe")
48
+
49
+
50
+ #Eagerload VCs
51
+ print("Preloading VCs...")
52
+ vcArr=[]
53
+ vcArr.append(VC(32000, config))
54
+ vcArr.append(VC(40000, config))
55
+ vcArr.append(VC(48000, config))
56
 
57
  def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect):
58
  try:
 
110
  net_g = net_g.half()
111
  else:
112
  net_g = net_g.float()
113
+ vcIdx = int((tgt_sr/8000)-4)
114
 
115
  #Gen audio
116
+ audio_opt = vcArr[vcIdx].pipeline(
117
  hubert_model,
118
  net_g,
119
  0,
 
136
  )
137
  info = f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
138
  print(f"Successful inference with model {name} | {tts_text} | {info}")
139
+ del net_g, cpt
140
  return info, (tgt_sr, audio_opt)
141
  except:
142
  info = traceback.format_exc()
 
527
  "#### <center>Original devs:\n"
528
  "<center>the RVC Project, lj1995, zomehwh \n\n"
529
  "#### <center>Model creators:\n"
530
+ "<center>dacoolkid44, Hijack, Maki Ligon, megaaziib, KitLemonfoot, yeey5, Sui, MahdeenSky, Itaxhix, Acato, Kyuubical, Listra92, IshimaIshimsky, ZomballTH, Jotape91, RigidSpinner, RandomAssBettel, Mimizukari, Oida, Shu-Kun, Nhat Minh, Ardha27, Legitdark, TempoHawk, 0x3e9, Kaiaya, Skeetawn, Sonphantrung, Pianissimo, Gloomwastragic, Sunesu, Aimbo, Act8113, Blyxeen\n"
531
  )
532
  if limitation is True:
533
  app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
vc_infer_pipeline.py CHANGED
@@ -13,6 +13,14 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
13
 
14
  input_audio_path2wav = {}
15
 
 
 
 
 
 
 
 
 
16
 
17
  @lru_cache
18
  def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
@@ -128,14 +136,14 @@ class VC(object):
128
  f0[pd < 0.1] = 0
129
  f0 = f0[0].cpu().numpy()
130
  elif f0_method == "rmvpe":
131
- if hasattr(self, "model_rmvpe") == False:
132
- from rmvpe import RMVPE
133
-
134
- print("loading rmvpe model")
135
- self.model_rmvpe = RMVPE(
136
- "rmvpe.pt", is_half=self.is_half, device=self.device
137
- )
138
- f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
139
  f0 *= pow(2, f0_up_key / 12)
140
  # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
141
  tf0 = self.sr // self.window # 每秒f0点数
@@ -440,4 +448,4 @@ class VC(object):
440
  del pitch, pitchf, sid
441
  if torch.cuda.is_available():
442
  torch.cuda.empty_cache()
443
- return audio_opt
 
13
 
14
  input_audio_path2wav = {}
15
 
16
+ #Attempting a eagerload of the RMVPE model here.
17
+ from config import Config
18
+ config = Config()
19
+ from rmvpe import RMVPE
20
+ print("Preloading RMVPE model")
21
+ model_rmvpe = RMVPE("rmvpe.pt", is_half=config.is_half, device=config.device)
22
+ del config
23
+
24
 
25
  @lru_cache
26
  def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
 
136
  f0[pd < 0.1] = 0
137
  f0 = f0[0].cpu().numpy()
138
  elif f0_method == "rmvpe":
139
+ ## if hasattr(self, "model_rmvpe") == False:
140
+ ## from rmvpe import RMVPE
141
+ ##
142
+ ## print("loading rmvpe model")
143
+ ## self.model_rmvpe = RMVPE(
144
+ ## "rmvpe.pt", is_half=self.is_half, device=self.device
145
+ ## )
146
+ f0 = model_rmvpe.infer_from_audio(x, thred=0.03)
147
  f0 *= pow(2, f0_up_key / 12)
148
  # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
149
  tf0 = self.sr // self.window # 每秒f0点数
 
448
  del pitch, pitchf, sid
449
  if torch.cuda.is_available():
450
  torch.cuda.empty_cache()
451
+ return audio_opt