ArkanDash committed on
Commit
db06f79
1 Parent(s): d028cb0

feat(app): add support for model_info.json

Browse files
Files changed (5) hide show
  1. app-full.py +62 -32
  2. app.py +62 -31
  3. config.py +5 -5
  4. requirements-full.txt +0 -1
  5. weights/model_info.json +10 -0
app-full.py CHANGED
@@ -151,36 +151,65 @@ if __name__ == '__main__':
151
  models = []
152
  tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
153
  voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
154
- folder_path = "weights"
155
- for name in os.listdir(folder_path):
156
- print("check folder: " + name)
157
- if name.startswith("."): break
158
- cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
159
- index_path = glob.glob(f"{folder_path}/{name}/*.index")
160
- checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
161
- title = name
162
- if cover_path:
163
- cover = cover_path[0]
164
- else:
165
- cover = ""
166
- index = index_path[0]
167
- cpt = torch.load(checkpoint_path[0], map_location="cpu")
168
- tgt_sr = cpt["config"][-1]
169
- cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
170
- if_f0 = cpt.get("f0", 1)
171
- if if_f0 == 1:
172
- net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
173
- else:
174
- net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
175
- del net_g.enc_q
176
- print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
177
- net_g.eval().to(config.device)
178
- if config.is_half:
179
- net_g = net_g.half()
180
- else:
181
- net_g = net_g.float()
182
- vc = VC(tgt_sr, config)
183
- models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  with gr.Blocks() as app:
185
  gr.Markdown(
186
  "# <center> RVC Models (Latest Update)\n"
@@ -190,12 +219,13 @@ if __name__ == '__main__':
190
  "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
191
  )
192
  with gr.Tabs():
193
- for (name, title, cover, vc_fn) in models:
194
  with gr.TabItem(name):
195
  with gr.Row():
196
  gr.Markdown(
197
  '<div align="center">'
198
  f'<div>{title}</div>\n'+
 
199
  (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
200
  '</div>'
201
  )
@@ -251,4 +281,4 @@ if __name__ == '__main__':
251
  if config.files:
252
  vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
253
  vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
254
- app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.share)
 
151
  models = []
152
  tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
153
  voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
154
+ if config.json:
155
+ with open("weights/model_info.json", "r", encoding="utf-8") as f:
156
+ models_info = json.load(f)
157
+ for name, info in models_info.items():
158
+ if not info['enable']:
159
+ continue
160
+ title = info['title']
161
+ author = info.get("author", None)
162
+ cover = f"weights/{name}/{info['cover']}"
163
+ index = f"weights/{name}/{info['feature_retrieval_library']}"
164
+ cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
165
+ tgt_sr = cpt["config"][-1]
166
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
167
+ if_f0 = cpt.get("f0", 1)
168
+ if if_f0 == 1:
169
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
170
+ else:
171
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
172
+ del net_g.enc_q
173
+ print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
174
+ net_g.eval().to(config.device)
175
+ if config.is_half:
176
+ net_g = net_g.half()
177
+ else:
178
+ net_g = net_g.float()
179
+ vc = VC(tgt_sr, config)
180
+ models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
181
+ else:
182
+ folder_path = "weights"
183
+ for name in os.listdir(folder_path):
184
+ print("check folder: " + name)
185
+ if name.startswith("."): break
186
+ cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
187
+ index_path = glob.glob(f"{folder_path}/{name}/*.index")
188
+ checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
189
+ title = name
190
+ author = ""
191
+ if cover_path:
192
+ cover = cover_path[0]
193
+ else:
194
+ cover = ""
195
+ index = index_path[0]
196
+ cpt = torch.load(checkpoint_path[0], map_location="cpu")
197
+ tgt_sr = cpt["config"][-1]
198
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
199
+ if_f0 = cpt.get("f0", 1)
200
+ if if_f0 == 1:
201
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
202
+ else:
203
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
204
+ del net_g.enc_q
205
+ print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
206
+ net_g.eval().to(config.device)
207
+ if config.is_half:
208
+ net_g = net_g.half()
209
+ else:
210
+ net_g = net_g.float()
211
+ vc = VC(tgt_sr, config)
212
+ models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
213
  with gr.Blocks() as app:
214
  gr.Markdown(
215
  "# <center> RVC Models (Latest Update)\n"
 
219
  "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
220
  )
221
  with gr.Tabs():
222
+ for (name, title, author, cover, vc_fn) in models:
223
  with gr.TabItem(name):
224
  with gr.Row():
225
  gr.Markdown(
226
  '<div align="center">'
227
  f'<div>{title}</div>\n'+
228
+ (f'<div>Model author: {author}</div>' if author else "")+
229
  (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
230
  '</div>'
231
  )
 
281
  if config.files:
282
  vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
283
  vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
284
+ app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
app.py CHANGED
@@ -102,41 +102,71 @@ if __name__ == '__main__':
102
  models = []
103
  tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
104
  voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
105
- folder_path = "weights"
106
- for name in os.listdir(folder_path):
107
- print("check folder: " + name)
108
- if name.startswith("."): break
109
- cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
110
- index_path = glob.glob(f"{folder_path}/{name}/*.index")
111
- checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
112
- title = name
113
- if cover_path:
114
- cover = cover_path[0]
115
- else:
116
- cover = ""
117
- index = index_path[0]
118
- cpt = torch.load(checkpoint_path[0], map_location="cpu")
119
- tgt_sr = cpt["config"][-1]
120
- cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
121
- if_f0 = cpt.get("f0", 1)
122
- if if_f0 == 1:
123
- net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
124
- else:
125
- net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
126
- del net_g.enc_q
127
- print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
128
- net_g.eval().to(config.device)
129
- if config.is_half:
130
- net_g = net_g.half()
131
- else:
132
- net_g = net_g.float()
133
- vc = VC(tgt_sr, config)
134
- models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  with gr.Blocks() as app:
136
  gr.Markdown(
137
  "# <center> RVC Models (Latest Update)\n"
138
  "## <center> The input audio should be clean and pure voice without background music.\n"
139
  "### <center> Recommended to use google colab for more features. \n"
 
140
  "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
141
  "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
142
  )
@@ -147,6 +177,7 @@ if __name__ == '__main__':
147
  gr.Markdown(
148
  '<div align="center">'
149
  f'<div>{title}</div>\n'+
 
150
  (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
151
  '</div>'
152
  )
@@ -176,4 +207,4 @@ if __name__ == '__main__':
176
  vc_output2 = gr.Audio(label="Output Audio")
177
  vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
178
  tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
179
- app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.share)
 
102
  models = []
103
  tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
104
  voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
105
+ if config.json or limitation:
106
+ with open("weights/model_info.json", "r", encoding="utf-8") as f:
107
+ models_info = json.load(f)
108
+ for name, info in models_info.items():
109
+ if not info['enable']:
110
+ continue
111
+ title = info['title']
112
+ author = info.get("author", None)
113
+ cover = f"weights/{name}/{info['cover']}"
114
+ index = f"weights/{name}/{info['feature_retrieval_library']}"
115
+ cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
116
+ tgt_sr = cpt["config"][-1]
117
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
118
+ if_f0 = cpt.get("f0", 1)
119
+ if if_f0 == 1:
120
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
121
+ else:
122
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
123
+ del net_g.enc_q
124
+ print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
125
+ net_g.eval().to(config.device)
126
+ if config.is_half:
127
+ net_g = net_g.half()
128
+ else:
129
+ net_g = net_g.float()
130
+ vc = VC(tgt_sr, config)
131
+ models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
132
+ else:
133
+ folder_path = "weights"
134
+ for name in os.listdir(folder_path):
135
+ print("check folder: " + name)
136
+ if name.startswith("."): break
137
+ cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
138
+ index_path = glob.glob(f"{folder_path}/{name}/*.index")
139
+ checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
140
+ title = name
141
+ author = ""
142
+ if cover_path:
143
+ cover = cover_path[0]
144
+ else:
145
+ cover = ""
146
+ index = index_path[0]
147
+ cpt = torch.load(checkpoint_path[0], map_location="cpu")
148
+ tgt_sr = cpt["config"][-1]
149
+ cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
150
+ if_f0 = cpt.get("f0", 1)
151
+ if if_f0 == 1:
152
+ net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
153
+ else:
154
+ net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
155
+ del net_g.enc_q
156
+ print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
157
+ net_g.eval().to(config.device)
158
+ if config.is_half:
159
+ net_g = net_g.half()
160
+ else:
161
+ net_g = net_g.float()
162
+ vc = VC(tgt_sr, config)
163
+ models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
164
  with gr.Blocks() as app:
165
  gr.Markdown(
166
  "# <center> RVC Models (Latest Update)\n"
167
  "## <center> The input audio should be clean and pure voice without background music.\n"
168
  "### <center> Recommended to use google colab for more features. \n"
169
+ "##### <center> Total_fea.npy is depricated.\nPlease regenerate your model to latest RVC.\n"
170
  "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
171
  "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
172
  )
 
177
  gr.Markdown(
178
  '<div align="center">'
179
  f'<div>{title}</div>\n'+
180
+ (f'<div>Model author: {author}</div>' if author else "")+
181
  (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
182
  '</div>'
183
  )
 
207
  vc_output2 = gr.Audio(label="Output Audio")
208
  vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
209
  tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
210
+ app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
config.py CHANGED
@@ -13,12 +13,12 @@ class Config:
13
  (
14
  self.python_cmd,
15
  self.listen_port,
16
- self.iscolab,
17
  self.noparallel,
18
  self.noautoopen,
19
  self.api,
20
- self.share,
21
- self.files
22
  ) = self.arg_parse()
23
  self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
24
 
@@ -39,8 +39,8 @@ class Config:
39
  help="Do not open in browser automatically",
40
  )
41
  parser.add_argument('--api', action="store_true", default=False)
42
- parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
43
  parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
 
44
  cmd_opts = parser.parse_args()
45
 
46
  cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
@@ -52,8 +52,8 @@ class Config:
52
  cmd_opts.noparallel,
53
  cmd_opts.noautoopen,
54
  cmd_opts.api,
55
- cmd_opts.share,
56
  cmd_opts.files,
 
57
  )
58
 
59
  def device_config(self) -> tuple:
 
13
  (
14
  self.python_cmd,
15
  self.listen_port,
16
+ self.colab,
17
  self.noparallel,
18
  self.noautoopen,
19
  self.api,
20
+ self.files,
21
+ self.json
22
  ) = self.arg_parse()
23
  self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
24
 
 
39
  help="Do not open in browser automatically",
40
  )
41
  parser.add_argument('--api', action="store_true", default=False)
 
42
  parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
43
+ parser.add_argument("--json", action="store_true", default=False, help="use model_info.json")
44
  cmd_opts = parser.parse_args()
45
 
46
  cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
 
52
  cmd_opts.noparallel,
53
  cmd_opts.noautoopen,
54
  cmd_opts.api,
 
55
  cmd_opts.files,
56
+ cmd_opts.json
57
  )
58
 
59
  def device_config(self) -> tuple:
requirements-full.txt CHANGED
@@ -44,6 +44,5 @@ audioread
44
  uvicorn>=0.21.1
45
  colorama>=0.4.6
46
  edge-tts
47
- demucs
48
  yt_dlp
49
  ffmpeg
 
44
  uvicorn>=0.21.1
45
  colorama>=0.4.6
46
  edge-tts
 
47
  yt_dlp
48
  ffmpeg
weights/model_info.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nilou-jp": {
3
+ "enable": true,
4
+ "name": "nilou-jp",
5
+ "title": "Genshin Impact - Nilou",
6
+ "cover": "cover.png",
7
+ "feature_retrieval_library": "added_IVF218_Flat_nprobe_5.index",
8
+ "author":"ArkanDash"
9
+ }
10
+ }