feat(app): add support for model_info.json
Browse files
- app-full.py +62 -32
- app.py +62 -31
- config.py +5 -5
- requirements-full.txt +0 -1
- weights/model_info.json +10 -0
app-full.py
CHANGED
@@ -151,36 +151,65 @@ if __name__ == '__main__':
|
|
151 |
models = []
|
152 |
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
|
153 |
voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
net_g
|
173 |
-
|
174 |
-
net_g
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
with gr.Blocks() as app:
|
185 |
gr.Markdown(
|
186 |
"# <center> RVC Models (Latest Update)\n"
|
@@ -190,12 +219,13 @@ if __name__ == '__main__':
|
|
190 |
"[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
|
191 |
)
|
192 |
with gr.Tabs():
|
193 |
-
for (name, title, cover, vc_fn) in models:
|
194 |
with gr.TabItem(name):
|
195 |
with gr.Row():
|
196 |
gr.Markdown(
|
197 |
'<div align="center">'
|
198 |
f'<div>{title}</div>\n'+
|
|
|
199 |
(f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
|
200 |
'</div>'
|
201 |
)
|
@@ -251,4 +281,4 @@ if __name__ == '__main__':
|
|
251 |
if config.files:
|
252 |
vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
|
253 |
vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
|
254 |
-
app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.share)
|
|
|
151 |
models = []
|
152 |
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
|
153 |
voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
154 |
+
if config.json:
|
155 |
+
with open("weights/model_info.json", "r", encoding="utf-8") as f:
|
156 |
+
models_info = json.load(f)
|
157 |
+
for name, info in models_info.items():
|
158 |
+
if not info['enable']:
|
159 |
+
continue
|
160 |
+
title = info['title']
|
161 |
+
author = info.get("author", None)
|
162 |
+
cover = f"weights/{name}/{info['cover']}"
|
163 |
+
index = f"weights/{name}/{info['feature_retrieval_library']}"
|
164 |
+
cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
|
165 |
+
tgt_sr = cpt["config"][-1]
|
166 |
+
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
|
167 |
+
if_f0 = cpt.get("f0", 1)
|
168 |
+
if if_f0 == 1:
|
169 |
+
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
|
170 |
+
else:
|
171 |
+
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
172 |
+
del net_g.enc_q
|
173 |
+
print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
|
174 |
+
net_g.eval().to(config.device)
|
175 |
+
if config.is_half:
|
176 |
+
net_g = net_g.half()
|
177 |
+
else:
|
178 |
+
net_g = net_g.float()
|
179 |
+
vc = VC(tgt_sr, config)
|
180 |
+
models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
|
181 |
+
else:
|
182 |
+
folder_path = "weights"
|
183 |
+
for name in os.listdir(folder_path):
|
184 |
+
print("check folder: " + name)
|
185 |
+
if name.startswith("."): break
|
186 |
+
cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
|
187 |
+
index_path = glob.glob(f"{folder_path}/{name}/*.index")
|
188 |
+
checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
|
189 |
+
title = name
|
190 |
+
author = ""
|
191 |
+
if cover_path:
|
192 |
+
cover = cover_path[0]
|
193 |
+
else:
|
194 |
+
cover = ""
|
195 |
+
index = index_path[0]
|
196 |
+
cpt = torch.load(checkpoint_path[0], map_location="cpu")
|
197 |
+
tgt_sr = cpt["config"][-1]
|
198 |
+
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
|
199 |
+
if_f0 = cpt.get("f0", 1)
|
200 |
+
if if_f0 == 1:
|
201 |
+
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
|
202 |
+
else:
|
203 |
+
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
204 |
+
del net_g.enc_q
|
205 |
+
print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
|
206 |
+
net_g.eval().to(config.device)
|
207 |
+
if config.is_half:
|
208 |
+
net_g = net_g.half()
|
209 |
+
else:
|
210 |
+
net_g = net_g.float()
|
211 |
+
vc = VC(tgt_sr, config)
|
212 |
+
models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
|
213 |
with gr.Blocks() as app:
|
214 |
gr.Markdown(
|
215 |
"# <center> RVC Models (Latest Update)\n"
|
|
|
219 |
"[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
|
220 |
)
|
221 |
with gr.Tabs():
|
222 |
+
for (name, title, author, cover, vc_fn) in models:
|
223 |
with gr.TabItem(name):
|
224 |
with gr.Row():
|
225 |
gr.Markdown(
|
226 |
'<div align="center">'
|
227 |
f'<div>{title}</div>\n'+
|
228 |
+
(f'<div>Model author: {author}</div>' if author else "")+
|
229 |
(f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
|
230 |
'</div>'
|
231 |
)
|
|
|
281 |
if config.files:
|
282 |
vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
|
283 |
vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
|
284 |
+
app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
|
app.py
CHANGED
@@ -102,41 +102,71 @@ if __name__ == '__main__':
|
|
102 |
models = []
|
103 |
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
|
104 |
voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
net_g
|
124 |
-
|
125 |
-
net_g
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
with gr.Blocks() as app:
|
136 |
gr.Markdown(
|
137 |
"# <center> RVC Models (Latest Update)\n"
|
138 |
"## <center> The input audio should be clean and pure voice without background music.\n"
|
139 |
"### <center> Recommended to use google colab for more features. \n"
|
|
|
140 |
"[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
|
141 |
"[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
|
142 |
)
|
@@ -147,6 +177,7 @@ if __name__ == '__main__':
|
|
147 |
gr.Markdown(
|
148 |
'<div align="center">'
|
149 |
f'<div>{title}</div>\n'+
|
|
|
150 |
(f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
|
151 |
'</div>'
|
152 |
)
|
@@ -176,4 +207,4 @@ if __name__ == '__main__':
|
|
176 |
vc_output2 = gr.Audio(label="Output Audio")
|
177 |
vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
|
178 |
tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
|
179 |
-
app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.share)
|
|
|
102 |
models = []
|
103 |
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
|
104 |
voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
105 |
+
if config.json or limitation:
|
106 |
+
with open("weights/model_info.json", "r", encoding="utf-8") as f:
|
107 |
+
models_info = json.load(f)
|
108 |
+
for name, info in models_info.items():
|
109 |
+
if not info['enable']:
|
110 |
+
continue
|
111 |
+
title = info['title']
|
112 |
+
author = info.get("author", None)
|
113 |
+
cover = f"weights/{name}/{info['cover']}"
|
114 |
+
index = f"weights/{name}/{info['feature_retrieval_library']}"
|
115 |
+
cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
|
116 |
+
tgt_sr = cpt["config"][-1]
|
117 |
+
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
|
118 |
+
if_f0 = cpt.get("f0", 1)
|
119 |
+
if if_f0 == 1:
|
120 |
+
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
|
121 |
+
else:
|
122 |
+
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
123 |
+
del net_g.enc_q
|
124 |
+
print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
|
125 |
+
net_g.eval().to(config.device)
|
126 |
+
if config.is_half:
|
127 |
+
net_g = net_g.half()
|
128 |
+
else:
|
129 |
+
net_g = net_g.float()
|
130 |
+
vc = VC(tgt_sr, config)
|
131 |
+
models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
|
132 |
+
else:
|
133 |
+
folder_path = "weights"
|
134 |
+
for name in os.listdir(folder_path):
|
135 |
+
print("check folder: " + name)
|
136 |
+
if name.startswith("."): break
|
137 |
+
cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
|
138 |
+
index_path = glob.glob(f"{folder_path}/{name}/*.index")
|
139 |
+
checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
|
140 |
+
title = name
|
141 |
+
author = ""
|
142 |
+
if cover_path:
|
143 |
+
cover = cover_path[0]
|
144 |
+
else:
|
145 |
+
cover = ""
|
146 |
+
index = index_path[0]
|
147 |
+
cpt = torch.load(checkpoint_path[0], map_location="cpu")
|
148 |
+
tgt_sr = cpt["config"][-1]
|
149 |
+
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
|
150 |
+
if_f0 = cpt.get("f0", 1)
|
151 |
+
if if_f0 == 1:
|
152 |
+
net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
|
153 |
+
else:
|
154 |
+
net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
|
155 |
+
del net_g.enc_q
|
156 |
+
print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净, 真奇葩
|
157 |
+
net_g.eval().to(config.device)
|
158 |
+
if config.is_half:
|
159 |
+
net_g = net_g.half()
|
160 |
+
else:
|
161 |
+
net_g = net_g.float()
|
162 |
+
vc = VC(tgt_sr, config)
|
163 |
+
models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
|
164 |
with gr.Blocks() as app:
|
165 |
gr.Markdown(
|
166 |
"# <center> RVC Models (Latest Update)\n"
|
167 |
"## <center> The input audio should be clean and pure voice without background music.\n"
|
168 |
"### <center> Recommended to use google colab for more features. \n"
|
169 |
+
"##### <center> Total_fea.npy is depricated.\nPlease regenerate your model to latest RVC.\n"
|
170 |
"[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
|
171 |
"[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
|
172 |
)
|
|
|
177 |
gr.Markdown(
|
178 |
'<div align="center">'
|
179 |
f'<div>{title}</div>\n'+
|
180 |
+
(f'<div>Model author: {author}</div>' if author else "")+
|
181 |
(f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
|
182 |
'</div>'
|
183 |
)
|
|
|
207 |
vc_output2 = gr.Audio(label="Output Audio")
|
208 |
vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
|
209 |
tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
|
210 |
+
app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
|
config.py
CHANGED
@@ -13,12 +13,12 @@ class Config:
|
|
13 |
(
|
14 |
self.python_cmd,
|
15 |
self.listen_port,
|
16 |
-
self.
|
17 |
self.noparallel,
|
18 |
self.noautoopen,
|
19 |
self.api,
|
20 |
-
self.share,
|
21 |
-
self.files
|
22 |
) = self.arg_parse()
|
23 |
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
24 |
|
@@ -39,8 +39,8 @@ class Config:
|
|
39 |
help="Do not open in browser automatically",
|
40 |
)
|
41 |
parser.add_argument('--api', action="store_true", default=False)
|
42 |
-
parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
|
43 |
parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
|
|
|
44 |
cmd_opts = parser.parse_args()
|
45 |
|
46 |
cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
|
@@ -52,8 +52,8 @@ class Config:
|
|
52 |
cmd_opts.noparallel,
|
53 |
cmd_opts.noautoopen,
|
54 |
cmd_opts.api,
|
55 |
-
cmd_opts.share,
|
56 |
cmd_opts.files,
|
|
|
57 |
)
|
58 |
|
59 |
def device_config(self) -> tuple:
|
|
|
13 |
(
|
14 |
self.python_cmd,
|
15 |
self.listen_port,
|
16 |
+
self.colab,
|
17 |
self.noparallel,
|
18 |
self.noautoopen,
|
19 |
self.api,
|
20 |
+
self.files,
|
21 |
+
self.json
|
22 |
) = self.arg_parse()
|
23 |
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
24 |
|
|
|
39 |
help="Do not open in browser automatically",
|
40 |
)
|
41 |
parser.add_argument('--api', action="store_true", default=False)
|
|
|
42 |
parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
|
43 |
+
parser.add_argument("--json", action="store_true", default=False, help="use model_info.json")
|
44 |
cmd_opts = parser.parse_args()
|
45 |
|
46 |
cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
|
|
|
52 |
cmd_opts.noparallel,
|
53 |
cmd_opts.noautoopen,
|
54 |
cmd_opts.api,
|
|
|
55 |
cmd_opts.files,
|
56 |
+
cmd_opts.json
|
57 |
)
|
58 |
|
59 |
def device_config(self) -> tuple:
|
requirements-full.txt
CHANGED
@@ -44,6 +44,5 @@ audioread
|
|
44 |
uvicorn>=0.21.1
|
45 |
colorama>=0.4.6
|
46 |
edge-tts
|
47 |
-
demucs
|
48 |
yt_dlp
|
49 |
ffmpeg
|
|
|
44 |
uvicorn>=0.21.1
|
45 |
colorama>=0.4.6
|
46 |
edge-tts
|
|
|
47 |
yt_dlp
|
48 |
ffmpeg
|
weights/model_info.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"nilou-jp": {
|
3 |
+
"enable": true,
|
4 |
+
"name": "nilou-jp",
|
5 |
+
"title": "Genshin Impact - Nilou",
|
6 |
+
"cover": "cover.png",
|
7 |
+
"feature_retrieval_library": "added_IVF218_Flat_nprobe_5.index",
|
8 |
+
"author":"ArkanDash"
|
9 |
+
}
|
10 |
+
}
|