unilight committed
Commit fe970c6 · Parent(s): d2ddbfd
description and refactor
app.py
CHANGED
@@ -9,13 +9,6 @@ from huggingface_hub import hf_hub_download
 import torch
 import yaml
 
-# from s3prl_vc.upstream.interface import get_upstream
-# from s3prl.nn import Featurizer
-# import s3prl_vc.models
-# from s3prl_vc.utils import read_hdf5
-# from s3prl_vc.vocoder import Vocoder
-
-
 # ---------- Settings ----------
 GPU_ID = '-1'
 os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID
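A note on the settings block this hunk keeps: assigning `'-1'` to `CUDA_VISIBLE_DEVICES` before torch creates a CUDA context hides every GPU from the process, so the Space runs inference on CPU. A minimal sketch of the pattern, assuming only that torch is installed (the `GPU_ID` name mirrors the app):

```python
import os

# Must be set before torch initializes CUDA; '-1' hides all GPUs.
GPU_ID = '-1'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID

import torch

# With no visible devices, is_available() is False and we fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)  # -> cpu when GPU_ID == '-1'
```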
@@ -34,13 +27,6 @@ MIN_REQUIRED_WAV_LENGTH = 1040
 # jp_examples = sorted(glob(os.path.join(EXAMPLE_DIR, "jp", '*.wav')))
 # zh_examples = sorted(glob(os.path.join(EXAMPLE_DIR, "zh", '*.wav')))
 
-# TRGSPKS = ["TEF1", "TEF2", "TEM1", "TEM2"]
-
-# ref_samples = {
-#     trgspk: sorted(glob(os.path.join("./ref_samples", trgspk, '*.wav')))
-#     for trgspk in TRGSPKS
-# }
-
 # ---------- Logging ----------
 logger.add('app.log', mode='a')
 logger.info('============================= App restarted =============================')
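The logging setup kept by this hunk uses loguru. As a hedged illustration, not something stated in the diff: `logger.add` registers a file sink, and keyword arguments such as `mode` are forwarded to the underlying `open()` call, so `'a'` appends across restarts instead of truncating the log:

```python
from loguru import logger

# File sink; mode='a' is passed through to open(), so app.log is
# appended to on every restart rather than overwritten.
logger.add('app.log', mode='a')
logger.info('App restarted')
```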
@@ -131,13 +117,8 @@ with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as
 # Demo for SHEET: Speech Human Evaluation Estimation Toolkit
 ### [Paper (To be uploaded)] [[Code]](https://github.com/unilight/sheet)
 **SHEET** is a subjective speech quality assessment (SSQA) toolkit designed to conduct SSQA research. It was specifically designed to interactive with MOS-Bench, a collective of datasets to benchmark SSQA models.
-
 
-In this demo,
-
-**S3PRL-VC** is a voice conversion (VC) toolkit for benchmarking self-supervised speech representations (S3Rs). The term **any-to-one** means that the system can convert from any unseen speaker to a pre-defined speaker given in training.
-In this demo, you can record your voice, and the model will convert your voice to one of the four pre-defined speakers. These four speakers come from the **voice conversion challenge (VCC) 2020**. You can listen to the samples to get a sense of what these speakers sound like.
-The **RTF** of the system is around **1.5~2.5**, i.e. if you recorded a 5 second long audio, it will take 5 * (1.5~2.5) = 7.5~12.5 seconds to generate the output.
+In this demo, you can record your own voice or upload speech files to assess the quality.
 """
 )
 
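The removed RTF paragraph encodes simple linear scaling: generation time is roughly input duration times the real-time factor. A worked sketch of that arithmetic; the function name is hypothetical, not from the app:

```python
def expected_generation_time(audio_seconds: float,
                             rtf_low: float = 1.5,
                             rtf_high: float = 2.5) -> tuple[float, float]:
    """Return the (best, worst) case generation time for one clip."""
    return audio_seconds * rtf_low, audio_seconds * rtf_high

# The example from the removed text: a 5-second recording
# takes 5 * (1.5~2.5) = 7.5~12.5 seconds to convert.
print(expected_generation_time(5.0))  # (7.5, 12.5)
```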
@@ -175,9 +156,7 @@ with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as
 
 if __name__ == '__main__':
     try:
-        demo.launch(debug=True
-            # enable_queue=True,
-        )
+        demo.launch(debug=True)
     except KeyboardInterrupt as e:
         print(e)
 
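The last hunk closes the dangling parenthesis and drops the dead `enable_queue` comment. For context, and as an assumption about intent rather than anything stated in the commit: recent Gradio versions enable request queuing by chaining `queue()` on the Blocks object instead of passing a `launch()` flag, roughly:

```python
import gradio as gr

with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as demo:
    gr.Markdown("...")  # placeholder for the demo UI

if __name__ == '__main__':
    try:
        # queue() lines requests up instead of running them concurrently;
        # debug=True keeps the process attached and prints errors.
        demo.queue().launch(debug=True)
    except KeyboardInterrupt as e:
        print(e)
```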