unilight committed
Commit fe970c6 · 1 Parent(s): d2ddbfd

description and refactor

Files changed (1)
  1. app.py +2 -23
app.py CHANGED
```diff
@@ -9,13 +9,6 @@ from huggingface_hub import hf_hub_download
 import torch
 import yaml
 
-# from s3prl_vc.upstream.interface import get_upstream
-# from s3prl.nn import Featurizer
-# import s3prl_vc.models
-# from s3prl_vc.utils import read_hdf5
-# from s3prl_vc.vocoder import Vocoder
-
-
 # ---------- Settings ----------
 GPU_ID = '-1'
 os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID
```
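The settings block kept in this hunk pins the app to CPU: setting `CUDA_VISIBLE_DEVICES` to `'-1'` hides every GPU from the process. A minimal sketch of the effect, assuming a standard PyTorch install (the variable must be exported before CUDA is first initialized):

```python
import os

# Hide all GPUs from this process; must happen before CUDA is initialized.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import torch

# With no visible devices, PyTorch falls back to CPU.
assert not torch.cuda.is_available()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)  # cpu
```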
```diff
@@ -34,13 +27,6 @@ MIN_REQUIRED_WAV_LENGTH = 1040
 # jp_examples = sorted(glob(os.path.join(EXAMPLE_DIR, "jp", '*.wav')))
 # zh_examples = sorted(glob(os.path.join(EXAMPLE_DIR, "zh", '*.wav')))
 
-# TRGSPKS = ["TEF1", "TEF2", "TEM1", "TEM2"]
-
-# ref_samples = {
-#     trgspk: sorted(glob(os.path.join("./ref_samples", trgspk, '*.wav')))
-#     for trgspk in TRGSPKS
-# }
-
 # ---------- Logging ----------
 logger.add('app.log', mode='a')
 logger.info('============================= App restarted =============================')
```
```diff
@@ -131,13 +117,8 @@ with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as
 # Demo for SHEET: Speech Human Evaluation Estimation Toolkit
 ### [Paper (To be uploaded)] [[Code]](https://github.com/unilight/sheet)
 **SHEET** is a subjective speech quality assessment (SSQA) toolkit designed to conduct SSQA research. It was specifically designed to interact with MOS-Bench, a collection of datasets to benchmark SSQA models.
-
 
-In this demo, we provide interactive models
-
-**S3PRL-VC** is a voice conversion (VC) toolkit for benchmarking self-supervised speech representations (S3Rs). The term **any-to-one** means that the system can convert from any unseen speaker to a pre-defined speaker given in training.
-In this demo, you can record your voice, and the model will convert your voice to one of the four pre-defined speakers. These four speakers come from the **voice conversion challenge (VCC) 2020**. You can listen to the samples to get a sense of what these speakers sound like.
-The **RTF** of the system is around **1.5~2.5**, i.e. if you recorded a 5 second long audio, it will take 5 * (1.5~2.5) = 7.5~12.5 seconds to generate the output.
+In this demo, you can record your own voice or upload speech files to assess their quality.
 """
 )
 
```
```diff
@@ -175,9 +156,7 @@
 
 if __name__ == '__main__':
     try:
-        demo.launch(debug=True,
-                    # enable_queue=True,
-                    )
+        demo.launch(debug=True)
     except KeyboardInterrupt as e:
         print(e)
 
```
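Dropping the commented-out `enable_queue=True` argument matches a Gradio API change: recent releases removed the `enable_queue` parameter from `launch()`, and request queueing is instead enabled by calling `queue()` on the app. A hedged sketch of the equivalent setup, assuming a recent Gradio release (the `Blocks` body here is a placeholder, not this app's real UI):

```python
import gradio as gr

# Placeholder UI for illustration only.
with gr.Blocks(title="demo") as demo:
    gr.Markdown("Hello")

if __name__ == '__main__':
    # queue() replaces the old launch(enable_queue=True) flag and
    # returns the Blocks instance, so it chains with launch().
    demo.queue().launch(debug=True)
```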