unilight committed
Commit fe970c6 · Parent(s): d2ddbfd
description and refactor
app.py
CHANGED
@@ -9,13 +9,6 @@ from huggingface_hub import hf_hub_download
 import torch
 import yaml
 
-# from s3prl_vc.upstream.interface import get_upstream
-# from s3prl.nn import Featurizer
-# import s3prl_vc.models
-# from s3prl_vc.utils import read_hdf5
-# from s3prl_vc.vocoder import Vocoder
-
-
 # ---------- Settings ----------
 GPU_ID = '-1'
 os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID
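A note on the settings block this hunk keeps: assigning `'-1'` to `CUDA_VISIBLE_DEVICES` before torch creates a CUDA context hides every GPU from the process, so the Space runs inference on CPU. A minimal sketch of the pattern, assuming only that torch is installed (the `GPU_ID` name mirrors the app):

```python
import os

# Must be set before torch initializes CUDA; '-1' hides all GPUs.
GPU_ID = '-1'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID

import torch

# With no visible devices, is_available() is False and we fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)  # -> cpu when GPU_ID == '-1'
```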
@@ -34,13 +27,6 @@ MIN_REQUIRED_WAV_LENGTH = 1040
 # jp_examples = sorted(glob(os.path.join(EXAMPLE_DIR, "jp", '*.wav')))
 # zh_examples = sorted(glob(os.path.join(EXAMPLE_DIR, "zh", '*.wav')))
 
-# TRGSPKS = ["TEF1", "TEF2", "TEM1", "TEM2"]
-
-# ref_samples = {
-#     trgspk: sorted(glob(os.path.join("./ref_samples", trgspk, '*.wav')))
-#     for trgspk in TRGSPKS
-# }
-
 # ---------- Logging ----------
 logger.add('app.log', mode='a')
 logger.info('============================= App restarted =============================')
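The logging setup kept by this hunk uses loguru. As a hedged illustration, not something stated in the diff: `logger.add` registers a file sink, and keyword arguments such as `mode` are forwarded to the underlying `open()` call, so `'a'` appends across restarts instead of truncating the log:

```python
from loguru import logger

# File sink; mode='a' is passed through to open(), so app.log is
# appended to on every restart rather than overwritten.
logger.add('app.log', mode='a')
logger.info('App restarted')
```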
@@ -131,13 +117,8 @@ with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as
 # Demo for SHEET: Speech Human Evaluation Estimation Toolkit
 ### [Paper (To be uploaded)] [[Code]](https://github.com/unilight/sheet)
 **SHEET** is a subjective speech quality assessment (SSQA) toolkit designed to conduct SSQA research. It was specifically designed to interactive with MOS-Bench, a collective of datasets to benchmark SSQA models.
-
 
-In this demo,
-
-**S3PRL-VC** is a voice conversion (VC) toolkit for benchmarking self-supervised speech representations (S3Rs). The term **any-to-one** means that the system can convert from any unseen speaker to a pre-defined speaker given in training.
-In this demo, you can record your voice, and the model will convert your voice to one of the four pre-defined speakers. These four speakers come from the **voice conversion challenge (VCC) 2020**. You can listen to the samples to get a sense of what these speakers sound like.
-The **RTF** of the system is around **1.5~2.5**, i.e. if you recorded a 5 second long audio, it will take 5 * (1.5~2.5) = 7.5~12.5 seconds to generate the output.
+In this demo, you can record your own voice or upload speech files to assess the quality.
 """
 )
 
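The removed RTF paragraph encodes simple linear scaling: generation time is roughly input duration times the real-time factor. A worked sketch of that arithmetic; the function name is hypothetical, not from the app:

```python
def expected_generation_time(audio_seconds: float,
                             rtf_low: float = 1.5,
                             rtf_high: float = 2.5) -> tuple[float, float]:
    """Return the (best, worst) case generation time for one clip."""
    return audio_seconds * rtf_low, audio_seconds * rtf_high

# The example from the removed text: a 5-second recording
# takes 5 * (1.5~2.5) = 7.5~12.5 seconds to convert.
print(expected_generation_time(5.0))  # (7.5, 12.5)
```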
@@ -175,9 +156,7 @@ with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as
 
 if __name__ == '__main__':
     try:
-        demo.launch(debug=True
-            # enable_queue=True,
-        )
+        demo.launch(debug=True)
     except KeyboardInterrupt as e:
         print(e)
 
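The last hunk closes the dangling parenthesis and drops the dead `enable_queue` comment. For context, and as an assumption about intent rather than anything stated in the commit: recent Gradio versions enable request queuing by chaining `queue()` on the Blocks object instead of passing a `launch()` flag, roughly:

```python
import gradio as gr

with gr.Blocks(title="S3PRL-VC: Any-to-one voice conversion demo on VCC2020") as demo:
    gr.Markdown("...")  # placeholder for the demo UI

if __name__ == '__main__':
    try:
        # queue() lines requests up instead of running them concurrently;
        # debug=True keeps the process attached and prints errors.
        demo.queue().launch(debug=True)
    except KeyboardInterrupt as e:
        print(e)
```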