Spaces:
Running
Running
Add more description
Browse files
app.py
CHANGED
@@ -6,7 +6,18 @@ import soundfile as sf
|
|
6 |
from s2st_inference import s2st_inference
|
7 |
from utils import download_model
|
8 |
|
9 |
-
DESCRIPTION = r"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
SAMPLE_RATE = 16000
|
12 |
MAX_INPUT_LENGTH = 60 # seconds
|
|
|
6 |
from s2st_inference import s2st_inference
|
7 |
from utils import download_model
|
8 |
|
9 |
+
DESCRIPTION = r"""
|
10 |
+
**Speech-to-Speech Translation from Spanish to English**
|
11 |
+
|
12 |
+
- Paper: Direct Speech-to-Speech Translation With Discrete Units
|
13 |
+
- Dataset: CVSS-C
|
14 |
+
- Toolkit: [ESPnet](https://github.com/espnet/espnet)
|
15 |
+
- Pretrained Speech-to-Unit translation model: https://huggingface.co/espnet/jiyang_tang_cvss-c_es-en_discrete_unit
|
16 |
+
- Pretrained WaveGAN vocoder: https://huggingface.co/espnet/cvss-c_en_wavegan_hubert_vocoder
|
17 |
+
|
18 |
+
Part of a CMU MIIS capstone project with [@realzza](https://github.com/realzza)
|
19 |
+
and [@sophia1488](https://github.com/sophia1488)
|
20 |
+
"""
|
21 |
|
22 |
SAMPLE_RATE = 16000
|
23 |
MAX_INPUT_LENGTH = 60 # seconds
|