|
--- |
|
tags: |
|
- espnet |
|
- audio |
|
- speech-translation |
|
language: |
|
- en |
|
- es |
|
datasets: |
|
- cvss |
|
license: cc-by-4.0 |
|
--- |
|
|
|
## ESPnet2 S2ST model |
|
|
|
The vocoder is available [here](https://huggingface.co/espnet/cvss-c_en_wavegan_hubert_vocoder); it was trained by [realzza](https://github.com/realzza). |
|
|
|
### `espnet/jiyang_tang_cvss-c_es-en_discrete_unit` |
|
|
|
This model was trained by Jiyang Tang using the cvss recipe in [espnet](https://github.com/espnet/espnet/). |
|
|
|
### Demo: How to use in ESPnet2 |
|
|
|
Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html) |
|
if you haven't done that already. |
|
|
|
```bash |
|
cd espnet |
|
git checkout c002f05ab3ef82938b6a980806cd7f97baba2299 |
|
pip install -e . |
|
cd egs2/cvss/s2st1 |
|
./run.sh --skip_data_prep false --skip_train true --download_model espnet/jiyang_tang_cvss-c_es-en_discrete_unit |
|
``` |
|
|
|
<!-- Generated by scripts/utils/show_translation_result.sh --> |
|
# RESULTS |
|
## Environments |
|
- date: `Wed Oct 4 22:20:55 EDT 2023` |
|
- python version: `3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]` |
|
- espnet version: `espnet 202308` |
|
- pytorch version: `pytorch 1.13.1` |
|
- Git hash: `79a3b3e2d9c9105f0f3f6d92d282e17f9ca91ed0` |
|
- Commit date: `Mon Sep 25 16:39:40 2023 -0400` |
|
|
|
## S2ST config |
|
|
|
<details><summary>expand</summary> |
|
|
|
``` |
|
config: conf/train_s2st_discrete_unit.yaml |
|
print_config: false |
|
log_level: INFO |
|
drop_last_iter: false |
|
dry_run: false |
|
iterator_type: sequence |
|
valid_iterator_type: null |
|
output_dir: exp/s2st_train_s2st_discrete_unit_raw_fbank_es_en |
|
ngpu: 1 |
|
seed: 0 |
|
num_workers: 2 |
|
num_att_plot: 0 |
|
dist_backend: nccl |
|
dist_init_method: env:// |
|
dist_world_size: 2 |
|
dist_rank: 0 |
|
local_rank: 0 |
|
dist_master_addr: localhost |
|
dist_master_port: 56635 |
|
dist_launcher: null |
|
multiprocessing_distributed: true |
|
unused_parameters: false |
|
sharded_ddp: false |
|
cudnn_enabled: true |
|
cudnn_benchmark: false |
|
cudnn_deterministic: true |
|
collect_stats: false |
|
write_collected_feats: false |
|
max_epoch: 500 |
|
patience: null |
|
val_scheduler_criterion: |
|
- valid |
|
- loss |
|
early_stopping_criterion: |
|
- valid |
|
- loss |
|
- min |
|
best_model_criterion: |
|
- - valid |
|
- loss |
|
- min |
|
- - train |
|
- loss |
|
- min |
|
keep_nbest_models: 5 |
|
nbest_averaging_interval: 0 |
|
grad_clip: 1.0 |
|
grad_clip_type: 2.0 |
|
grad_noise: false |
|
accum_grad: 4 |
|
no_forward_run: false |
|
resume: true |
|
train_dtype: float32 |
|
use_amp: false |
|
log_interval: null |
|
use_matplotlib: true |
|
use_tensorboard: true |
|
create_graph_in_tensorboard: false |
|
use_wandb: false |
|
wandb_project: null |
|
wandb_id: null |
|
wandb_entity: null |
|
wandb_name: null |
|
wandb_model_log_interval: -1 |
|
detect_anomaly: false |
|
pretrain_path: null |
|
init_param: [] |
|
ignore_init_mismatch: false |
|
freeze_param: [] |
|
num_iters_per_epoch: null |
|
batch_size: 110 |
|
valid_batch_size: null |
|
batch_bins: 1000000 |
|
valid_batch_bins: null |
|
train_shape_file: |
|
- exp/s2st_stats_raw_es_en/train/tgt_speech_shape |
|
- exp/s2st_stats_raw_es_en/train/src_speech_shape |
|
- exp/s2st_stats_raw_es_en/train/src_text_shape.char |
|
- exp/s2st_stats_raw_es_en/train/tgt_text_shape.char |
|
valid_shape_file: |
|
- exp/s2st_stats_raw_es_en/valid/src_speech_shape |
|
- exp/s2st_stats_raw_es_en/valid/tgt_speech_shape |
|
- exp/s2st_stats_raw_es_en/valid/tgt_text_shape.char |
|
- exp/s2st_stats_raw_es_en/valid/src_text_shape.char |
|
batch_type: sorted |
|
valid_batch_type: null |
|
fold_length: |
|
- 800 |
|
- 150 |
|
- 150 |
|
- 150 |
|
sort_in_batch: descending |
|
shuffle_within_batch: false |
|
sort_batch: descending |
|
multiple_iterator: false |
|
chunk_length: 500 |
|
chunk_shift_ratio: 0.5 |
|
num_cache_chunks: 1024 |
|
chunk_excluded_key_prefixes: [] |
|
train_data_path_and_name_and_type: |
|
- - dump/raw/train_es/text.km.hubert_layer6_500.en.unique |
|
- tgt_speech |
|
- text |
|
- - dump/raw/train_es/wav.scp.es |
|
- src_speech |
|
- sound |
|
- - dump/raw/train_es/text.es |
|
- src_text |
|
- text |
|
- - dump/raw/train_es/text.en |
|
- tgt_text |
|
- text |
|
valid_data_path_and_name_and_type: |
|
- - dump/raw/dev_es/wav.scp.es |
|
- src_speech |
|
- sound |
|
- - dump/raw/dev_es/text.km.hubert_layer6_500.en.unique |
|
- tgt_speech |
|
- text |
|
- - dump/raw/dev_es/text.en |
|
- tgt_text |
|
- text |
|
- - dump/raw/dev_es/text.es |
|
- src_text |
|
- text |
|
allow_variable_data_keys: false |
|
max_cache_size: 0.0 |
|
max_cache_fd: 32 |
|
valid_max_cache_size: null |
|
exclude_weight_decay: false |
|
exclude_weight_decay_conf: {} |
|
optim: adamw |
|
optim_conf: |
|
lr: 0.0005 |
|
eps: 1.0e-06 |
|
scheduler: warmuplr |
|
scheduler_conf: |
|
warmup_steps: 10000 |
|
s2st_type: discrete_unit |
|
tgt_token_list: |
|
- <blank> |
|
- <unk> |
|
- <space> |
|
- e |
|
- a |
|
- t |
|
- i |
|
- o |
|
- s |
|
- n |
|
- r |
|
- h |
|
- l |
|
- d |
|
- c |
|
- u |
|
- m |
|
- f |
|
- p |
|
- g |
|
- w |
|
- y |
|
- b |
|
- v |
|
- k |
|
- '''' |
|
- x |
|
- j |
|
- z |
|
- q |
|
- ñ |
|
- '-' |
|
- í |
|
- ó |
|
- á |
|
- é |
|
- ú |
|
- â |
|
- . |
|
- ʻ |
|
- ð |
|
- º |
|
- ə |
|
- ā |
|
- ̇ |
|
- '!' |
|
- þ |
|
- <sos/eos> |
|
src_token_list: |
|
- <blank> |
|
- <unk> |
|
- <space> |
|
- E |
|
- A |
|
- O |
|
- S |
|
- N |
|
- R |
|
- I |
|
- L |
|
- D |
|
- T |
|
- C |
|
- U |
|
- M |
|
- P |
|
- . |
|
- B |
|
- G |
|
- V |
|
- F |
|
- H |
|
- Y |
|
- ',' |
|
- ó |
|
- '"' |
|
- Q |
|
- í |
|
- J |
|
- á |
|
- Z |
|
- ñ |
|
- X |
|
- ú |
|
- K |
|
- '!' |
|
- '?' |
|
- W |
|
- é |
|
- ':' |
|
- '-' |
|
- ¿ |
|
- ¡ |
|
- Á |
|
- '''' |
|
- ; |
|
- ü |
|
- ’ |
|
- — |
|
- É |
|
- ö |
|
- ã |
|
- Ó |
|
- ‘ |
|
- ō |
|
- “ |
|
- â |
|
- – |
|
- Ú |
|
- ë |
|
- ä |
|
- _ |
|
- ā |
|
- ´ |
|
- ū |
|
- ― |
|
- ¨ |
|
- ø |
|
- ô |
|
- ê |
|
- æ |
|
- Í |
|
- ì |
|
- ć |
|
- Ñ |
|
- č |
|
- е |
|
- À |
|
- à |
|
- '`' |
|
- ゴ |
|
- ː |
|
- '|' |
|
- Ş |
|
- ‹ |
|
- › |
|
- Š |
|
- Č |
|
- ё |
|
- š |
|
- ï |
|
- … |
|
- ß |
|
- ř |
|
- ă |
|
- ʻ |
|
- ý |
|
- ° |
|
- ė |
|
- ò |
|
- ミ |
|
- 箱 |
|
- 消 |
|
- し |
|
- ム |
|
- ś |
|
- „ |
|
- Ś |
|
- ə |
|
- 鮨 |
|
- 鮓 |
|
- ł |
|
- Ö |
|
- û |
|
- · |
|
- Ä |
|
- ń |
|
- « |
|
- » |
|
- ذ |
|
- ه |
|
- ب |
|
- ي |
|
- ة |
|
- ṃ |
|
- ě |
|
- ‧ |
|
- İ |
|
- ı |
|
- Ø |
|
- î |
|
- ī |
|
- ț |
|
- Æ |
|
- Þ |
|
- Ϙ |
|
- ª |
|
- の |
|
- Е |
|
- ð |
|
- '=' |
|
- Ð |
|
- '&' |
|
- ž |
|
- ” |
|
- œ |
|
- <sos/eos> |
|
unit_token_list: |
|
- '2' |
|
- '179' |
|
- '408' |
|
- '66' |
|
- '135' |
|
- '442' |
|
- '7' |
|
- '130' |
|
- '106' |
|
- '112' |
|
- '195' |
|
- '363' |
|
- '278' |
|
- '249' |
|
- '280' |
|
- '243' |
|
- '279' |
|
- '197' |
|
- '16' |
|
- '270' |
|
- '483' |
|
- '212' |
|
- '437' |
|
- '313' |
|
- '429' |
|
- '110' |
|
- '19' |
|
- '142' |
|
- '152' |
|
- '175' |
|
- '84' |
|
- '34' |
|
- '359' |
|
- '14' |
|
- '269' |
|
- '267' |
|
- '41' |
|
- '60' |
|
- '190' |
|
- '450' |
|
- '180' |
|
- '171' |
|
- '209' |
|
- '348' |
|
- '55' |
|
- '383' |
|
- '56' |
|
- '158' |
|
- '17' |
|
- '200' |
|
- '53' |
|
- '35' |
|
- '390' |
|
- '122' |
|
- '255' |
|
- '491' |
|
- '452' |
|
- '471' |
|
- '420' |
|
- '306' |
|
- '11' |
|
- '54' |
|
- '9' |
|
- '26' |
|
- '29' |
|
- '454' |
|
- '104' |
|
- '107' |
|
- '30' |
|
- '147' |
|
- '257' |
|
- '448' |
|
- '51' |
|
- '232' |
|
- '74' |
|
- '43' |
|
- '294' |
|
- '151' |
|
- '146' |
|
- '226' |
|
- '45' |
|
- '461' |
|
- '63' |
|
- '369' |
|
- '244' |
|
- '4' |
|
- '76' |
|
- '131' |
|
- '27' |
|
- '327' |
|
- '177' |
|
- '204' |
|
- '139' |
|
- '358' |
|
- '6' |
|
- '284' |
|
- '310' |
|
- '415' |
|
- '182' |
|
- '407' |
|
- '326' |
|
- '319' |
|
- '231' |
|
- '88' |
|
- '476' |
|
- '109' |
|
- '166' |
|
- '417' |
|
- '456' |
|
- '105' |
|
- '354' |
|
- '0' |
|
- '318' |
|
- '336' |
|
- '314' |
|
- '159' |
|
- '281' |
|
- '413' |
|
- '95' |
|
- '73' |
|
- '296' |
|
- '422' |
|
- '432' |
|
- '39' |
|
- '431' |
|
- '36' |
|
- '447' |
|
- '468' |
|
- '427' |
|
- '378' |
|
- '248' |
|
- '322' |
|
- '47' |
|
- '220' |
|
- '82' |
|
- '181' |
|
- '391' |
|
- '494' |
|
- '344' |
|
- '435' |
|
- '178' |
|
- '61' |
|
- '129' |
|
- '114' |
|
- '302' |
|
- '392' |
|
- '150' |
|
- '223' |
|
- '79' |
|
- '438' |
|
- '262' |
|
- '371' |
|
- '203' |
|
- '239' |
|
- '488' |
|
- '247' |
|
- '283' |
|
- '416' |
|
- '68' |
|
- '395' |
|
- '184' |
|
- '474' |
|
- '141' |
|
- '89' |
|
- '342' |
|
- '13' |
|
- '298' |
|
- '125' |
|
- '191' |
|
- '165' |
|
- '24' |
|
- '441' |
|
- '227' |
|
- '196' |
|
- '258' |
|
- '133' |
|
- '168' |
|
- '64' |
|
- '123' |
|
- '400' |
|
- '81' |
|
- '217' |
|
- '253' |
|
- '132' |
|
- '285' |
|
- '28' |
|
- '188' |
|
- '375' |
|
- '213' |
|
- '242' |
|
- '236' |
|
- '453' |
|
- '225' |
|
- '164' |
|
- '261' |
|
- '374' |
|
- '272' |
|
- '325' |
|
- '495' |
|
- '460' |
|
- '330' |
|
- '48' |
|
- '451' |
|
- '323' |
|
- '458' |
|
- '263' |
|
- '144' |
|
- '160' |
|
- '149' |
|
- '406' |
|
- '77' |
|
- '33' |
|
- '368' |
|
- '332' |
|
- '205' |
|
- '50' |
|
- '290' |
|
- '401' |
|
- '490' |
|
- '331' |
|
- '436' |
|
- '5' |
|
- '186' |
|
- '288' |
|
- '148' |
|
- '219' |
|
- '215' |
|
- '93' |
|
- '434' |
|
- '103' |
|
- '489' |
|
- '21' |
|
- '92' |
|
- '386' |
|
- '97' |
|
- '328' |
|
- '121' |
|
- '301' |
|
- '46' |
|
- '224' |
|
- '154' |
|
- '80' |
|
- '352' |
|
- '96' |
|
- '124' |
|
- '69' |
|
- '457' |
|
- '83' |
|
- '52' |
|
- '85' |
|
- '62' |
|
- '380' |
|
- '410' |
|
- '167' |
|
- '333' |
|
- '31' |
|
- '315' |
|
- '78' |
|
- '271' |
|
- '10' |
|
- '492' |
|
- '49' |
|
- '208' |
|
- '295' |
|
- '86' |
|
- '199' |
|
- '445' |
|
- '140' |
|
- '357' |
|
- '187' |
|
- '161' |
|
- '238' |
|
- '351' |
|
- '155' |
|
- '193' |
|
- '345' |
|
- '486' |
|
- '37' |
|
- '266' |
|
- '185' |
|
- '143' |
|
- '361' |
|
- '174' |
|
- '430' |
|
- '349' |
|
- '23' |
|
- '423' |
|
- '388' |
|
- '309' |
|
- '470' |
|
- '169' |
|
- '370' |
|
- '463' |
|
- '245' |
|
- '320' |
|
- '237' |
|
- '316' |
|
- '277' |
|
- '482' |
|
- '218' |
|
- '198' |
|
- '117' |
|
- '428' |
|
- '340' |
|
- '475' |
|
- '418' |
|
- '275' |
|
- '299' |
|
- '297' |
|
- '90' |
|
- '260' |
|
- '276' |
|
- '137' |
|
- '366' |
|
- '353' |
|
- '341' |
|
- '241' |
|
- '496' |
|
- '228' |
|
- '287' |
|
- '214' |
|
- '264' |
|
- '108' |
|
- '44' |
|
- '350' |
|
- '3' |
|
- '286' |
|
- '303' |
|
- '12' |
|
- '372' |
|
- '156' |
|
- '321' |
|
- '116' |
|
- '385' |
|
- '194' |
|
- '360' |
|
- '119' |
|
- '145' |
|
- '22' |
|
- '414' |
|
- '462' |
|
- '70' |
|
- '449' |
|
- '251' |
|
- '387' |
|
- '466' |
|
- '273' |
|
- '440' |
|
- '58' |
|
- '304' |
|
- '162' |
|
- '404' |
|
- '15' |
|
- '176' |
|
- '384' |
|
- '293' |
|
- '397' |
|
- '173' |
|
- '59' |
|
- '485' |
|
- '75' |
|
- '102' |
|
- '282' |
|
- '233' |
|
- '115' |
|
- '210' |
|
- '222' |
|
- '18' |
|
- '498' |
|
- '99' |
|
- '398' |
|
- '91' |
|
- '221' |
|
- '396' |
|
- '479' |
|
- '300' |
|
- '339' |
|
- '367' |
|
- '459' |
|
- '20' |
|
- '216' |
|
- '426' |
|
- '87' |
|
- '382' |
|
- '424' |
|
- '446' |
|
- '1' |
|
- '265' |
|
- '172' |
|
- '346' |
|
- '259' |
|
- '183' |
|
- '113' |
|
- '307' |
|
- '311' |
|
- '201' |
|
- '170' |
|
- '240' |
|
- '25' |
|
- '291' |
|
- '393' |
|
- '444' |
|
- '292' |
|
- '334' |
|
- '234' |
|
- '379' |
|
- '153' |
|
- '42' |
|
- '250' |
|
- '409' |
|
- '464' |
|
- '394' |
|
- '256' |
|
- '399' |
|
- '465' |
|
- '381' |
|
- '364' |
|
- '157' |
|
- '356' |
|
- '268' |
|
- '65' |
|
- '343' |
|
- '338' |
|
- '493' |
|
- '100' |
|
- '405' |
|
- '421' |
|
- '111' |
|
- '289' |
|
- '365' |
|
- '246' |
|
- '8' |
|
- '101' |
|
- '163' |
|
- '252' |
|
- '138' |
|
- '72' |
|
- '373' |
|
- '362' |
|
- '120' |
|
- '425' |
|
- '480' |
|
- '32' |
|
- '254' |
|
- '202' |
|
- '484' |
|
- '412' |
|
- '473' |
|
- '71' |
|
- '355' |
|
- '443' |
|
- '134' |
|
- '324' |
|
- '118' |
|
- '402' |
|
- '230' |
|
- '67' |
|
- '98' |
|
- '335' |
|
- '317' |
|
- '57' |
|
- '329' |
|
- '229' |
|
- '419' |
|
- '94' |
|
- '128' |
|
- '376' |
|
- '433' |
|
- '192' |
|
- '235' |
|
- '38' |
|
- '312' |
|
- '347' |
|
- '499' |
|
- '274' |
|
- '389' |
|
- '127' |
|
- '439' |
|
- '207' |
|
- '478' |
|
- '403' |
|
- '467' |
|
- '411' |
|
- '455' |
|
- '337' |
|
- '469' |
|
- '206' |
|
- '497' |
|
- '136' |
|
- '481' |
|
- '487' |
|
- '40' |
|
- '477' |
|
- '472' |
|
- '189' |
|
- '308' |
|
- '377' |
|
- '305' |
|
- '211' |
|
- '126' |
|
- <unk> |
|
- <sos/eos> |
|
odim: null |
|
init: null |
|
input_size: null |
|
output_size: 500 |
|
asr_ctc: true |
|
st_ctc: true |
|
asr_ctc_conf: |
|
dropout_rate: 0.0 |
|
ctc_type: builtin |
|
reduce: true |
|
ignore_nan_grad: null |
|
zero_infinity: true |
|
st_ctc_conf: |
|
dropout_rate: 0.0 |
|
ctc_type: builtin |
|
reduce: true |
|
ignore_nan_grad: null |
|
zero_infinity: true |
|
model_conf: |
|
ignore_id: -1 |
|
report_cer: true |
|
report_wer: true |
|
report_bleu: true |
|
sym_space: <space> |
|
sym_blank: <blank> |
|
extract_feats_in_collect_stats: true |
|
use_preprocessor: true |
|
tgt_token_type: char |
|
src_token_type: char |
|
tgt_bpemodel: null |
|
src_bpemodel: null |
|
non_linguistic_symbols: null |
|
cleaner: null |
|
tgt_g2p: null |
|
src_g2p: null |
|
losses: |
|
- name: asr_ctc |
|
type: ctc |
|
conf: |
|
weight: 1.6 |
|
- name: src_attn |
|
type: attention |
|
conf: |
|
weight: 8.0 |
|
smoothing: 0.2 |
|
padding_idx: -1 |
|
- name: tgt_attn |
|
type: attention |
|
conf: |
|
weight: 8.0 |
|
smoothing: 0.2 |
|
padding_idx: -1 |
|
- name: st_ctc |
|
type: ctc |
|
conf: |
|
weight: 1.6 |
|
- name: synthesis |
|
type: attention |
|
conf: |
|
weight: 1.6 |
|
smoothing: 0.2 |
|
padding_idx: -1 |
|
speech_volume_normalize: null |
|
rir_scp: null |
|
rir_apply_prob: 1.0 |
|
noise_scp: null |
|
noise_apply_prob: 1.0 |
|
noise_db_range: '13_15' |
|
short_noise_thres: 0.5 |
|
frontend: default |
|
frontend_conf: |
|
n_fft: 512 |
|
win_length: 400 |
|
hop_length: 160 |
|
fs: 16k |
|
tgt_feats_extract: null |
|
tgt_feats_extract_conf: {} |
|
specaug: specaug |
|
specaug_conf: |
|
apply_time_warp: true |
|
time_warp_window: 5 |
|
time_warp_mode: bicubic |
|
apply_freq_mask: true |
|
freq_mask_width_range: |
|
- 0 |
|
- 27 |
|
num_freq_mask: 2 |
|
apply_time_mask: true |
|
time_mask_width_ratio_range: |
|
- 0.0 |
|
- 0.05 |
|
num_time_mask: 10 |
|
src_normalize: global_mvn |
|
src_normalize_conf: |
|
stats_file: exp/s2st_stats_raw_es_en/train/src_feats_stats.npz |
|
tgt_normalize: utterance_mvn |
|
tgt_normalize_conf: {} |
|
preencoder: null |
|
preencoder_conf: {} |
|
encoder: transformer |
|
encoder_conf: |
|
input_layer: conv2d |
|
num_blocks: 12 |
|
linear_units: 2048 |
|
dropout_rate: 0.1 |
|
output_size: 256 |
|
attention_heads: 4 |
|
attention_dropout_rate: 0.0 |
|
normalize_before: true |
|
postencoder: null |
|
postencoder_conf: {} |
|
asr_decoder: transformer |
|
asr_decoder_conf: |
|
input_layer: embed |
|
num_blocks: 2 |
|
linear_units: 2048 |
|
attention_heads: 4 |
|
st_decoder: transformer |
|
st_decoder_conf: |
|
input_layer: embed |
|
num_blocks: 2 |
|
linear_units: 2048 |
|
attention_heads: 4 |
|
aux_attention: null |
|
aux_attention_conf: {} |
|
unit_encoder: null |
|
unit_encoder_conf: {} |
|
synthesizer: discrete_unit |
|
synthesizer_conf: |
|
input_layer: embed |
|
num_blocks: 6 |
|
linear_units: 2048 |
|
attention_heads: 8 |
|
loss: tacotron |
|
loss_conf: {} |
|
required: |
|
- output_dir |
|
version: '202308' |
|
distributed: true |
|
``` |
|
|
|
</details> |
|
|
|
|
|
|
|
### Citing ESPnet |
|
|
|
```bibtex |
|
@inproceedings{watanabe2018espnet, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
title={{ESPnet}: End-to-End Speech Processing Toolkit}, |
|
year={2018}, |
|
booktitle={Proceedings of Interspeech}, |
|
pages={2207--2211}, |
|
doi={10.21437/Interspeech.2018-1456}, |
|
url={http://dx.doi.org/10.21437/Interspeech.2018-1456} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
``` |
|
|
|
or arXiv: |
|
|
|
```bibtex |
|
@misc{watanabe2018espnet, |
|
title={ESPnet: End-to-End Speech Processing Toolkit}, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
year={2018}, |
|
eprint={1804.00015}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.CL} |
|
} |
|
``` |