Amir Ramezani committed on
Commit
8d55a79
·
1 Parent(s): 1202a51

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ espeak-ng-data/ru_dict filter=lfs diff=lfs merge=lfs -text
37
+ espeak-ng-data/cmn_dict filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10.5

# All following paths are relative to /code inside the image
WORKDIR /code

# Upgrade pip before installing anything else
RUN python -m pip install --upgrade pip

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the rest of the build context into /code
COPY . .

# Port 7860 is the default port expected by Spaces
EXPOSE 7860

# Start the FastAPI app with uvicorn; CMD supplies the default arguments
ENTRYPOINT ["uvicorn"]
CMD ["app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Tts
3
- emoji: 💻
4
- colorFrom: green
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  ---
 
1
  ---
2
  title: Tts
3
+ emoji: 👀
4
+ colorFrom: gray
5
+ colorTo: green
6
  sdk: docker
7
  pinned: false
8
  ---
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import sys
3
+ import os
4
+ import numpy as np
5
+ import pandas as pd
6
+ import soundfile as sf
7
+ import sherpa_onnx
8
+ from fastapi import FastAPI
9
+ from fastapi.responses import StreamingResponse
10
+ import uvicorn
11
+
12
# Build the sherpa-onnx TTS configuration for the VITS model shipped next to
# this file (model.onnx, tokens.txt and the espeak-ng-data directory).
ljspeech_config = sherpa_onnx.OfflineTtsConfig(
    model=sherpa_onnx.OfflineTtsModelConfig(
        vits=sherpa_onnx.OfflineTtsVitsModelConfig(
            model='model.onnx',
            lexicon='',
            data_dir='espeak-ng-data',
            tokens='tokens.txt',
        ),
        num_threads=4,
    ),
)

# Fail at import time when the configuration does not validate.
if not ljspeech_config.validate():
    raise ValueError("Please check your config")

# Speech engine and web application used by the request handlers.
ljspeech = sherpa_onnx.OfflineTts(ljspeech_config)
app = FastAPI()
18
+
19
@app.get("/tts", response_class=StreamingResponse)
def do_tts(text: str):
    """Synthesize ``text`` and return it as a WAV audio response.

    Declared as a plain ``def`` rather than ``async def``: the synthesis call
    is blocking, so FastAPI runs this handler in its worker thread pool
    instead of stalling the event loop for every other request.

    NOTE(review): handlers may now run concurrently in worker threads --
    confirm ``ljspeech.generate`` is safe to call from multiple threads.

    Args:
        text: Text to speak, taken from the ``text`` query parameter.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` whose body is
        the generated audio encoded as a 24-bit PCM WAV file.
    """
    audio = ljspeech.generate(text)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, audio.samples, audio.sample_rate, 'PCM_24', format='WAV')
    # Send the finished file as a single chunk; iterating the BytesIO object
    # itself would split the binary data at every newline byte.
    return StreamingResponse(iter([wav_buffer.getvalue()]), media_type='audio/wav')
26
+
27
# Script entry point: serve the app directly with uvicorn on all interfaces,
# using the same port the Docker image exposes.
if __name__ == '__main__':
    uvicorn.run(app, port=7860, host='0.0.0.0')
config.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "vits",
3
+ "run_name": "",
4
+ "run_description": "",
5
+ "epochs": 10000,
6
+ "batch_size": null,
7
+ "eval_batch_size": null,
8
+ "mixed_precision": false,
9
+ "scheduler_after_epoch": true,
10
+ "run_eval": true,
11
+ "test_delay_epochs": 0,
12
+ "print_eval": false,
13
+ "print_step": 25,
14
+ "tb_plot_step": 100,
15
+ "tb_model_param_stats": false,
16
+ "save_step": 10000,
17
+ "checkpoint": true,
18
+ "keep_all_best": false,
19
+ "keep_after": 10000,
20
+ "num_loader_workers": null,
21
+ "num_eval_loader_workers": 0,
22
+ "use_noise_augment": false,
23
+ "output_path": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "audio": {
27
+ "fft_size": 1024,
28
+ "win_length": 1024,
29
+ "hop_length": 256,
30
+ "frame_shift_ms": null,
31
+ "frame_length_ms": null,
32
+ "stft_pad_mode": "reflect",
33
+ "sample_rate": 22050,
34
+ "resample": false,
35
+ "preemphasis": 0.0,
36
+ "ref_level_db": 20,
37
+ "do_sound_norm": false,
38
+ "log_func": "np.log10",
39
+ "do_trim_silence": true,
40
+ "trim_db": 45,
41
+ "power": 1.5,
42
+ "griffin_lim_iters": 60,
43
+ "num_mels": 80,
44
+ "mel_fmin": 0.0,
45
+ "mel_fmax": null,
46
+ "spec_gain": 20,
47
+ "do_amp_to_db_linear": true,
48
+ "do_amp_to_db_mel": true,
49
+ "signal_norm": true,
50
+ "min_level_db": -100,
51
+ "symmetric_norm": true,
52
+ "max_norm": 4.0,
53
+ "clip_norm": true,
54
+ "stats_path": null
55
+ },
56
+ "use_phonemes": true,
57
+ "phoneme_language": "en",
58
+ "compute_input_seq_cache": false,
59
+ "text_cleaner": "phoneme_cleaners",
60
+ "phonemizer": "espeak",
61
+ "enable_eos_bos_chars": false,
62
+ "test_sentences_file": "",
63
+ "phoneme_cache_path": null,
64
+ "characters":{
65
+ "characters_class": "TTS.tts.models.vits.VitsCharacters",
66
+ "pad": "_",
67
+ "eos": "",
68
+ "bos": "",
69
+ "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
70
+ "punctuations":";:,.!?¡¿—…\"«»“” ",
71
+ "phonemes":"ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
72
+ },
73
+ "batch_group_size": 0,
74
+ "loss_masking": null,
75
+ "min_seq_len": 13,
76
+ "max_seq_len": 200,
77
+ "compute_f0": false,
78
+ "compute_linear_spec": true,
79
+ "add_blank": true,
80
+ "datasets": [
81
+ {
82
+ "name": "",
83
+ "path": "",
84
+ "meta_file_train": "",
85
+ "ununsed_speakers": null,
86
+ "meta_file_val": "",
87
+ "meta_file_attn_mask": ""
88
+ }
89
+ ],
90
+ "optimizer": "AdamW",
91
+ "optimizer_params": {
92
+ "betas": [
93
+ 0.8,
94
+ 0.99
95
+ ],
96
+ "eps": 1e-09,
97
+ "weight_decay": 0.01
98
+ },
99
+ "lr_scheduler": "",
100
+ "lr_scheduler_params": {},
101
+ "test_sentences": [
102
+ "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
103
+ "Be a voice, not an echo.",
104
+ "I'm sorry Dave. I'm afraid I can't do that.",
105
+ "This cake is great. It's so delicious and moist.",
106
+ "Prior to November 22, 1963."
107
+ ],
108
+ "use_speaker_embedding": false,
109
+ "use_d_vector_file": false,
110
+ "d_vector_dim": 0,
111
+ "model_args": {
112
+ "num_chars": 179,
113
+ "out_channels": 513,
114
+ "spec_segment_size": 32,
115
+ "hidden_channels": 192,
116
+ "hidden_channels_ffn_text_encoder": 768,
117
+ "num_heads_text_encoder": 2,
118
+ "num_layers_text_encoder": 6,
119
+ "kernel_size_text_encoder": 3,
120
+ "dropout_p_text_encoder": 0.1,
121
+ "kernel_size_posterior_encoder": 5,
122
+ "dilation_rate_posterior_encoder": 1,
123
+ "num_layers_posterior_encoder": 16,
124
+ "kernel_size_flow": 5,
125
+ "dilation_rate_flow": 1,
126
+ "num_layers_flow": 4,
127
+ "resblock_type_decoder": "1",
128
+ "resblock_kernel_sizes_decoder": [
129
+ 3,
130
+ 7,
131
+ 11
132
+ ],
133
+ "resblock_dilation_sizes_decoder": [
134
+ [
135
+ 1,
136
+ 3,
137
+ 5
138
+ ],
139
+ [
140
+ 1,
141
+ 3,
142
+ 5
143
+ ],
144
+ [
145
+ 1,
146
+ 3,
147
+ 5
148
+ ]
149
+ ],
150
+ "upsample_rates_decoder": [
151
+ 8,
152
+ 8,
153
+ 2,
154
+ 2
155
+ ],
156
+ "upsample_initial_channel_decoder": 512,
157
+ "upsample_kernel_sizes_decoder": [
158
+ 16,
159
+ 16,
160
+ 4,
161
+ 4
162
+ ],
163
+ "use_sdp": true,
164
+ "noise_scale": 1.0,
165
+ "inference_noise_scale": 0.667,
166
+ "length_scale": 1,
167
+ "noise_scale_dp": 1.0,
168
+ "inference_noise_scale_dp": 0.8,
169
+ "max_inference_len": null,
170
+ "init_discriminator": false,
171
+ "use_spectral_norm_disriminator": false,
172
+ "use_speaker_embedding": false,
173
+ "num_speakers": 0,
174
+ "speakers_file": null,
175
+ "speaker_embedding_channels": 256,
176
+ "use_d_vector_file": false,
177
+ "d_vector_dim": 0,
178
+ "detach_dp_input": true
179
+ },
180
+ "grad_clip": [
181
+ 5,
182
+ 5
183
+ ],
184
+ "lr_gen": 0.0002,
185
+ "lr_disc": 0.0002,
186
+ "lr_scheduler_gen": "ExponentialLR",
187
+ "lr_scheduler_gen_params": {
188
+ "gamma": 0.999875,
189
+ "last_epoch": -1
190
+ },
191
+ "lr_scheduler_disc": "ExponentialLR",
192
+ "lr_scheduler_disc_params": {
193
+ "gamma": 0.999875,
194
+ "last_epoch": -1
195
+ },
196
+ "kl_loss_alpha": 1.0,
197
+ "disc_loss_alpha": 1.0,
198
+ "gen_loss_alpha": 1.0,
199
+ "feat_loss_alpha": 1.0,
200
+ "mel_loss_alpha": 45.0,
201
+ "return_wav": true,
202
+ "r": 1
203
+ }
espeak-ng-data/af_dict ADDED
Binary file (121 kB). View file
 
espeak-ng-data/am_dict ADDED
Binary file (63.9 kB). View file
 
espeak-ng-data/an_dict ADDED
Binary file (6.69 kB). View file
 
espeak-ng-data/ar_dict ADDED
Binary file (478 kB). View file
 
espeak-ng-data/as_dict ADDED
Binary file (5.01 kB). View file
 
espeak-ng-data/az_dict ADDED
Binary file (43.8 kB). View file
 
espeak-ng-data/ba_dict ADDED
Binary file (2.1 kB). View file
 
espeak-ng-data/be_dict ADDED
Binary file (2.65 kB). View file
 
espeak-ng-data/bg_dict ADDED
Binary file (87.1 kB). View file
 
espeak-ng-data/bn_dict ADDED
Binary file (90 kB). View file
 
espeak-ng-data/bpy_dict ADDED
Binary file (5.23 kB). View file
 
espeak-ng-data/bs_dict ADDED
Binary file (47.1 kB). View file
 
espeak-ng-data/ca_dict ADDED
Binary file (45.6 kB). View file
 
espeak-ng-data/chr_dict ADDED
Binary file (2.86 kB). View file
 
espeak-ng-data/cmn_dict ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109aaa7708d3727382acb3ae41d8e2094a7e2bb9f651a81835be22a6f08071fe
3
+ size 1566335
espeak-ng-data/cs_dict ADDED
Binary file (49.6 kB). View file
 
espeak-ng-data/cv_dict ADDED
Binary file (1.34 kB). View file
 
espeak-ng-data/cy_dict ADDED
Binary file (43.1 kB). View file
 
espeak-ng-data/da_dict ADDED
Binary file (245 kB). View file
 
espeak-ng-data/de_dict ADDED
Binary file (68.3 kB). View file
 
espeak-ng-data/el_dict ADDED
Binary file (72.8 kB). View file
 
espeak-ng-data/en_dict ADDED
Binary file (167 kB). View file
 
espeak-ng-data/eo_dict ADDED
Binary file (4.67 kB). View file
 
espeak-ng-data/es_dict ADDED
Binary file (49.3 kB). View file
 
espeak-ng-data/et_dict ADDED
Binary file (44.3 kB). View file
 
espeak-ng-data/eu_dict ADDED
Binary file (48.8 kB). View file
 
espeak-ng-data/fa_dict ADDED
Binary file (292 kB). View file
 
espeak-ng-data/fi_dict ADDED
Binary file (43.9 kB). View file
 
espeak-ng-data/fr_dict ADDED
Binary file (63.7 kB). View file
 
espeak-ng-data/ga_dict ADDED
Binary file (52.7 kB). View file
 
espeak-ng-data/gd_dict ADDED
Binary file (49.1 kB). View file
 
espeak-ng-data/gn_dict ADDED
Binary file (3.25 kB). View file
 
espeak-ng-data/grc_dict ADDED
Binary file (3.43 kB). View file
 
espeak-ng-data/gu_dict ADDED
Binary file (82.5 kB). View file
 
espeak-ng-data/hak_dict ADDED
Binary file (3.34 kB). View file
 
espeak-ng-data/haw_dict ADDED
Binary file (2.44 kB). View file
 
espeak-ng-data/he_dict ADDED
Binary file (6.96 kB). View file
 
espeak-ng-data/hi_dict ADDED
Binary file (92.1 kB). View file
 
espeak-ng-data/hr_dict ADDED
Binary file (49.4 kB). View file
 
espeak-ng-data/ht_dict ADDED
Binary file (1.8 kB). View file
 
espeak-ng-data/hu_dict ADDED
Binary file (154 kB). View file
 
espeak-ng-data/hy_dict ADDED
Binary file (62.3 kB). View file
 
espeak-ng-data/ia_dict ADDED
Binary file (331 kB). View file
 
espeak-ng-data/id_dict ADDED
Binary file (43.5 kB). View file
 
espeak-ng-data/intonations ADDED
Binary file (2.04 kB). View file