Deddy committed
Commit 23e21df · verified · 1 Parent(s): 468e248

Upload 22 files

app.py ADDED
@@ -0,0 +1,189 @@
+ """
+ ████████╗████████╗███████╗
+ ╚══██╔══╝╚══██╔══╝██╔════╝
+    ██║      ██║   ███████╗
+    ██║      ██║   ╚════██║
+    ██║      ██║   ███████║
+    ╚═╝      ╚═╝   ╚══════╝
+ ██╗███╗   ██╗██████╗  ██████╗ ███╗   ██╗███████╗███████╗██╗ █████╗ ██╗  ██╗██╗   ██╗
+ ██║████╗  ██║██╔══██╗██╔═══██╗████╗  ██║██╔════╝██╔════╝██║██╔══██╗██║ ██╔╝██║   ██║
+ ██║██╔██╗ ██║██║  ██║██║   ██║██╔██╗ ██║█████╗  ███████╗██║███████║█████╔╝ ██║   ██║
+ ██║██║╚██╗██║██║  ██║██║   ██║██║╚██╗██║██╔══╝  ╚════██║██║██╔══██║██╔═██╗ ██║   ██║
+ ██║██║ ╚████║██████╔╝╚██████╔╝██║ ╚████║███████╗███████║██║██║  ██║██║  ██╗╚██████╔╝
+ ╚═╝╚═╝  ╚═══╝╚═════╝  ╚═════╝ ╚═╝  ╚═══╝╚══════╝╚══════╝╚═╝╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝
+
+ This script was written by __drat
+
+ Notes:
+ 1. This script generates speech from text with a choice of speakers.
+ 2. It combines a text-to-speech (TTS) model with grapheme-to-phoneme (G2P) conversion.
+ 3. The model was trained specifically for Indonesian, Javanese, and Sundanese.
+ 4. The interface is built with Gradio, using a custom theme called MetafisikTheme.
+
+ How to use:
+ 1. Enter the text you want converted to speech.
+ 2. Choose the desired speaking speed.
+ 3. Choose the language and speaker.
+ 4. Click the "Lakukan Inferensi Audio" button to generate the audio.
+ """
+
+ import gradio as gr
+ import platform
+ import json
+ from pathlib import Path
+ import uuid
+ import html
+ import subprocess
+ import time
+ from g2p_id import G2P
+ from themes import MetafisikTheme  # Import the custom theme from themes.py
+
+ # Initialize G2P (grapheme-to-phoneme) conversion
+ g2p = G2P()
+
+ # Check whether the operating system is macOS
+ def is_mac_os():
+     return platform.system() == 'Darwin'
+
+ # Default configuration parameters
+ params = {
+     "activate": True,
+     "autoplay": True,
+     "show_text": True,
+     "remove_trailing_dots": False,
+     "voice": "default.wav",
+     "language": "Indonesian",
+     "model_path": "checkpoint_1260000-inference.pth",
+     "config_path": "config.json",
+     "out_path": "output.wav"
+ }
+
+ SAMPLE_RATE = 16000
+ device = None
+
+ # Default speaker name
+ default_speaker_name = "ardi"
+
+ # Convert text into a sequence for the model (placeholder, currently unused)
+ def text_to_sequence(text):
+     # Implement this according to your model's needs; this is only an example
+     sequence = [ord(char) for char in text]
+     return sequence
+
+ # Generate speech, reporting progress through the Gradio progress bar
+ def gen_voice(text, speaker_label, speed, language, progress=gr.Progress()):
+     # Note: `speed` and `language` are accepted but not yet forwarded to the TTS command
+     speaker_mapping = {
+         "Wibowo - Suara jantan berwibawa": "wibowo",
+         "Ardi - Suara lembut dan hangat": "ardi",
+         "Gadis - Suara perempuan yang merdu": "gadis",
+         "Juminten - Suara perempuan jawa (bahasa jawa)": "JV-00264",
+         "Asep - Suara lelaki sunda (bahasa sunda)": "SU-00060"
+     }
+     speaker = speaker_mapping.get(speaker_label, default_speaker_name)
+
+     progress(0, desc="Menginisialisasi G2P")
+     text = html.unescape(text)
+     text_to_tts = g2p(text)  # Convert the text to phonemes with G2P
+     time.sleep(1)
+     progress(0.2, desc="Mengonversi teks ke TTS")
+
+     short_uuid = str(uuid.uuid4())[:8]
+     output_file = Path(f'outputs/{speaker}-{short_uuid}.wav')
+
+     # Command that runs the Coqui TTS CLI
+     command = [
+         "tts",
+         "--text", text_to_tts,
+         "--model_path", params["model_path"],
+         "--config_path", params["config_path"],
+         "--speaker_idx", speaker,
+         "--out_path", str(output_file)
+     ]
+
+     progress(0.5, desc="Menjalankan proses TTS")
+     result = subprocess.run(command, capture_output=True, text=True)
+     time.sleep(1)
+     if result.returncode != 0:
+         print(f"Error: {result.stderr}")
+         return None
+
+     progress(1, desc="Selesai")
+     return str(output_file)
+
+ # Return the list of available speakers
+ def update_speakers():
+     speakers = [
+         ("Wibowo - Suara jantan berwibawa", "wibowo"),
+         ("Ardi - Suara lembut dan hangat", "ardi"),
+         ("Gadis - Suara perempuan yang merdu", "gadis"),
+         ("Juminten - Suara perempuan jawa (bahasa jawa)", "JV-00264"),
+         ("Asep - Suara lelaki sunda (bahasa sunda)", "SU-00060")
+     ]
+     return speakers
+
+ # Build (or refresh) the speaker dropdown
+ def update_dropdown(_=None, selected_speaker=default_speaker_name):
+     choices = update_speakers()
+     dropdown_choices = {label: label for label, value in choices}
+     return gr.Dropdown(choices=dropdown_choices, value=selected_speaker, label="Pilih Pembicara", interactive=True, allow_custom_value=True)
+
+ # Load the language list
+ with open(Path('languages.json'), encoding='utf8') as f:
+     languages = json.load(f)
+
+ # Gradio interface with the MetafisikTheme theme
+ with gr.Blocks(theme=MetafisikTheme()) as app:
+
+     gr.Markdown("### TTS Bahasa Indonesia", elem_id="main-title")
+
+     with gr.Row():
+         with gr.Column():
+             text_input = gr.Textbox(lines=2, label="Teks", value="Halo, saya adalah pembicara virtual.", elem_id="text-input")
+             speed_slider = gr.Slider(label='Kecepatan Bicara', minimum=0.1, maximum=1.99, value=0.8, step=0.01, elem_id="speed-slider")
+             language_dropdown = gr.Dropdown(list(languages.keys()), label="Bahasa", value="Indonesian", elem_id="language-dropdown")
+             submit_button = gr.Button("🗣️ Lakukan Inferensi Audio", elem_id="submit-button")
+             explanation = gr.HTML("""
+                 <div style="margin-top: 20px; color: gray;">
+                     <h4>Kegunaan Aplikasi</h4>
+                     <p>Aplikasi ini digunakan untuk menghasilkan suara berbasis teks dengan berbagai pilihan pembicara.
+                     Teknologi yang digunakan meliputi model text-to-speech (TTS) yang canggih dengan konversi teks ke fonem.
+                     Model yang dipakai dilatih khusus untuk bahasa Indonesia, Jawa dan Sunda.</p>
+                     <h4>Cara Penggunaan</h4>
+                     <ol>
+                         <li>Masukkan teks yang ingin diubah menjadi suara.</li>
+                         <li>Pilih kecepatan bicara yang diinginkan.</li>
+                         <li>Pilih bahasa dan pembicara yang diinginkan.</li>
+                         <li>Klik tombol "Lakukan Inferensi Audio" untuk menghasilkan suara.</li>
+                     </ol>
+                     <p></p>
+                     <p>Semoga <b>Energi Semesta Digital</b> selalu bersama Anda!</p>
+                 </div>
+             """)
+
+         with gr.Column():
+             with gr.Row():
+                 gr.Image("ardi.jpg", label="Ardi")
+                 gr.Image("gadis.jpg", label="Gadis")
+                 gr.Image("wibowo.jpg", label="Wibowo")
+
+             speaker_dropdown = update_dropdown()
+             refresh_button = gr.Button("👨‍👨‍👦 Segarkan Pembicara", elem_id="refresh-button")
+             audio_output = gr.Audio(elem_id="audio-output")
+
+     refresh_button.click(fn=update_dropdown, inputs=[], outputs=speaker_dropdown)
+
+     submit_button.click(
+         fn=gen_voice,
+         inputs=[text_input, speaker_dropdown, speed_slider, language_dropdown],
+         outputs=audio_output
+     )
+
+     gr.HTML("""
+         <footer style="text-align: center; margin-top: 20px; color:silver;">
+             Energi Semesta Digital © 2024 __drat. | 🇮🇩 Untuk Indonesia Jaya!
+         </footer>
+     """)
+
+ if __name__ == "__main__":
+     app.launch()
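
Note: running `python app.py` launches the Gradio UI. For a quick smoke test without the browser, gen_voice can also be called directly; the sketch below is illustrative only and assumes the Coqui `tts` CLI, `g2p_id`, `languages.json`, the checkpoint and config in this commit, and an `outputs/` directory are all available locally.

    # smoke_test.py — illustrative, not part of this commit
    from pathlib import Path
    from app import gen_voice   # importing app.py builds the Blocks UI but does not launch it

    Path("outputs").mkdir(exist_ok=True)
    wav_path = gen_voice(
        "Selamat pagi, apa kabar?",
        "Ardi - Suara lembut dan hangat",        # must match a key in speaker_mapping
        0.8,                                     # speed (currently not forwarded to the CLI)
        "Indonesian",
        progress=lambda *args, **kwargs: None,   # stand-in for gr.Progress() outside the UI
    )
    print(wav_path)  # path to the generated WAV, or None if the tts CLI failed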
ardi.jpg ADDED
audio_samples/asep.wav ADDED
Binary file (748 kB).
 
audio_samples/gadis.wav ADDED
Binary file (865 kB).
 
audio_samples/juminten.wav ADDED
Binary file (903 kB).
 
audio_samples/wibowo.wav ADDED
Binary file (825 kB).
 
checkpoint_1260000-inference.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:399e41d1e2a704056b96b82692dd7d0fa3cc351ea20d277f534f7a656522eb74
+ size 345999149
config.json ADDED
@@ -0,0 +1,308 @@
+ {
+     "output_path": "/workspace/TTS",
+     "logger_uri": null,
+     "run_name": "vits_indonesian_multispeaker",
+     "project_name": null,
+     "run_description": "\ud83d\udc38Coqui trainer run.",
+     "print_step": 25,
+     "plot_step": 100,
+     "model_param_stats": false,
+     "wandb_entity": null,
+     "dashboard_logger": "tensorboard",
+     "log_model_step": 10000,
+     "save_step": 10000,
+     "save_n_checkpoints": 5,
+     "save_checkpoints": true,
+     "save_all_best": false,
+     "save_best_after": 10000,
+     "target_loss": null,
+     "print_eval": true,
+     "test_delay_epochs": -1,
+     "run_eval": true,
+     "run_eval_steps": null,
+     "distributed_backend": "nccl",
+     "distributed_url": "tcp://localhost:54321",
+     "mixed_precision": false,
+     "epochs": 1000,
+     "batch_size": 32,
+     "eval_batch_size": 8,
+     "grad_clip": [
+         1000,
+         1000
+     ],
+     "scheduler_after_epoch": true,
+     "lr": 0.001,
+     "optimizer": "AdamW",
+     "optimizer_params": {
+         "betas": [
+             0.8,
+             0.99
+         ],
+         "eps": 1e-09,
+         "weight_decay": 0.01
+     },
+     "lr_scheduler": "",
+     "lr_scheduler_params": {},
+     "use_grad_scaler": false,
+     "cudnn_enable": true,
+     "cudnn_deterministic": false,
+     "cudnn_benchmark": false,
+     "training_seed": 54321,
+     "model": "vits",
+     "num_loader_workers": 4,
+     "num_eval_loader_workers": 4,
+     "use_noise_augment": false,
+     "audio": {
+         "fft_size": 1024,
+         "sample_rate": 22050,
+         "win_length": 1024,
+         "hop_length": 256,
+         "num_mels": 80,
+         "mel_fmin": 0,
+         "mel_fmax": null
+     },
+     "use_phonemes": false,
+     "phonemizer": null,
+     "phoneme_language": "en-us",
+     "compute_input_seq_cache": true,
+     "text_cleaner": "basic_cleaners",
+     "enable_eos_bos_chars": false,
+     "test_sentences_file": "",
+     "phoneme_cache_path": "/workspace/TTS/phoneme_cache",
+     "characters": {
+         "characters_class": "TTS.tts.models.vits.VitsCharacters",
+         "vocab_dict": null,
+         "pad": "<PAD>",
+         "eos": "<EOS>",
+         "bos": "<BOS>",
+         "blank": "<BLNK>",
+         "characters": "abdefhijklmnoprstuwxz\u014b\u0254\u0259\u025b\u0261\u026a\u0272\u0283\u028a\u0292\u0294\u02c8",
+         "punctuations": " !,.?",
+         "phonemes": null,
+         "is_unique": true,
+         "is_sorted": true
+     },
+     "add_blank": true,
+     "batch_group_size": 0,
+     "loss_masking": null,
+     "min_audio_len": 1,
+     "max_audio_len": Infinity,
+     "min_text_len": 1,
+     "max_text_len": Infinity,
+     "compute_f0": false,
+     "compute_linear_spec": true,
+     "precompute_num_workers": 0,
+     "start_by_longest": false,
+     "datasets": [
+         {
+             "name": "coqui",
+             "path": "dataset",
+             "meta_file_train": "metadata-wibowo.csv",
+             "ignored_speakers": null,
+             "language": "",
+             "meta_file_val": "",
+             "meta_file_attn_mask": ""
+         },
+         {
+             "name": "coqui",
+             "path": "dataset",
+             "meta_file_train": "metadata-ardi.csv",
+             "ignored_speakers": null,
+             "language": "",
+             "meta_file_val": "",
+             "meta_file_attn_mask": ""
+         },
+         {
+             "name": "coqui",
+             "path": "dataset",
+             "meta_file_train": "metadata-gadis.csv",
+             "ignored_speakers": null,
+             "language": "",
+             "meta_file_val": "",
+             "meta_file_attn_mask": ""
+         },
+         {
+             "name": "coqui",
+             "path": "dataset",
+             "meta_file_train": "metadata-javanese.csv",
+             "ignored_speakers": null,
+             "language": "",
+             "meta_file_val": "",
+             "meta_file_attn_mask": ""
+         },
+         {
+             "name": "coqui",
+             "path": "dataset",
+             "meta_file_train": "metadata-sundanese.csv",
+             "ignored_speakers": null,
+             "language": "",
+             "meta_file_val": "",
+             "meta_file_attn_mask": ""
+         }
+     ],
+     "test_sentences": [
+         [
+             "\u02c8budi \u02c8makan \u02c8tahu, \u02c8soto, dan \u02c8tempe",
+             "wibowo",
+             null,
+             null
+         ],
+         [
+             "\u02c8tadi \u02c8pa\u0261i \u02c8ali dan \u02c8\u0283afi s\u0259\u02c8dan\u0294 m\u0259n\u0294\u02c8\u0261unakan \u02c8m\u0254t\u0254r di \u02c8kantor \u02c8m\u0259r\u025bka.",
+             "ardi",
+             null,
+             null
+         ],
+         [
+             "\u02c8ardi dan \u02c8thomas \u02c8m\u0259nud\u0292u \u02c8k\u0259 \u02c8s\u0259kolah \u02c8pada \u02c8puk\u028al \u02c8s\u0259pul\u028ah \u02c8pa\u0261i.",
+             "gadis",
+             null,
+             null
+         ],
+         [
+             "\u02c8ardi dan \u02c8thomas \u02c8m\u0259nud\u0292u \u02c8k\u0259 \u02c8s\u0259kolah \u02c8pada \u02c8puk\u028al \u02c8s\u0259pul\u028ah \u02c8pa\u0261i.",
+             "JV-00264",
+             null,
+             null
+         ],
+         [
+             "\u02c8ardi dan \u02c8thomas \u02c8m\u0259nud\u0292u \u02c8k\u0259 \u02c8s\u0259kolah \u02c8pada \u02c8puk\u028al \u02c8s\u0259pul\u028ah \u02c8pa\u0261i.",
+             "SU-00060",
+             null,
+             null
+         ]
+     ],
+     "eval_split_max_size": null,
+     "eval_split_size": 0.01,
+     "use_speaker_weighted_sampler": false,
+     "speaker_weighted_sampler_alpha": 1.0,
+     "use_language_weighted_sampler": false,
+     "language_weighted_sampler_alpha": 1.0,
+     "use_length_weighted_sampler": false,
+     "length_weighted_sampler_alpha": 1.0,
+     "model_args": {
+         "num_chars": 40,
+         "out_channels": 513,
+         "spec_segment_size": 32,
+         "hidden_channels": 192,
+         "hidden_channels_ffn_text_encoder": 768,
+         "num_heads_text_encoder": 2,
+         "num_layers_text_encoder": 6,
+         "kernel_size_text_encoder": 3,
+         "dropout_p_text_encoder": 0.1,
+         "dropout_p_duration_predictor": 0.5,
+         "kernel_size_posterior_encoder": 5,
+         "dilation_rate_posterior_encoder": 1,
+         "num_layers_posterior_encoder": 16,
+         "kernel_size_flow": 5,
+         "dilation_rate_flow": 1,
+         "num_layers_flow": 4,
+         "resblock_type_decoder": "1",
+         "resblock_kernel_sizes_decoder": [
+             3,
+             7,
+             11
+         ],
+         "resblock_dilation_sizes_decoder": [
+             [
+                 1,
+                 3,
+                 5
+             ],
+             [
+                 1,
+                 3,
+                 5
+             ],
+             [
+                 1,
+                 3,
+                 5
+             ]
+         ],
+         "upsample_rates_decoder": [
+             8,
+             8,
+             2,
+             2
+         ],
+         "upsample_initial_channel_decoder": 512,
+         "upsample_kernel_sizes_decoder": [
+             16,
+             16,
+             4,
+             4
+         ],
+         "periods_multi_period_discriminator": [
+             2,
+             3,
+             5,
+             7,
+             11
+         ],
+         "use_sdp": true,
+         "noise_scale": 1.0,
+         "inference_noise_scale": 0.33,
+         "length_scale": 1,
+         "noise_scale_dp": 1.0,
+         "inference_noise_scale_dp": 0.33,
+         "max_inference_len": null,
+         "init_discriminator": true,
+         "use_spectral_norm_disriminator": false,
+         "use_speaker_embedding": true,
+         "num_speakers": 83,
+         "speakers_file": "speakers.pth",
+         "d_vector_file": null,
+         "speaker_embedding_channels": 256,
+         "use_d_vector_file": false,
+         "d_vector_dim": 0,
+         "detach_dp_input": true,
+         "use_language_embedding": false,
+         "embedded_language_dim": 4,
+         "num_languages": 0,
+         "language_ids_file": null,
+         "use_speaker_encoder_as_loss": false,
+         "speaker_encoder_config_path": "",
+         "speaker_encoder_model_path": "",
+         "condition_dp_on_speaker": true,
+         "freeze_encoder": false,
+         "freeze_DP": false,
+         "freeze_PE": false,
+         "freeze_flow_decoder": false,
+         "freeze_waveform_decoder": false,
+         "encoder_sample_rate": null,
+         "interpolate_z": true,
+         "reinit_DP": true,
+         "reinit_text_encoder": false
+     },
+     "lr_gen": 0.0002,
+     "lr_disc": 0.0002,
+     "lr_scheduler_gen": "ExponentialLR",
+     "lr_scheduler_gen_params": {
+         "gamma": 0.999875,
+         "last_epoch": -1
+     },
+     "lr_scheduler_disc": "ExponentialLR",
+     "lr_scheduler_disc_params": {
+         "gamma": 0.999875,
+         "last_epoch": -1
+     },
+     "kl_loss_alpha": 1.0,
+     "disc_loss_alpha": 1.0,
+     "gen_loss_alpha": 1.0,
+     "feat_loss_alpha": 1.0,
+     "mel_loss_alpha": 45.0,
+     "dur_loss_alpha": 1.0,
+     "speaker_encoder_loss_alpha": 1.0,
+     "return_wav": true,
+     "r": 1,
+     "num_speakers": 0,
+     "use_speaker_embedding": true,
+     "speakers_file": "speakers.pth",
+     "speaker_embedding_channels": 256,
+     "language_ids_file": null,
+     "use_language_embedding": false,
+     "use_d_vector_file": false,
+     "d_vector_file": null,
+     "d_vector_dim": 0
+ }
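
Note: config.json is the Coqui TTS (VITS) configuration that pairs with checkpoint_1260000-inference.pth and speakers.pth above; app.py passes both to the `tts` CLI. (The `Infinity` values are not strict JSON, but Python's json module accepts them by default.) The sketch below shows the same invocation outside the Gradio app; it assumes the Coqui `TTS` package is installed so the `tts` command is on PATH.

    # illustrative standalone call, mirroring the command built in app.py
    import subprocess
    from g2p_id import G2P

    phonemes = G2P()("Halo dunia, apa kabar?")   # grapheme-to-phoneme, as in app.py
    subprocess.run(
        [
            "tts",
            "--text", phonemes,
            "--model_path", "checkpoint_1260000-inference.pth",
            "--config_path", "config.json",
            "--speaker_idx", "wibowo",           # a speaker ID stored in speakers.pth
            "--out_path", "halo.wav",
        ],
        check=True,
    )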
g2p-id/.DS_Store ADDED
Binary file (6.15 kB).
 
g2p-id/.gitignore ADDED
@@ -0,0 +1,164 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ .DS_Store
+ .backup/
+ .data/
g2p-id/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .g2p import G2P
+
+ __version__ = "0.0.5"
g2p-id/data/dict.json ADDED
The diff for this file is too large to render.
 
g2p-id/g2p.py ADDED
@@ -0,0 +1,220 @@
+ import json
+ import os
+ import re
+
+ import numpy as np
+ import onnxruntime
+ from nltk.tokenize import TweetTokenizer
+ from sacremoses import MosesDetokenizer
+
+ from .syllable_splitter import SyllableSplitter
+
+ ABJAD_MAPPING = {
+     "a": "a",
+     "b": "bé",
+     "c": "cé",
+     "d": "dé",
+     "e": "é",
+     "f": "èf",
+     "g": "gé",
+     "h": "ha",
+     "i": "i",
+     "j": "jé",
+     "k": "ka",
+     "l": "èl",
+     "m": "èm",
+     "n": "èn",
+     "o": "o",
+     "p": "pé",
+     "q": "ki",
+     "r": "èr",
+     "s": "ès",
+     "t": "té",
+     "u": "u",
+     "v": "vé",
+     "w": "wé",
+     "x": "èks",
+     "y": "yé",
+     "z": "zèt",
+ }
+
+ PHONETIC_MAPPING = {
+     "sy": "ʃ",
+     "ny": "ɲ",
+     "ng": "ŋ",
+     "dj": "dʒ",
+     "'": "ʔ",
+     "c": "tʃ",
+     "é": "e",
+     "è": "ɛ",
+     "ê": "ə",
+     "g": "ɡ",
+     "I": "ɪ",
+     "j": "dʒ",
+     "ô": "ɔ",
+     "q": "k",
+     "U": "ʊ",
+     "v": "f",
+     "x": "ks",
+     "y": "j",
+ }
+
+
+ dirname = os.path.dirname(__file__)
+
+ # Predict pronunciation with BERT masking
+ # Read more: https://w11wo.github.io/posts/2022/04/predicting-phonemes-with-bert/
+ class Predictor:
+     def __init__(self, model_path):
+         # fmt: off
+         self.vocab = ['', '[UNK]', 'a', 'n', 'ê', 'e', 'i', 'r', 'k', 's', 't', 'g', 'm', 'u', 'l', 'p', 'o', 'd', 'b', 'h', 'c', 'j', 'y', 'f', 'w', 'v', 'z', 'x', 'q', '[mask]']
+         self.mask_token_id = self.vocab.index("[mask]")
+         # fmt: on
+         self.session = onnxruntime.InferenceSession(model_path)
+
+     def predict(self, word: str) -> str:
+         """
+         Predict the phonetic representation of a word.
+
+         Args:
+             word (str): The word to predict.
+
+         Returns:
+             str: The predicted phonetic representation of the word.
+         """
+         text = [self.vocab.index(c) if c != "e" else self.mask_token_id for c in word]
+         text.extend([0] * (32 - len(text)))  # Pad to 32 tokens
+         inputs = np.array([text], dtype=np.int64)
+         (predictions,) = self.session.run(None, {"input_4": inputs})
+
+         # find masked idx token
+         _, masked_index = np.where(inputs == self.mask_token_id)
+
+         # get prediction at those masked index only
+         mask_prediction = predictions[0][masked_index]
+         predicted_ids = np.argmax(mask_prediction, axis=1)
+
+         # replace mask with predicted token
+         for i, idx in enumerate(masked_index):
+             text[idx] = predicted_ids[i]
+
+         return "".join([self.vocab[i] for i in text if i != 0])
+
+
+ class G2P:
+     def __init__(self):
+         self.tokenizer = TweetTokenizer()
+         self.detokenizer = MosesDetokenizer(lang="id")
+
+         dict_path = os.path.join(dirname, "data/dict.json")
+         with open(dict_path) as f:
+             self.dict = json.load(f)
+
+         model_path = os.path.join(dirname, "model/bert_pron.onnx")
+         self.predictor = Predictor(model_path)
+
+         self.syllable_splitter = SyllableSplitter()
+
+     def __call__(self, text: str) -> str:
+         """
+         Convert text to phonetic representation.
+
+         Args:
+             text (str): The text to convert.
+
+         Returns:
+             str: The phonetic representation of the text.
+         """
+         text = text.lower()
+         text = re.sub(r"[^ a-z0-9'\.,?!-]", "", text)
+         text = text.replace("-", " ")
+
+         prons = []
+         words = self.tokenizer.tokenize(text)
+         for word in words:
+             # PUEBI pronunciation
+             if word in self.dict:
+                 pron = self.dict[word]
+             elif len(word) == 1 and word in ABJAD_MAPPING:
+                 pron = ABJAD_MAPPING[word]
+             elif "e" not in word or not word.isalpha():
+                 pron = word
+             elif "e" in word:
+                 pron = self.predictor.predict(word)
+
+             # Replace allophones of /e/ with e (temporary)
+             pron = pron.replace("é", "e")
+             pron = pron.replace("è", "e")
+
+             # Replace /x/ with /s/
+             if pron.startswith("x"):
+                 pron = "s" + pron[1:]
+
+             sylls = self.syllable_splitter.split_syllables(pron)
+             # Decide where to put the stress
+             stress_loc = len(sylls) - 1
+             if len(sylls) > 1 and "ê" in sylls[-2]:
+                 if "ê" in sylls[-1]:
+                     stress_loc = len(sylls) - 2
+                 else:
+                     stress_loc = len(sylls)
+
+             # Apply rules on a per-syllable basis
+             # All allophones are set to tense by default
+             # and will be changed to lax if needed
+             alophone = {"e": "é", "o": "o"}
+             alophone_map = {"i": "I", "u": "U", "e": "è", "o": "ô"}
+             for i, syll in enumerate(sylls, start=1):
+                 # Put syllable stress
+                 if i == stress_loc:
+                     syll = "ˈ" + syll
+
+                 # Allophone syllable rules
+                 for v in ["e", "o"]:
+                     # Replace with the lax allophone [ɛ, ɔ] if
+                     # in a closed final syllable
+                     if v in syll and not syll.endswith(v) and i == len(sylls):
+                         alophone[v] = alophone_map[v]
+
+                 # Allophone syllable stress rules
+                 for v in ["i", "u"]:
+                     # Replace with the lax allophone [ɪ, ʊ] if
+                     # in the middle of an unstressed syllable
+                     # that does not end with a nasal coda [m, n, ng] (except for the final syllable)
+                     if (
+                         v in syll
+                         and not syll.startswith("ˈ")
+                         and not syll.endswith(v)
+                         and (
+                             not any(syll.endswith(x) for x in ["m", "n", "ng"])
+                             or i == len(sylls)
+                         )
+                     ):
+                         syll = syll.replace(v, alophone_map[v])
+
+                 if syll.endswith("nk"):
+                     syll = syll[:-2] + "ng"
+                 elif syll.endswith("d"):
+                     syll = syll[:-1] + "t"
+                 elif syll.endswith("b"):
+                     syll = syll[:-1] + "p"
+                 elif syll.endswith("k") or (
+                     syll.endswith("g") and not syll.endswith("ng")
+                 ):
+                     syll = syll[:-1] + "'"
+                 sylls[i - 1] = syll
+
+             pron = "".join(sylls)
+             # Apply phonetic and allophone mapping
+             for v in alophone:
+                 if v == "o" and pron.count("o") == 1:
+                     continue
+                 pron = pron.replace(v, alophone[v])
+             for g, p in PHONETIC_MAPPING.items():
+                 pron = pron.replace(g, p)
+             pron = pron.replace("kh", "x")
+
+             prons.append(pron)
+             prons.append(" ")
+
+         return self.detokenizer.detokenize(prons)
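
Note: G2P.__call__ looks each word up in data/dict.json, falls back to the ONNX BERT masked-prediction model to disambiguate the letter "e" (é/è/ê), then splits syllables and applies the stress and allophone rules before mapping graphemes to IPA-like symbols. A minimal usage sketch, assuming the package is importable as g2p_id with nltk, sacremoses, and onnxruntime installed:

    # illustrative use of the G2P pipeline defined above
    from g2p_id import G2P

    g2p = G2P()
    print(g2p("Halo, apa kabar?"))
    # prints an IPA-like phoneme string (roughly "ˈhalo, ˈapa ˈkabar?"),
    # which is what app.py feeds to the Coqui tts CLI via --text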
g2p-id/model/bert_pron.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9bc9b45f1cdeff4dc473f722627e94db4e3ff0ba7a2b066e542a0fa46f49d330
+ size 1295867
g2p-id/syllable_splitter.py ADDED
@@ -0,0 +1,127 @@
+ # Copied from https://github.com/fahadh4ilyas/syllable_splitter
+ # MIT License
+ import re
+
+
+ class SyllableSplitter:
+     def __init__(self):
+         self.consonant = set(
+             [
+                 "b",
+                 "c",
+                 "d",
+                 "f",
+                 "g",
+                 "h",
+                 "j",
+                 "k",
+                 "l",
+                 "m",
+                 "n",
+                 "p",
+                 "q",
+                 "r",
+                 "s",
+                 "t",
+                 "v",
+                 "w",
+                 "x",
+                 "y",
+                 "z",
+                 "ng",
+                 "ny",
+                 "sy",
+                 "ch",
+                 "dh",
+                 "gh",
+                 "kh",
+                 "ph",
+                 "sh",
+                 "th",
+             ]
+         )
+         self.double_consonant = set(["ll", "ks", "rs", "rt", "nk", "nd"])
+         self.vocal = set(["a", "e", "ê", "é", "è", "i", "o", "u"])
+
+     def split_letters(self, string):
+         letters = []
+         arrange = []
+
+         while string != "":
+             letter = string[:2]
+
+             if letter in self.double_consonant:
+                 if string[2:] != "" and string[2] in self.vocal:
+                     letters += [letter[0]]
+                     arrange += ["c"]
+                     string = string[1:]
+                 else:
+                     letters += [letter]
+                     arrange += ["c"]
+                     string = string[2:]
+             elif letter in self.consonant:
+                 letters += [letter]
+                 arrange += ["c"]
+                 string = string[2:]
+             elif letter in self.vocal:
+                 letters += [letter]
+                 arrange += ["v"]
+                 string = string[2:]
+             else:
+                 letter = string[0]
+
+                 if letter in self.consonant:
+                     letters += [letter]
+                     arrange += ["c"]
+                     string = string[1:]
+                 elif letter in self.vocal:
+                     letters += [letter]
+                     arrange += ["v"]
+                     string = string[1:]
+                 else:
+                     letters += [letter]
+                     arrange += ["s"]
+                     string = string[1:]
+
+         return letters, "".join(arrange)
+
+     def split_syllables_from_letters(self, letters, arrange):
+         consonant_index = re.search(r"vc{2,}", arrange)
+         while consonant_index:
+             i = consonant_index.start() + 1
+             letters = letters[: i + 1] + ["|"] + letters[i + 1 :]
+             arrange = arrange[: i + 1] + "|" + arrange[i + 1 :]
+             consonant_index = re.search(r"vc{2,}", arrange)
+
+         vocal_index = re.search(r"v{2,}", arrange)
+         while vocal_index:
+             i = vocal_index.start()
+             letters = letters[: i + 1] + ["|"] + letters[i + 1 :]
+             arrange = arrange[: i + 1] + "|" + arrange[i + 1 :]
+             vocal_index = re.search(r"v{2,}", arrange)
+
+         vcv_index = re.search(r"vcv", arrange)
+         while vcv_index:
+             i = vcv_index.start()
+             letters = letters[: i + 1] + ["|"] + letters[i + 1 :]
+             arrange = arrange[: i + 1] + "|" + arrange[i + 1 :]
+             vcv_index = re.search(r"vcv", arrange)
+
+         sep_index = re.search(r"[cvs]s", arrange)
+         while sep_index:
+             i = sep_index.start()
+             letters = letters[: i + 1] + ["|"] + letters[i + 1 :]
+             arrange = arrange[: i + 1] + "|" + arrange[i + 1 :]
+             sep_index = re.search(r"[cvs]s", arrange)
+
+         sep_index = re.search(r"s[cvs]", arrange)
+         while sep_index:
+             i = sep_index.start()
+             letters = letters[: i + 1] + ["|"] + letters[i + 1 :]
+             arrange = arrange[: i + 1] + "|" + arrange[i + 1 :]
+             sep_index = re.search(r"s[cvs]", arrange)
+         return "".join(letters).split("|")
+
+     def split_syllables(self, string):
+         letters, arrange = self.split_letters(string)
+         return self.split_syllables_from_letters(letters, arrange)
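
Note: split_letters tags each letter (or digraph) as consonant "c", vowel "v", or other "s", and split_syllables_from_letters then inserts syllable boundaries via the vc{2,}, v{2,}, vcv, and "s"-separator regex passes. A quick illustration; the import path is an assumption (e.g. the module installed as part of g2p_id):

    # illustrative use of the splitter on its own
    from g2p_id.syllable_splitter import SyllableSplitter

    splitter = SyllableSplitter()
    print(splitter.split_letters("makan"))       # (['m', 'a', 'k', 'a', 'n'], 'cvcvc')
    print(splitter.split_syllables("makan"))     # ['ma', 'kan']
    print(splitter.split_syllables("struktur"))  # ['struk', 'tur']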
gadis.jpg ADDED
languages.json ADDED
@@ -0,0 +1,19 @@
+ {
+     "Arabic": "ar",
+     "Chinese": "zh-cn",
+     "Czech": "cs",
+     "Dutch": "nl",
+     "English": "en",
+     "French": "fr",
+     "German": "de",
+     "Hungarian": "hu",
+     "Indonesian": "id",
+     "Italian": "it",
+     "Japanese": "ja",
+     "Korean": "ko",
+     "Polish": "pl",
+     "Portuguese": "pt",
+     "Russian": "ru",
+     "Spanish": "es",
+     "Turkish": "tr"
+ }
outputs/.DS_Store ADDED
Binary file (6.15 kB).
 
speakers.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f51f3840be3e92a96805c5ec81ee0948a44f965e77d980b0ad59fe0f661c2d17
+ size 1839
targets/.DS_Store ADDED
Binary file (6.15 kB).
 
themes.py ADDED
@@ -0,0 +1,84 @@
+ """
+ ████████╗████████╗███████╗
+ ╚══██╔══╝╚══██╔══╝██╔════╝
+    ██║      ██║   ███████╗
+    ██║      ██║   ╚════██║
+    ██║      ██║   ███████║
+    ╚═╝      ╚═╝   ╚══════╝
+ ██╗███╗   ██╗██████╗  ██████╗ ███╗   ██╗███████╗███████╗██╗ █████╗ ██╗  ██╗██╗   ██╗
+ ██║████╗  ██║██╔══██╗██╔═══██╗████╗  ██║██╔════╝██╔════╝██║██╔══██╗██║ ██╔╝██║   ██║
+ ██║██╔██╗ ██║██║  ██║██║   ██║██╔██╗ ██║█████╗  ███████╗██║███████║█████╔╝ ██║   ██║
+ ██║██║╚██╗██║██║  ██║██║   ██║██║╚██╗██║██╔══╝  ╚════██║██║██╔══██║██╔═██╗ ██║   ██║
+ ██║██║ ╚████║██████╔╝╚██████╔╝██║ ╚████║███████╗███████║██║██║  ██║██║  ██╗╚██████╔╝
+ ╚═╝╚═╝  ╚═══╝╚═════╝  ╚═════╝ ╚═╝  ╚═══╝╚══════╝╚══════╝╚═╝╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝
+
+ This script was written by __drat
+
+ Notes:
+ 1. This script generates speech from text with a choice of speakers.
+ 2. It combines a text-to-speech (TTS) model with grapheme-to-phoneme (G2P) conversion.
+ 3. The model was trained specifically for Indonesian, Javanese, and Sundanese.
+ 4. The interface is built with Gradio, using a custom theme called MetafisikTheme.
+
+ How to use:
+ 1. Enter the text you want converted to speech.
+ 2. Choose the desired speaking speed.
+ 3. Choose the language and speaker.
+ 4. Click the "Lakukan Inferensi Audio" button to generate the audio.
+ """
+
+ from __future__ import annotations
+ from typing import Iterable
+ from gradio.themes.base import Base
+ from gradio.themes.utils import colors, fonts, sizes
+
+ class MetafisikTheme(Base):
+     def __init__(
+         self,
+         *,
+         primary_hue: colors.Color | str = colors.orange,
+         secondary_hue: colors.Color | str = colors.yellow,
+         neutral_hue: colors.Color | str = colors.gray,
+         spacing_size: sizes.Size | str = sizes.spacing_md,
+         radius_size: sizes.Size | str = sizes.radius_md,
+         text_size: sizes.Size | str = sizes.text_lg,
+         font: fonts.Font
+         | str
+         | Iterable[fonts.Font | str] = (
+             fonts.GoogleFont("Quicksand"),
+             "ui-sans-serif",
+             "sans-serif",
+         ),
+         font_mono: fonts.Font
+         | str
+         | Iterable[fonts.Font | str] = (
+             fonts.GoogleFont("IBM Plex Mono"),
+             "ui-monospace",
+             "monospace",
+         ),
+     ):
+         super().__init__(
+             primary_hue=primary_hue,
+             secondary_hue=secondary_hue,
+             neutral_hue=neutral_hue,
+             spacing_size=spacing_size,
+             radius_size=radius_size,
+             text_size=text_size,
+             font=font,
+             font_mono=font_mono,
+         )
+         super().set(
+             body_background_fill="linear-gradient(to bottom, #FFFFE0, #FFFFFF)",  # Gradient from light yellow to white
+             body_background_fill_dark="linear-gradient(to bottom, #FFFFE0, #FFFFFF)",  # Same gradient for dark mode
+             button_primary_background_fill="linear-gradient(90deg, #FFA500, #FF4500)",  # Orange to dark orange gradient
+             button_primary_background_fill_hover="linear-gradient(90deg, #FFB347, #FF6347)",  # Lighter orange gradient
+             button_primary_text_color="white",
+             button_primary_background_fill_dark="linear-gradient(90deg, #FF8C00, #FF4500)",  # Darker orange gradient
+             slider_color="*secondary_300",
+             slider_color_dark="*secondary_600",
+             block_title_text_weight="600",
+             block_border_width="3px",
+             block_shadow="*shadow_drop_lg",
+             button_shadow="*shadow_drop_lg",
+             button_large_padding="32px",
+         )
wibowo.jpg ADDED