huseinzol05 committed
Commit c319bc7
1 Parent(s): 66bff45

Create README.md

Files changed (1)
  1. README.md +72 -0
README.md ADDED
---
language:
- ms
---

# Malay VITS Multispeaker clean V2

**This model is intended to be used with [malaya-speech](https://github.com/mesolitica/malaya-speech) only. It is possible to use the model without the library, but make sure the character vocabulary matches the one the model was trained with.**
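
If you do bypass the library, your text frontend must map characters to the exact ids the model was trained with. A minimal sanity check, assuming malaya-speech is installed (`TTS_SYMBOLS` is the same symbol table the inference example below uses):

```python
from malaya_speech.utils.text import TTS_SYMBOLS

# the model's input embedding is indexed by this symbol table, so any
# custom frontend must emit ids that index into the same table
print(len(TTS_SYMBOLS))
```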

## how to

```python
from huggingface_hub import snapshot_download
from malaya_speech.torch_model.vits.model_infer import SynthesizerTrn
from malaya_speech.torch_model.vits.commons import intersperse
from malaya_speech.utils.text import TTS_SYMBOLS
from malaya_speech.tts import load_text_ids
import torch
import os
import json

# the HParams import location differs across malaya-boilerplate versions
try:
    from malaya_boilerplate.hparams import HParams
except BaseException:
    from malaya_boilerplate.train.config import HParams

# download model.pth and config.json from this repo
folder = snapshot_download(repo_id="mesolitica/VITS-multispeaker-clean-v2")

with open(os.path.join(folder, 'config.json')) as fopen:
    hps = HParams(**json.load(fopen))

model = SynthesizerTrn(
    len(TTS_SYMBOLS),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model,
).eval()
model.load_state_dict(torch.load(os.path.join(folder, 'model.pth'), map_location='cpu'))

# speaker name -> speaker id
speaker_id = {
    'Ariff': 0,
    'Ayu': 1,
    'Bunga': 2,
    'Danial': 3,
    'Elina': 4,
    'Kamarul': 5,
    'Osman': 6,
    'Yasmin': 7,
}
normalizer = load_text_ids(pad_to=None, understand_punct=True, is_lower=False)

# normalize the input text and convert it to symbol ids
t, ids = normalizer.normalize('saya nak makan nasi ayam yang sedap, lagi lazat, dan hidup sangatlah susah kan.', add_fullstop=False)
if hps.data.add_blank:
    ids = intersperse(ids, 0)
ids = torch.LongTensor(ids)
ids_lengths = torch.LongTensor([ids.size(0)])
ids = ids.unsqueeze(0)
sid = 0  # 'Ariff'; pick any id from `speaker_id` above
sid = torch.tensor([sid])

with torch.no_grad():
    audio = model.infer(
        ids,
        ids_lengths,
        noise_scale=0.0,
        noise_scale_w=0.0,
        length_scale=1.0,
        sid=sid,
    )
y_ = audio[0].numpy()  # the first output is the generated waveform
```
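
As in the reference VITS implementation, `noise_scale` and `noise_scale_w` control sampling variability (0.0 gives deterministic output) and `length_scale` stretches duration (values above 1.0 slow the speech down). To write the result to disk, a minimal sketch; it assumes `audio[0]` follows the reference VITS output shape `(batch, 1, time)` and that `config.json` uses the standard VITS layout with the sampling rate at `hps.data.sampling_rate` (the `soundfile` dependency and the filename are illustrative):

```python
# Sketch only: assumes y_ has shape (1, 1, time) and that config.json
# carries `data.sampling_rate`, as in standard VITS configs.
import soundfile as sf

sf.write('output.wav', y_[0, 0], hps.data.sampling_rate)
```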