Update README.md
#3
by
ylacombe
- opened
README.md
CHANGED
@@ -124,10 +124,10 @@ description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder.
|
|
124 |
prompt = "Hey, how are you doing today?"
|
125 |
description = "A female speaker with a British accent delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
|
126 |
|
127 |
-
input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
|
128 |
-
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
129 |
|
130 |
-
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
131 |
audio_arr = generation.cpu().numpy().squeeze()
|
132 |
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
|
133 |
```
|
@@ -163,10 +163,10 @@ description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder.
|
|
163 |
prompt = "अरे, तुम आज कैसे हो?"
|
164 |
description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
|
165 |
|
166 |
-
input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
|
167 |
-
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
168 |
|
169 |
-
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
170 |
audio_arr = generation.cpu().numpy().squeeze()
|
171 |
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
|
172 |
```
|
@@ -191,10 +191,10 @@ description_tokenizer = AutoTokenizer.from_pretrained(model.config.text_encoder.
|
|
191 |
prompt = "अरे, तुम आज कैसे हो?"
|
192 |
description = "Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."
|
193 |
|
194 |
-
input_ids = description_tokenizer(description, return_tensors="pt").input_ids.to(device)
|
195 |
-
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
196 |
|
197 |
-
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
198 |
audio_arr = generation.cpu().numpy().squeeze()
|
199 |
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
|
200 |
```
|
|
|
124 |
prompt = "Hey, how are you doing today?"
|
125 |
description = "A female speaker with a British accent delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
|
126 |
|
127 |
+
description_input_ids = description_tokenizer(description, return_tensors="pt").to(device)
|
128 |
+
prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
|
129 |
|
130 |
+
generation = model.generate(input_ids=description_input_ids.input_ids, attention_mask=description_input_ids.attention_mask, prompt_input_ids=prompt_input_ids.input_ids, prompt_attention_mask=prompt_input_ids.attention_mask)
|
131 |
audio_arr = generation.cpu().numpy().squeeze()
|
132 |
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
|
133 |
```
|
|
|
163 |
prompt = "अरे, तुम आज कैसे हो?"
|
164 |
description = "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up."
|
165 |
|
166 |
+
description_input_ids = description_tokenizer(description, return_tensors="pt").to(device)
|
167 |
+
prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
|
168 |
|
169 |
+
generation = model.generate(input_ids=description_input_ids.input_ids, attention_mask=description_input_ids.attention_mask, prompt_input_ids=prompt_input_ids.input_ids, prompt_attention_mask=prompt_input_ids.attention_mask)
|
170 |
audio_arr = generation.cpu().numpy().squeeze()
|
171 |
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
|
172 |
```
|
|
|
191 |
prompt = "अरे, तुम आज कैसे हो?"
|
192 |
description = "Divya's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise."
|
193 |
|
194 |
+
description_input_ids = description_tokenizer(description, return_tensors="pt").to(device)
|
195 |
+
prompt_input_ids = tokenizer(prompt, return_tensors="pt").to(device)
|
196 |
|
197 |
+
generation = model.generate(input_ids=description_input_ids.input_ids, attention_mask=description_input_ids.attention_mask, prompt_input_ids=prompt_input_ids.input_ids, prompt_attention_mask=prompt_input_ids.attention_mask)
|
198 |
audio_arr = generation.cpu().numpy().squeeze()
|
199 |
sf.write("indic_tts_out.wav", audio_arr, model.config.sampling_rate)
|
200 |
```
|