Baybars committed on
Commit
fc52d83
•
1 Parent(s): 6de7952

about page template added

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. infer_onnx.py +42 -3
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv
infer_onnx.py CHANGED
@@ -1,7 +1,6 @@
1
  import numpy as np
2
  import onnxruntime
3
 
4
- import utils
5
  from text import text_to_sequence, sequence_to_text
6
  import torch
7
  import gradio as gr
@@ -9,10 +8,13 @@ import soundfile as sf
9
  import tempfile
10
  import yaml
11
  import json
 
12
 
13
  from huggingface_hub import hf_hub_download
14
  from time import perf_counter
15
 
 
 
16
  def intersperse(lst, item):
17
  result = [item] * (len(lst) * 2 + 1)
18
  result[1::2] = lst
@@ -190,6 +192,41 @@ For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) t
190
 
191
  """
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  article = "Training and demo by The Language Technologies Unit from Barcelona Supercomputing Center."
194
 
195
  vits2_inference = gr.Interface(
@@ -203,7 +240,7 @@ vits2_inference = gr.Interface(
203
  gr.Dropdown(
204
  choices=speakers,
205
  label="Speaker id",
206
- value='caf_08106',
207
  info=f"Models are trained on 47 speakers. You can prompt the model using one of these speaker ids."
208
  ),
209
  gr.Slider(
@@ -227,12 +264,14 @@ vits2_inference = gr.Interface(
227
  outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath")]
228
  )
229
 
 
 
230
  demo = gr.Blocks()
231
 
232
  with demo:
233
  gr.Markdown(title)
234
  gr.Markdown(description)
235
- gr.TabbedInterface([vits2_inference], ["Multispeaker"])
236
  gr.Markdown(article)
237
 
238
  demo.queue(max_size=10)
 
1
  import numpy as np
2
  import onnxruntime
3
 
 
4
  from text import text_to_sequence, sequence_to_text
5
  import torch
6
  import gradio as gr
 
8
  import tempfile
9
  import yaml
10
  import json
11
+ import os
12
 
13
  from huggingface_hub import hf_hub_download
14
  from time import perf_counter
15
 
16
+ DEFAULT_SPEAKER_ID = os.environ.get("DEFAULT_SPEAKER_ID", default="caf_08106")
17
+
18
  def intersperse(lst, item):
19
  result = [item] * (len(lst) * 2 + 1)
20
  result[1::2] = lst
 
192
 
193
  """
194
 
195
+ about = """
196
+ ## 📄 About
197
+ The TTS test about.
198
+
199
+ ## Samples
200
+
201
+ <table style="font-size:16px">
202
+ <col width="205">
203
+ <col width="205">
204
+ <thead>
205
+ <tr>
206
+ <td>Col1</td>
207
+ <td>Col2</td>
208
+ <td>Col3</td>
209
+ </tr>
210
+ </thead>
211
+ <tbody>
212
+ <tr>
213
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
214
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
215
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
216
+ </tr>
217
+ <tr>
218
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
219
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
220
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
221
+ </tr>
222
+ <tr>
223
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
224
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
225
+ <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
226
+ </tr>
227
+ </tbody></table>
228
+ """
229
+
230
  article = "Training and demo by The Language Technologies Unit from Barcelona Supercomputing Center."
231
 
232
  vits2_inference = gr.Interface(
 
240
  gr.Dropdown(
241
  choices=speakers,
242
  label="Speaker id",
243
+ value=DEFAULT_SPEAKER_ID,
244
  info=f"Models are trained on 47 speakers. You can prompt the model using one of these speaker ids."
245
  ),
246
  gr.Slider(
 
264
  outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath")]
265
  )
266
 
267
+ about_article = gr.Markdown(about)
268
+
269
  demo = gr.Blocks()
270
 
271
  with demo:
272
  gr.Markdown(title)
273
  gr.Markdown(description)
274
+ gr.TabbedInterface([vits2_inference, about_article], ["Demo", "About"])
275
  gr.Markdown(article)
276
 
277
  demo.queue(max_size=10)