Plachta committed on
Commit
512efa6
·
1 Parent(s): 26adb3f

Added examples

Browse files
app.py CHANGED
@@ -31,6 +31,7 @@ from models.vallex import VALLE
31
  from utils.g2p import PhonemeBpeTokenizer
32
  from descriptions import *
33
  from macros import *
 
34
 
35
  import gradio as gr
36
  import whisper
@@ -503,6 +504,11 @@ def main():
503
  btn_mp.click(make_npz_prompt,
504
  inputs=[textbox_mp, upload_audio_prompt, record_audio_prompt, textbox_transcript],
505
  outputs=[text_output, prompt_output])
 
 
 
 
 
506
  with gr.Tab("Make prompt"):
507
  gr.Markdown(make_prompt_md)
508
  with gr.Row():
@@ -523,6 +529,11 @@ def main():
523
  btn_2.click(make_npz_prompt,
524
  inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
525
  outputs=[text_output_2, prompt_output_2])
 
 
 
 
 
526
  with gr.Tab("Infer from prompt"):
527
  gr.Markdown(infer_from_prompt_md)
528
  with gr.Row():
@@ -543,8 +554,13 @@ def main():
543
  btn_3.click(infer_from_prompt,
544
  inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
545
  outputs=[text_output_3, audio_output_3])
 
 
 
 
 
546
  with gr.Tab("Infer long text"):
547
- gr.Markdown("This is a long text generation demo. You can use this to generate long audio. ")
548
  with gr.Row():
549
  with gr.Column():
550
  textbox_4 = gr.TextArea(label="Text",
 
31
  from utils.g2p import PhonemeBpeTokenizer
32
  from descriptions import *
33
  from macros import *
34
+ from examples import *
35
 
36
  import gradio as gr
37
  import whisper
 
504
  btn_mp.click(make_npz_prompt,
505
  inputs=[textbox_mp, upload_audio_prompt, record_audio_prompt, textbox_transcript],
506
  outputs=[text_output, prompt_output])
507
+ gr.Examples(examples=infer_from_audio_examples,
508
+ inputs=[textbox, language_dropdown, accent_dropdown, upload_audio_prompt, record_audio_prompt, textbox_transcript],
509
+ outputs=[text_output, audio_output],
510
+ fn=infer_from_audio,
511
+ cache_examples=True,)
512
  with gr.Tab("Make prompt"):
513
  gr.Markdown(make_prompt_md)
514
  with gr.Row():
 
529
  btn_2.click(make_npz_prompt,
530
  inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
531
  outputs=[text_output_2, prompt_output_2])
532
+ gr.Examples(examples=make_npz_prompt_examples,
533
+ inputs=[textbox2, upload_audio_prompt_2, record_audio_prompt_2, textbox_transcript2],
534
+ outputs=[text_output_2, prompt_output_2],
535
+ fn=make_npz_prompt,
536
+ cache_examples=True,)
537
  with gr.Tab("Infer from prompt"):
538
  gr.Markdown(infer_from_prompt_md)
539
  with gr.Row():
 
554
  btn_3.click(infer_from_prompt,
555
  inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
556
  outputs=[text_output_3, audio_output_3])
557
+ gr.Examples(examples=infer_from_prompt_examples,
558
+ inputs=[textbox_3, language_dropdown_3, accent_dropdown_3, preset_dropdown_3, prompt_file],
559
+ outputs=[text_output_3, audio_output_3],
560
+ fn=infer_from_prompt,
561
+ cache_examples=True,)
562
  with gr.Tab("Infer long text"):
563
+ gr.Markdown(long_text_md)
564
  with gr.Row():
565
  with gr.Column():
566
  textbox_4 = gr.TextArea(label="Text",
descriptions.py CHANGED
@@ -24,4 +24,9 @@ Faster than **"Infer from audio"**.<br>
24
  You need to **"Make prompt"** first, and upload the encoded prompt (a `.npz` file)
25
  """
26
 
 
 
 
 
 
27
  long_text_example = "Just a few years ago, there were no legions of deep learning scientists developing intelligent products and services at major companies and startups. When we entered the field, machine learning did not command headlines in daily newspapers. Our parents had no idea what machine learning was, let alone why we might prefer it to a career in medicine or law. Machine learning was a blue skies academic discipline whose industrial significance was limited to a narrow set of real-world applications, including speech recognition and computer vision. Moreover, many of these applications required so much domain knowledge that they were often regarded as entirely separate areas for which machine learning was one small component. At that time, neural networks—the predecessors of the deep learning methods that we focus on in this book—were generally regarded as outmoded."
 
24
  You need to **"Make prompt"** first, and upload the encoded prompt (a `.npz` file)
25
  """
26
 
27
+ long_text_md = """
28
+ Very long text is chunked into several sentences, and each sentence is synthesized separately.<br>
29
+ Please make a prompt or use a preset prompt to infer long text.
30
+ """
31
+
32
  long_text_example = "Just a few years ago, there were no legions of deep learning scientists developing intelligent products and services at major companies and startups. When we entered the field, machine learning did not command headlines in daily newspapers. Our parents had no idea what machine learning was, let alone why we might prefer it to a career in medicine or law. Machine learning was a blue skies academic discipline whose industrial significance was limited to a narrow set of real-world applications, including speech recognition and computer vision. Moreover, many of these applications required so much domain knowledge that they were often regarded as entirely separate areas for which machine learning was one small component. At that time, neural networks—the predecessors of the deep learning methods that we focus on in this book—were generally regarded as outmoded."
examples.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ infer_from_audio_examples = [
2
+ ["This is how this machine has taken my voice.", 'English', 'no-accent', "prompts/en-2.wav", None, "Wow, look at that! That's no ordinary Teddy bear!"],
3
+ ["我喜欢抽电子烟,尤其是锐刻五代。", '中文', 'no-accent', "prompts/zh-1.wav", None, "今天我很荣幸,"],
4
+ ["私の声を真似するのはそんなに面白いですか?", '日本語', 'no-accent', "prompts/ja-2.ogg", None, "初めまして、朝武よしのです。"],
5
+ ["你可以听得出来我有多困。", '中文', 'no-accent', "prompts/en-1.wav", None, ""],
6
+ ["この文は、クロスリンガル合成の例です。", '日本語', 'no-accent', "prompts/zh-2.wav", None, ""],
7
+ ["Actually, I can't speak English, but this machine helped me do it.", 'English', 'no-accent', "prompts/ja-1.wav", None, ""],
8
+ ]
9
+
10
+ make_npz_prompt_examples = [
11
+ ["Gem-trader", "prompts/en-2.wav", None, "Wow, look at that! That's no ordinary Teddy bear!"],
12
+ ["Ding Zhen", "prompts/zh-1.wav", None, "今天我很荣幸,"],
13
+ ["Yoshino", "prompts/ja-2.ogg", None, "初めまして、朝武よしのです。"],
14
+ ["Sleepy-woman", "prompts/en-1.wav", None, ""],
15
+ ["Yae", "prompts/zh-2.wav", None, ""],
16
+ ["Cafe", "prompts/ja-1.wav", None, ""],
17
+ ]
18
+
19
+ infer_from_prompt_examples = [
20
+ ["A prompt contains voice, prosody and emotion information of a certain speaker.", "English", "no-accent", "vctk_1", None],
21
+ ["This prompt is made with an audio of three seconds.", "English", "no-accent", "librispeech_1", None],
22
+ ["This prompt is made with Chinese speech", "English", "no-accent", "seel", None],
23
+ ]
24
+
prompts/en-1.wav ADDED
Binary file (213 kB). View file
 
prompts/en-2.wav ADDED
Binary file (552 kB). View file
 
prompts/ja-1.wav ADDED
Binary file (195 kB). View file
 
prompts/ja-2.ogg ADDED
Binary file (31.4 kB). View file
 
prompts/zh-1.wav ADDED
Binary file (176 kB). View file
 
prompts/zh-2.wav ADDED
Binary file (272 kB). View file