yesyesjaewook committed on
Commit
00c7109
•
1 Parent(s): 195fa7e

Update TTS

Browse files
Files changed (4) hide show
  1. README.md +2 -1
  2. app.py +103 -27
  3. packages.txt +0 -0
  4. requirements.txt +4 -3
README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 💩
4
  colorFrom: gray
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.11.0
 
8
  app_file: app.py
9
  pinned: false
10
  license: wtfpl
 
4
  colorFrom: gray
5
  colorTo: red
6
  sdk: gradio
7
+ python_version: 3.9
8
+ sdk_version: 3.24.1
9
  app_file: app.py
10
  pinned: false
11
  license: wtfpl
app.py CHANGED
@@ -7,23 +7,97 @@ import numpy as np
7
  import torch
8
  from espnet2.bin.tts_inference import Text2Speech
9
 
10
- models = {
11
- "jaewook": Text2Speech.from_pretrained("yesyesjaewook/jets-jaewook-ko"),
12
- "jaewook_ukten": Text2Speech.from_pretrained("yesyesjaewook/jets-jaewook-ukten-ko"),
13
- }
14
 
15
  emotions = {
16
- "angry": 1,
17
- "embarrassed": 2,
18
- "happy": 3,
19
- "neutral": 4,
20
- "sad": 5,
21
- }
22
-
23
- states = {
24
- "보통 😐": ("jaewook", "neutral"),
25
- "억텐 🤪": ("jaewook_ukten", "neutral"),
26
- "억억텐 🤯": ("jaewook_ukten", "angry"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
 
29
 
@@ -37,33 +111,35 @@ def endswith_punctuation(text: str) -> bool:
37
  return re.search(r"[.?!]", text) is not None
38
 
39
 
40
- def synthesize(text: str, state: str) -> Tuple[int, np.array]:
41
  text = text.strip()
42
  if not endswith_punctuation(text):
43
  text += "."
44
 
45
- print(f"[{datetime.now().isoformat()}] <{state}> {text}")
46
 
47
- model, emotion = states[state]
48
- output = models[model](text, lids=np.array(emotions[emotion]))
49
- return (models[model].fs, float32_to_pcm16(output["wav"]))
50
 
51
 
52
  with gr.Blocks() as demo:
53
- gr.Markdown("# 송재욱 살려내기 프로젝트")
54
 
55
  with gr.Row():
56
  with gr.Column():
57
- state = gr.Radio(label="상태", choices=[*states.keys()], value="보통 😐")
58
- text = gr.Textbox(label="텍스트", value="그동안 사랑해주신분들에게는 감사하다는 말만 전하고싶습니다", lines=3)
59
-
 
 
60
  synthesize_button = gr.Button("합성")
61
-
62
- with gr.Column():
63
  with gr.Box():
64
  ouptut = gr.Audio()
65
  gr.Markdown("음성 다운로드는 플레이어 오른쪽의 ··· 메뉴 클릭 부탁드립니다 🙏")
66
 
67
- synthesize_button.click(fn=synthesize, inputs=[text, state], outputs=ouptut)
 
 
 
68
 
69
  demo.launch()
 
7
  import torch
8
  from espnet2.bin.tts_inference import Text2Speech
9
 
10
+ model = Text2Speech.from_pretrained("yesyesjaewook/jets-jaewook-ko")
 
 
 
11
 
12
  emotions = {
13
+ "가엾다": 1,
14
+ "갑갑하다": 2,
15
+ "갑작스럽다": 3,
16
+ "경멸스럽다": 4,
17
+ "고맙다": 5,
18
+ "고통스럽다": 6,
19
+ "곤란하다": 7,
20
+ "괴롭다": 8,
21
+ "굉장하다": 9,
22
+ "그립다": 10,
23
+ "급하다": 11,
24
+ "기막히다": 12,
25
+ "긴밀하다": 13,
26
+ "꺼림칙하다": 14,
27
+ "끔찍하다": 15,
28
+ "난데없다": 16,
29
+ "남사스럽다": 17,
30
+ "냉정하다": 18,
31
+ "녹녹하다": 19,
32
+ "다급하다": 20,
33
+ "담백하다": 21,
34
+ "대동소이하다": 22,
35
+ "두렵다": 23,
36
+ "둥그스름하다": 24,
37
+ "뚜렷하다": 25,
38
+ "만족스럽다": 26,
39
+ "망하다": 27,
40
+ "매정하다": 28,
41
+ "못되다": 29,
42
+ "무관심하다": 30,
43
+ "무례하다": 31,
44
+ "뭉클하다": 32,
45
+ "밉다": 33,
46
+ "반갑다": 34,
47
+ "배은망덕하다": 35,
48
+ "버겁다": 36,
49
+ "보잘것없다": 37,
50
+ "부끄럽다": 38,
51
+ "부당하다": 39,
52
+ "부유하다": 40,
53
+ "불쌍하다": 41,
54
+ "불행하다": 42,
55
+ "불확실하다": 43,
56
+ "뻔뻔스럽다": 44,
57
+ "뼈아프다": 45,
58
+ "사랑스럽다": 46,
59
+ "서럽다": 47,
60
+ "섭섭하다": 48,
61
+ "수상하다": 49,
62
+ "슬프다": 50,
63
+ "시무룩하다": 51,
64
+ "심란하다": 52,
65
+ "쓰리다": 53,
66
+ "쓸쓸하다": 54,
67
+ "아니꼽다": 55,
68
+ "아득하다": 56,
69
+ "아쉽다": 57,
70
+ "아찔하다": 58,
71
+ "악랄하다": 59,
72
+ "안타깝다": 60,
73
+ "약하다": 61,
74
+ "얕다": 62,
75
+ "어렴풋하다": 63,
76
+ "어리둥절하다": 64,
77
+ "억울하다": 65,
78
+ "언짢다": 66,
79
+ "엄밀하다": 67,
80
+ "엄중하다": 68,
81
+ "엉뚱하다": 69,
82
+ "영광스럽다": 70,
83
+ "예사롭다": 71,
84
+ "외롭다": 72,
85
+ "위험하다": 73,
86
+ "의심스럽다": 74,
87
+ "자랑스럽다": 75,
88
+ "자세하다": 76,
89
+ "자유롭다": 77,
90
+ "조마조마하다": 78,
91
+ "즐겁다": 79,
92
+ "짜증스럽다": 80,
93
+ "창피하다": 81,
94
+ "치욕스럽다": 82,
95
+ "한심하다": 83,
96
+ "행복하다": 84,
97
+ "혼란하다": 85,
98
+ "홀가분하다": 86,
99
+ "화목하다": 87,
100
+ "흐뭇하다": 88,
101
  }
102
 
103
 
 
111
  return re.search(r"[.?!]", text) is not None
112
 
113
 
114
+ def synthesize(text: str, emotion: str) -> Tuple[int, np.array]:
115
  text = text.strip()
116
  if not endswith_punctuation(text):
117
  text += "."
118
 
119
+ print(f"[{datetime.now().isoformat()}] <{emotion}> {text}")
120
 
121
+ output = model(text, lids=np.array(emotions[emotion]))
122
+ return (model.fs, float32_to_pcm16(output["wav"]))
 
123
 
124
 
125
  with gr.Blocks() as demo:
126
+ gr.Markdown("# 송재욱 TTS 프로젝트")
127
 
128
  with gr.Row():
129
  with gr.Column():
130
+ text = gr.Textbox(
131
+ label="텍스트",
132
+ value="그동안 사랑해주신분들에게는 감사하다는 말만 전하고싶습니다. 제 방송 햇수로 3년이 흐르고 지금 여기까지 오는데 굉장히 노력많이했습니다.",
133
+ lines=3,
134
+ )
135
  synthesize_button = gr.Button("합성")
 
 
136
  with gr.Box():
137
  ouptut = gr.Audio()
138
  gr.Markdown("음성 다운로드는 플레이어 오른쪽의 ··· 메뉴 클릭 부탁드립니다 🙏")
139
 
140
+ with gr.Column():
141
+ emotion = gr.Radio(label="감정", choices=[*emotions.keys()], value="담백하다")
142
+
143
+ synthesize_button.click(fn=synthesize, inputs=[text, emotion], outputs=ouptut)
144
 
145
  demo.launch()
packages.txt DELETED
File without changes
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
- espnet_model_zoo
2
  g2pk
3
- git+https://github.com/espnet/espnet
4
  numpy
5
- rich
 
 
 
1
+ espnet-model-zoo==0.1.7
2
  g2pk
 
3
  numpy
4
+ rich
5
+ torch~=1.13.0
6
+ typeguard==2.13.3