Mira1sen commited on
Commit
6b69b8f
1 Parent(s): 1585cde

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -7
  2. tts_gradio.py +279 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
  title: '1623'
3
- emoji: 🦀
4
- colorFrom: yellow
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.36.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: '1623'
3
+ app_file: tts_gradio.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.36.1
 
 
6
  ---
 
 
tts_gradio.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+
4
+ # 外部库
5
+ import re
6
+ import requests
7
+ import argparse
8
+ import time
9
+ import os
10
+ import re
11
+ import tempfile
12
+ # import librosa
13
+ import numpy as np
14
+ # import torch
15
+ # from torch import no_grad, LongTensor
16
+ # import commons
17
+ import gradio as gr
18
+ # import gradio.utils as gr_utils
19
+ # import gradio.processing_utils as gr_processing_utils
20
+
21
# Example sentence pre-filled into every TTS tab's text box.
all_example = "Today is a wonderful day to build something people love!"

# Azure (Microsoft) multilingual neural voice names, en-US locale.
# Used to populate the "微软" tab's dropdown; the selected string is sent
# verbatim as the SSML <voice name=...> attribute.
microsoft_model_list = [
    "en-US-JennyMultilingualNeural",
    "en-US-RyanMultilingualNeural",
    "en-US-AndrewMultilingualNeural",
    "en-US-AvaMultilingualNeural",
    "en-US-BrianMultilingualNeural",
    "en-US-EmmaMultilingualNeural",
    "en-US-AlloyMultilingualNeural",
    "en-US-EchoMultilingualNeural",
    "en-US-FableMultilingualNeural",
    "en-US-OnyxMultilingualNeural",
    "en-US-NovaMultilingualNeural",
    "en-US-ShimmerMultilingualNeural",
    "en-US-AlloyMultilingualNeuralHD",
    "en-US-EchoMultilingualNeuralHD",
    "en-US-FableMultilingualNeuralHD",
    "en-US-OnyxMultilingualNeuralHD",
    # Was "en-US-NovaMultilingualNeuralHD4" — a typo; Azure has no such
    # voice, so selecting it failed. Verify against the Azure voice list.
    "en-US-NovaMultilingualNeuralHD",
    "en-US-ShimmerMultilingualNeuralHD",
]

# OpenAI text-to-speech voice presets (tts-1 / tts-1-hd).
openai_model_list = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
]

# ElevenLabs pre-made voice IDs; position-paired with `eleven_name` below.
eleven_voice_id = [
    "21m00Tcm4TlvDq8ikWAM",
    "29vD33N1CtxCmqQRPOHJ",
    "2EiwWnXFnvU5JabPnv8n",
    "5Q0t7uMcjvnagumLfvZi",
    "AZnzlk1XvdvUeBnXmlld",
    "CYw3kZ02Hs0563khs1Fj",
    "D38z5RcWu1voky8WS1ja",
    "EXAVITQu4vr4xnSDxMaL",
    "ErXwobaYiN019PkySvjV",
    "GBv7mTt0atIp3Br8iCZE",
    "IKne3meq5aSn9XLyUdCD",
    "JBFqnCBsd6RMkjVDRZzb",
    "LcfcDJNUP1GQjkzn1xUU",
    "MF3mGyEYCl7XYWbV9V6O",
    "N2lVS1w4EtoT3dr4eOWO",
    "ODq5zmih8GrVes37Dizd",
    "SOYHLrjzK2X1ezoPC6cr",
    "TX3LPaxmHKxFdv7VOQHJ",
    "ThT5KcBeYPX3keUQqHPh",
    "TxGEqnHWrfWFTfGW9XjX",
    "VR6AewLTigWG4xSOukaG",
    "XB0fDUnXU5powFXDhCwa",
    "Xb7hH8MSUJpSbSDYk0k2",
    "XrExE9yKIg1WjnnlVkGX",
    "ZQe5CZNOzWyzPSCn5a3c",
    "Zlb1dXrM653N07WRdFW3",
    "bVMeCyTHy58xNoL34h3p",
    "flq6f7yk4E4fJM5XTYuZ",
    "g5CIjZEefAph4nQFvHAz",
    "iP95p4xoKVk53GoZ742B",
    "jBpfuIE2acCO8z3wKNLl",
    "jsCqWAovK2LkecY7zXl4",
    "nPczCjzI2devNBz1zQrb",
    "oWAxZDx7w5VEj9dCyTzz",
    "onwK4e9ZLuTAKqWW03F9",
    "pFZP5JQG7iQjIQuC4Bku",
    "pMsXgVXv3BLzUgSXRplE",
    "pNInz6obpgDQGcFmaJgB",
    "piTKgcLEGmPE4e6mEKli",
    "pqHfZKP75CvOlQylNhV4",
    "t0jbNlBVZ17f02VDIeMI",
    "yoZ06aMxZJJ28mfd3POQ",
    "z9fAnlkpzviPz146aGWa",
    "zcAOhNBS3c14rBihAFp1",
    "zrHiDhphv9ZnVXBqCLjz",
]

# Human-readable display names, same order as `eleven_voice_id`.
eleven_name = [
    "Rachel",
    "Drew",
    "Clyde",
    "Paul",
    "Domi",
    "Dave",
    "Fin",
    "Sarah",
    "Antoni",
    "Thomas",
    "Charlie",
    "George",
    "Emily",
    "Elli",
    "Callum",
    "Patrick",
    "Harry",
    "Liam",
    "Dorothy",
    "Josh",
    "Arnold",
    "Charlotte",
    "Alice",
    "Matilda",
    "James",
    "Joseph",
    "Jeremy",
    "Michael",
    "Ethan",
    "Chris",
    "Gigi",
    "Freya",
    "Brian",
    "Grace",
    "Daniel",
    "Lily",
    "Serena",
    "Adam",
    "Nicole",
    "Bill",
    "Jessie",
    "Sam",
    "Glinda",
    "Giovanni",
    "Mimi",
]

# Display name -> ElevenLabs voice ID, used by the 11Labs tab.
eleven_id_model_name_dict = dict(zip(eleven_name, eleven_voice_id))
149
+
150
def openai(text, name):
    """Synthesize `text` with the OpenAI speech endpoint using voice `name`.

    :param text: text to synthesize.
    :param name: one of `openai_model_list` (e.g. "alloy").
    :return: (status_message, audio) tuple for the (Textbox, Audio) outputs;
             audio is a path to an MP3 file, or None on failure.
    """
    # SECURITY: the API key used to be hard-coded here. A key committed to a
    # public repo is compromised and must be rotated; read it from the
    # environment instead.
    headers = {
        'Authorization': 'Bearer ' + os.environ.get("OPENAI_API_KEY", ""),
        'Content-Type': 'application/json',
    }

    json_data = {
        'model': 'tts-1-hd',
        'input': text,
        'voice': name,
    }

    response = requests.post('https://api.openai.com/v1/audio/speech',
                             headers=headers, json=json_data)
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}", None

    # The endpoint returns an *encoded* MP3 stream (default response_format),
    # not raw PCM. The old code reinterpreted the bytes with
    # np.frombuffer(..., dtype=np.uint8) as 24 kHz samples, which plays as
    # noise. Hand gr.Audio a file path instead, as microsoft() does.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        f.write(response.content)
        path = f.name
    return "Success", path
171
+
172
def elevenlabs(text, name):
    """Synthesize `text` with the ElevenLabs API using display name `name`.

    :param text: text to synthesize.
    :param name: one of `eleven_name`; mapped to a voice ID via
                 `eleven_id_model_name_dict`.
    :return: (status_message, audio) tuple for the (Textbox, Audio) outputs;
             audio is a path to an MP3 file, or None on failure.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{eleven_id_model_name_dict[name]}"

    # SECURITY: the key used to be hard-coded here. A key committed to a
    # public repo is compromised and must be rotated; read it from the
    # environment instead.
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": os.environ.get("ELEVENLABS_API_KEY", ""),
    }

    data = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response = requests.post(url, json=data, headers=headers)
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}", None

    # The old code returned the raw requests.Response object, which gr.Audio
    # cannot render. Persist the MP3 payload and return its path instead.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        f.write(response.content)
        path = f.name
    return "Success", path
198
+
199
def microsoft(text, name, style="Neural"):
    """Synthesize `text` with Azure Cognitive Services TTS (japaneast region).

    :param text: text to synthesize; interpolated into SSML, so it is assumed
                 to contain no XML markup — TODO: escape if user input may.
    :param name: Azure voice name from `microsoft_model_list`.
    :param style: unused; kept for interface compatibility.
    :return: (status_message, audio) tuple for the (Textbox, Audio) outputs;
             audio is a path to an MP3 file, or None on failure.
    """
    # SECURITY: the subscription key used to be hard-coded here. A key
    # committed to a public repo is compromised and must be rotated; read it
    # from the environment instead.
    headers = {
        'Ocp-Apim-Subscription-Key': os.environ.get("AZURE_SPEECH_KEY", ""),
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
        'User-Agent': 'curl',
    }

    data = ("<speak version='1.0' xml:lang='en-US'>"
            f"<voice xml:lang='en-US' name='{name}'>"  # xml:gender='Female'
            f"{text}"
            "</voice>"
            "</speak>")

    response = requests.post(
        'https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1',
        headers=headers,
        data=data,
    )
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}", None

    # The requested output format is MP3, so name the file .mp3 (the old code
    # wrote it to a .wav path). tempfile avoids the predictable /tmp name and
    # the collision risk of a hand-rolled timestamp path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        f.write(response.content)
        path = f.name  # TODO: files accumulate in tmp; disk might fill.
    return "Success", path
230
+
231
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # NOTE(review): --device, --port, and --model_info_path are parsed but
    # never used by this script — confirm before removing them.
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument("--share", action="store_true", default=True, help="share gradio app")
    parser.add_argument("--port", type=int, default=8081, help="port")
    parser.add_argument('--model_info_path', type=str, default='/gluster/speech_data/info.json')
    args = parser.parse_args()

    def _build_tab(label, voices, synth_fn):
        """One provider tab: text area + voice dropdown + Generate button
        wired to `synth_fn(text, voice) -> (message, audio)`."""
        with gr.TabItem(label):
            text_in = gr.TextArea(label="Text", value=all_example)
            voice_in = gr.Dropdown(voices, label="name")
            submit = gr.Button("Generate", variant="primary")
            msg_out = gr.Textbox(label="Output Message")
            audio_out = gr.Audio(label="Output Audio")
            submit.click(synth_fn, [text_in, voice_in], [msg_out, audio_out])

    app = gr.Blocks()
    with app:
        gr.Markdown("## English TTS Demo")
        with gr.Tabs():
            # Same tab order as before: ElevenLabs, Microsoft, OpenAI.
            _build_tab("11Labs", eleven_name, elevenlabs)
            _build_tab("微软", microsoft_model_list, microsoft)
            _build_tab("openai", openai_model_list, openai)

    app.queue(max_size=10)
    # Honor the CLI flag (default True) instead of hard-coding share=True.
    app.launch(share=args.share)
    # _, audio = microsoft(all_example,microsoft_model_list[0])
    # print(audio)
    # with open("test97.mp3", "wb") as f:
    #     f.write(audio.content)