daswer123 committed on
Commit
cf9596a
·
verified ·
1 Parent(s): 4983b30

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +238 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import string
3
+ import gradio as gr
4
+ from hailuo_tts import HailuoTTS
5
+ import os
6
+
7
# Module-level TTS client. It is None until authorize() succeeds and is read
# by text_to_speech() and clone_voice() through `global tts_instance`.
# NOTE(review): being module-level, this looks like it is shared by every
# browser session served by the same process, so the most recent
# authorization would win for all users -- confirm, and consider gr.State.
tts_instance = None
9
+
10
def authorize(api_key, group_id):
    """Build the module-level TTS client from the given credentials.

    Returns a pair of Gradio updates, in the order the click handler wires
    them: the first toggles the main tabs, the second the status textbox.
    On success the tabs are shown and the status box is hidden; on failure
    the tabs stay hidden and the status box shows the error text.
    """
    global tts_instance
    try:
        tts_instance = HailuoTTS.create(api_key=api_key, group_id=group_id)
    except Exception as e:
        # Any failure while creating the client is reported in the UI
        # instead of being raised to Gradio.
        failure_text = f"Authorization error: {str(e)}"
        hide_tabs = gr.update(visible=False)
        show_error = gr.update(visible=True, value=failure_text)
        return hide_tabs, show_error
    else:
        show_tabs = gr.update(visible=True)
        hide_error = gr.update(visible=False)
        return show_tabs, hide_error
18
+
19
def on_model_change(model):
    """Return a Gradio update that shows the emotion picker only for "turbo"."""
    is_turbo = (model == "turbo")
    return gr.update(visible=is_turbo)
23
+
24
def text_to_speech(text, model, voice, speed, volume, pitch, emotion, language,
                   sample_rate, bitrate, audio_format, channel):
    """Synthesize `text` with the module-level TTS client.

    Args:
        text: Text to synthesize; must contain at least one non-blank character.
        model: Model name; the emotion is only applied when this is "turbo".
        voice: Voice ID passed to ``set_voice``.
        speed, volume, pitch: Voice parameters; converted to float, float and
            int respectively before being passed to ``set_voice_params``.
        emotion: Emotion name; ignored unless ``model == "turbo"`` and truthy.
        language: Language boost; "auto" leaves the client's setting untouched.
        sample_rate, bitrate, channel: Audio settings, converted to int.
        audio_format: Audio format; also used as the output file extension.

    Returns:
        ``(output_path, status_message)`` on success, or
        ``(None, "Error: ...")`` when validation or synthesis fails.
    """
    global tts_instance

    # Reject blank input up front instead of sending a pointless request.
    if text is None or not str(text).strip():
        return None, "Error: Text is empty"

    # Without a client every call below would fail with an opaque
    # "'NoneType' object has no attribute ..." message.
    if tts_instance is None:
        return None, "Error: Not authorized. Please enter your API key and Group ID first"

    try:
        # Update settings
        tts_instance.set_model(model)
        tts_instance.set_voice(voice)
        tts_instance.set_voice_params(speed=float(speed), volume=float(volume), pitch=int(pitch))

        if model == "turbo" and emotion:
            tts_instance.set_emotion(emotion)

        if language != "auto":
            tts_instance.set_language_boost(language)

        # Update audio settings
        tts_instance.update_audio_settings(
            sample_rate=int(sample_rate),
            bitrate=int(bitrate),
            format=audio_format,
            channel=int(channel)
        )

        # Generate speech
        # NOTE(review): the output name is fixed per format, so concurrent
        # requests presumably overwrite each other's file -- confirm whether
        # the client accepts arbitrary paths before switching to a unique name.
        output_path = f"output.{audio_format}"
        tts_instance.text_to_speech(text, output_path)

        return output_path, "Audio generated successfully!"
    except Exception as e:
        return None, f"Error: {str(e)}"
55
+
56
def generate_random_voice_id():
    """Return "random_" followed by 12 random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    suffix = ''.join(random.choices(alphabet, k=12))
    return "random_" + suffix
58
+
59
def show_voice_id_input(use_custom_voice_id):
    """Return a Gradio update that hides the Voice ID textbox while the checkbox is ticked."""
    textbox_visible = not use_custom_voice_id
    return gr.update(visible=textbox_visible)
61
+
62
def clone_voice(audio_file, voice_id, noise_reduction, preview_text, accuracy, volume_normalize, use_custom_voice_id):
    """Upload a reference recording and clone a voice from it.

    Args:
        audio_file: The uploaded recording; either an object exposing its path
            as ``.name`` or a plain path string.
        voice_id: Voice ID typed by the user. Only used when
            ``use_custom_voice_id`` is falsy.
        noise_reduction: Forwarded unchanged to the client's ``clone_voice``.
        preview_text: Forwarded unchanged to the client's ``clone_voice``.
        accuracy: Converted to float and forwarded to the client.
        volume_normalize: Forwarded unchanged to the client's ``clone_voice``.
        use_custom_voice_id: Despite the name, a truthy value means "generate
            a random voice ID"; a falsy value means "use ``voice_id`` as typed".

    Returns:
        ``(demo_path, status_message)`` on success, or
        ``(None, "Error: ...")`` when validation or cloning fails.
    """
    global tts_instance

    # Validate cheap, local preconditions before any upload is attempted.
    if audio_file is None:
        return None, "Error: No audio file provided"

    if not use_custom_voice_id:
        voice_id = (voice_id or "").strip()
        if not voice_id:
            return None, "Error: Voice ID is required when Random Voice ID is unchecked"

    if tts_instance is None:
        return None, "Error: Not authorized. Please enter your API key and Group ID first"

    try:
        # Upload file (accept both file-like wrappers and plain path strings)
        audio_path = getattr(audio_file, "name", audio_file)
        file_id = tts_instance.upload_voice_file(audio_path)

        voice_id = voice_id if not use_custom_voice_id else generate_random_voice_id()
        print(voice_id)

        # Clone voice
        response, demo_path = tts_instance.clone_voice(
            file_id=file_id,
            voice_id=voice_id,
            noise_reduction=noise_reduction,
            preview_text=preview_text,
            accuracy=float(accuracy),
            volume_normalize=volume_normalize
        )

        return demo_path, f"Voice cloned successfully! Voice ID: {voice_id}"
    except Exception as e:
        return None, f"Error: {str(e)}"
85
+
86
+ # Create interface
87
# Create interface
# Layout: an "Authorization" accordion that is always visible, plus a tab set
# (Text to Speech / Clone Voice) that stays hidden until authorize() succeeds.
with gr.Blocks() as app:
    # Authorization screen
    with gr.Accordion("Authorization", open=True):
        gr.Markdown("""
        # Hailuo TTS - Text-to-Speech Service

        ## Important Links
        1. List of supported languages: https://www.hailuo.ai/audio
        2. Get your API credentials:
           - Group ID and API Key can be found at:
             - https://intl.minimaxi.com/user-center/basic-information
             - https://intl.minimaxi.com/user-center/basic-information/interface-key

        ## Pricing
        - Turbo Model: $50 per 1M characters
        - HD Model: $30 per 1M characters
        - Voice Cloning:
          - Verified voice clone: $3 per voice
          - Unverified voice clone: Free
        """)
        with gr.Row(visible=True) as auth_row:
            with gr.Column():
                api_key = gr.Textbox(label="API Key", type="password", placeholder="Enter your API key")
                group_id = gr.Textbox(label="Group ID", type="password", placeholder="Enter your Group ID")
                auth_btn = gr.Button("Authorize")
                auth_error = gr.Textbox(label="Status", interactive=False)

    # Main interface (initially hidden)
    with gr.Tabs(visible=False) as tabs:
        # TTS tab

        with gr.Tab("Text to Speech"):
            with gr.Row():
                with gr.Column():
                    # Main parameters
                    text_input = gr.Textbox(label="Text", placeholder="Enter text for speech", lines=5)
                    model = gr.Dropdown(choices=["turbo", "hd"], value="hd", info="Emotions work only with turbo model", label="Model")
                    voice = gr.Dropdown(choices=HailuoTTS.VOICES, allow_custom_value=True, value="Friendly_Person", label="VoiceId", info="You can set a custom value here, for example you can specify the voice ID that you cloned in another tab, but keep in mind the note written in clone voice")

                    with gr.Row():
                        speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Speed")
                        volume = gr.Slider(minimum=0, maximum=10, value=1.0, label="Volume")
                        pitch = gr.Slider(minimum=-12, maximum=12, value=0, step=1, label="Pitch")

                    # Additional parameters
                    # The emotion picker starts hidden because the default
                    # model is "hd"; on_model_change() reveals it for "turbo".
                    emotion = gr.Dropdown(choices=HailuoTTS.EMOTIONS, label="Emotion", visible=False)
                    language = gr.Dropdown(choices=HailuoTTS.SUPPORTED_LANGUAGES, value="auto", label="Language Boost", info="Language Boost increases the accuracy of the voice, but only work with supported languages")

                    # Audio settings in accordion
                    with gr.Accordion("Audio Settings", open=True):
                        with gr.Row():
                            sample_rate = gr.Radio(
                                choices=HailuoTTS.AUDIO_CONSTRAINTS["sample_rate"],
                                value=HailuoTTS.AUDIO_CONSTRAINTS["sample_rate"][-1],
                                label="Sample Rate"
                            )
                            bitrate = gr.Radio(
                                choices=HailuoTTS.AUDIO_CONSTRAINTS["bitrate"],
                                value=HailuoTTS.AUDIO_CONSTRAINTS["bitrate"][-1],
                                label="Bitrate"
                            )
                        with gr.Row():
                            audio_format = gr.Radio(
                                choices=HailuoTTS.AUDIO_CONSTRAINTS["format"],
                                value=HailuoTTS.AUDIO_CONSTRAINTS["format"][0],
                                label="Format"
                            )
                            channel = gr.Radio(
                                choices=HailuoTTS.AUDIO_CONSTRAINTS["channel"],
                                value=HailuoTTS.AUDIO_CONSTRAINTS["channel"][0],
                                label="Channels"
                            )

                # Generation button and output
                with gr.Column():
                    tts_output = gr.Audio(label="Result")
                    tts_status = gr.Textbox(label="Status", interactive=False)
                    tts_btn = gr.Button("Generate")

        # Clone Voice tab
        with gr.Tab("Clone Voice"):
            gr.Markdown("""
            ### File Requirements:
            - Formats: MP3, M4A, WAV
            - Duration: 10s to 5min
            - Size: Less than 20MB
            - Quality: Clear voice recording with minimal background noise
            - Content: Natural speech in any language
            """)

            with gr.Row():
                with gr.Column():
                    # Cloning parameters
                    audio_file = gr.File(label="Audio File", file_types=["audio"])
                    # Checked (the default) means clone_voice() generates a
                    # random ID and the manual textbox below stays hidden;
                    # unchecking reveals the textbox via show_voice_id_input().
                    use_custom_voice_id = gr.Checkbox(label="Random Voice ID", value=True, info="Uncheck this checkbox to enter a custom voice ID")
                    voice_id = gr.Textbox(label="Voice ID", visible=False, placeholder="Minimum 8 characters, letters and numbers,first letter must be a letter")

                    with gr.Row():
                        noise_reduction = gr.Checkbox(label="Noise Reduction", value=False)
                        volume_normalize = gr.Checkbox(label="Volume Normalization", value=False)

                    preview_text = gr.Textbox(label="Preview Text (max 300 characters)", max_length=300, value="Test voice", lines=2)
                    accuracy = gr.Slider(minimum=0, maximum=1, value=0.7, label="Accuracy")

                with gr.Column():
                    clone_output = gr.Audio(label="Preview")
                    clone_status = gr.Textbox(label="Status", interactive=False)
                    clone_btn = gr.Button("Clone")
            gr.Markdown("""
            # Important Notes:
            1. When you get a voice preview, it is synthesized using the turbo model.
            2. You don't pay $3 for voice cloning. You only pay for synthesis.
            3. You can copy the resulting ID and use it in the TTS tab. Please note that as soon as you use it at least once, you will be charged $3 for voice creation. It will be linked to your account. Make sure to save this ID somewhere to use it in TTS later.
            4. Unverified voice cloning is free, but it life time is limited to 7 days.
            """)

    # Event handlers
    # The input lists are positional: their order must match the parameter
    # order of the handler functions.
    auth_btn.click(
        authorize,
        inputs=[api_key, group_id],
        outputs=[tabs, auth_error]
    )

    model.change(
        on_model_change,
        inputs=[model],
        outputs=[emotion]
    )

    tts_btn.click(
        text_to_speech,
        inputs=[
            text_input, model, voice, speed, volume, pitch, emotion, language,
            sample_rate, bitrate, audio_format, channel
        ],
        outputs=[tts_output, tts_status]
    )

    clone_btn.click(
        clone_voice,
        inputs=[audio_file, voice_id, noise_reduction, preview_text, accuracy, volume_normalize, use_custom_voice_id],
        outputs=[clone_output, clone_status]
    )

    use_custom_voice_id.change(
        show_voice_id_input,
        inputs=[use_custom_voice_id],
        outputs=[voice_id]
    )

# Launch interface
if __name__ == "__main__":
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ requests
2
+ uuid
3
+ gradio
4
+ hailuo-tts-api