Vijish committed on
Commit
1565043
·
verified ·
1 Parent(s): 56fd60f

Create app4

Browse files
Files changed (1) hide show
  1. app4 +67 -0
app4 ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import base64
3
+ import numpy as np
4
+ from scipy.io import wavfile
5
+ from voice_processing import tts, get_model_names, voice_mapping
6
+ from io import BytesIO
7
+ import asyncio
8
+ from pydub import AudioSegment
9
+
10
async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Run ``tts`` for the given inputs and return the result as MP3 bytes.

    Args:
        model_name: Model identifier, forwarded unchanged to ``tts``.
        tts_text: Text to synthesize, forwarded unchanged to ``tts``.
        selected_voice: Key looked up in ``voice_mapping`` to obtain the
            voice value passed to ``tts``.
        slang_rate: Value forwarded unchanged to ``tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is provided,
            the uploaded file's raw bytes are passed to ``tts``.
        voice_upload: Uploaded file. Either an object exposing the file path
            as ``.name`` or the path itself.

    Returns:
        bytes: The MP3-encoded audio.

    Raises:
        gr.Error: If ``selected_voice`` has no entry in ``voice_mapping`` or
            ``tts`` produced no audio.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        # The interface has a single Audio output, so an (error-dict, None)
        # tuple cannot be rendered; surface the problem as a UI error instead.
        raise gr.Error(f"Invalid voice '{selected_voice}'.")

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        # Accept both file-like upload objects (path in ``.name``) and plain paths.
        upload_path = getattr(voice_upload, "name", voice_upload)
        with open(upload_path, "rb") as f:
            voice_upload_file = f.read()

    # Process the text input or uploaded voice.
    info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    if tts_output_data is None:
        # Fail with a readable message instead of a TypeError on unpacking.
        raise gr.Error(f"TTS produced no audio: {info}")

    # NOTE(review): the first element of tts_output_data is discarded and a
    # fixed 40000 Hz rate is used when encoding arrays; confirm that this
    # matches the rate tts actually produces.
    _, audio_output = tts_output_data
    if audio_output is None:
        raise gr.Error(f"TTS produced no audio: {info}")

    return _to_mp3_bytes(audio_output)


def _to_mp3_bytes(audio_output, sample_rate=40000):
    """Encode a NumPy sample array or WAV-encoded bytes as MP3 bytes."""
    if isinstance(audio_output, np.ndarray):
        # Raw samples: wrap them in an in-memory WAV container first.
        wav_buffer = BytesIO()
        wavfile.write(wav_buffer, sample_rate, audio_output)
        wav_buffer.seek(0)
        segment = AudioSegment.from_wav(wav_buffer)
    else:
        # Anything else is treated as already WAV-encoded bytes.
        segment = AudioSegment.from_file(BytesIO(audio_output), format="wav")

    # Export into memory; exporting without a target hands back an open
    # temporary file that would otherwise never be closed.
    mp3_buffer = BytesIO()
    segment.export(mp3_buffer, format="mp3")
    return mp3_buffer.getvalue()
42
+
43
def get_models():
    """Return the model names reported by ``get_model_names``."""
    model_names = get_model_names()
    return model_names
45
+
46
def get_voices():
    """Return the keys of ``voice_mapping`` as a list."""
    voice_names = [voice_name for voice_name in voice_mapping.keys()]
    return voice_names
48
+
49
# Gradio UI: six inputs feeding convert_tts and a single audio output.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Textbox(label="Text", placeholder="Enter text here"),
        gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
        gr.Checkbox(label="Use Uploaded Voice"),
        gr.File(label="Voice File"),
    ],
    outputs=[
        gr.Audio(label="Result Audio"),
    ],
    title="Text-to-Speech Conversion",
    # The concurrency limit is configured on the Interface; launch() does not
    # accept a concurrency_limit keyword and would raise TypeError.
    concurrency_limit=2,
)

iface.launch()