Vijish committed on
Commit
d359e5f
·
verified ·
1 Parent(s): a7b962c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import base64
3
+ import numpy as np
4
+ from scipy.io import wavfile
5
+ from voice_processing import process_files_concurrently, get_model_names, voice_mapping
6
+ from io import BytesIO
7
+ import asyncio
8
+ import aiofiles
9
+ from aiofiles.os import stat as aio_stat
10
+ import os
11
+
12
# Upper bound on simultaneously running tasks, so the host is not overwhelmed.
# NOTE(review): nothing in the visible part of this file reads this constant;
# confirm whether the processing helpers are expected to honour it.
MAX_CONCURRENT_TASKS: int = 5
14
+
15
async def convert_tts(model_name, text_files, selected_voice, slang_rate, use_uploaded_voice, voice_upload_files):
    """Run the TTS / voice pipeline over the uploaded files.

    Args:
        model_name: Model identifier forwarded to ``process_files_concurrently``.
        text_files: Uploaded text files (objects exposing a ``.name`` path), or
            ``None``. Only read when ``use_uploaded_voice`` is falsy.
        selected_voice: Key looked up in ``voice_mapping``.
        slang_rate: Value forwarded unchanged to ``process_files_concurrently``.
        use_uploaded_voice: When truthy, the uploaded voice files are processed
            instead of the text files.
        voice_upload_files: Uploaded voice files (objects exposing a ``.name``
            path), or ``None``. Only read when ``use_uploaded_voice`` is truthy.

    Returns:
        A ``(payload, audio_uris)`` tuple. On success ``payload`` is
        ``{"info": [...]}`` with one entry per result and ``audio_uris`` is a
        list of ``data:audio/wav;base64,...`` strings, one per result that
        produced audio. On a validation failure ``payload`` is
        ``{"error": "<message>"}`` and ``audio_uris`` is ``None``.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    # Normalise "nothing uploaded" to empty lists so the checks below are uniform.
    text_files = text_files or []
    voice_upload_files = voice_upload_files or []

    # Enforce the file-count limit BEFORE touching the disk, so an oversized
    # batch is rejected without first loading every upload into memory.
    if len(text_files) > 20 or len(voice_upload_files) > 20:
        return {"error": "You can upload at most 20 text or voice files."}, None

    voice_upload_file_data = []
    if use_uploaded_voice:
        for upload in voice_upload_files:
            async with aiofiles.open(upload.name, 'rb') as handle:
                voice_upload_file_data.append(await handle.read())

    text_file_data = []
    if not use_uploaded_voice:
        for upload in text_files:
            try:
                # Explicit encoding: do not depend on the host's locale default.
                async with aiofiles.open(upload.name, 'r', encoding='utf-8') as handle:
                    content = await handle.read()
            except UnicodeDecodeError:
                return {"error": "Text files must be UTF-8 encoded."}, None
            if len(content) > 5000:
                return {"error": "Each text file should be at most 5000 characters."}, None
            text_file_data.append(content)

        # Check total combined text length
        if sum(len(text) for text in text_file_data) > 50000:
            return {"error": "Total combined text length should be at most 50,000 characters."}, None

    # Both modes go through the same call; only the payload list differs.
    payloads = voice_upload_file_data if use_uploaded_voice else text_file_data
    results = await process_files_concurrently(
        payloads, model_name, "", edge_tts_voice, slang_rate, use_uploaded_voice
    )

    info_list = []
    audio_uris = []

    for result in results:
        # Each result is a 4-tuple; the two path-like members are not used here.
        info, _edge_tts_output_path, tts_output_data, _edge_output_file = result
        if tts_output_data is not None:
            # NOTE(review): the first element is discarded; presumably it is the
            # sample rate. The WAV below is always written at 40000 Hz -- confirm
            # that this matches what the pipeline actually produces.
            _, audio_output = tts_output_data

            if isinstance(audio_output, np.ndarray):
                # Encode the raw samples as an in-memory WAV file.
                byte_io = BytesIO()
                wavfile.write(byte_io, 40000, audio_output)
                audio_bytes = byte_io.getvalue()
            else:
                # Anything else is passed to base64 as-is (must be bytes-like).
                audio_bytes = audio_output

            # Create a data URI for the audio
            encoded = base64.b64encode(audio_bytes).decode('utf-8')
            audio_uris.append(f"data:audio/wav;base64,{encoded}")

        # Info is recorded for every result, whether or not audio was produced.
        info_list.append(info)

    return {"info": info_list}, audio_uris
74
+
75
def get_models():
    """Return whatever ``get_model_names()`` reports, unchanged."""
    model_names = get_model_names()
    return model_names
77
+
78
def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    voice_names = [*voice_mapping.keys()]
    return voice_names
80
+
81
# Input widgets, listed in the same order as the parameters of ``convert_tts``.
_input_components = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    # ``convert_tts`` opens each upload through its ``.name`` path.
    gr.File(label="Text Files", type="file", file_count="multiple"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice Files", type="file", file_count="multiple"),
]

# Output widgets, matching the ``(payload, audio_uris)`` tuple that
# ``convert_tts`` returns.
# NOTE(review): the second output receives audio data URIs but is a Gallery;
# confirm this component displays them as intended.
_output_components = [
    gr.JSON(label="Info"),
    gr.Gallery(label="Audio URIs"),
]

iface = gr.Interface(
    fn=convert_tts,
    inputs=_input_components,
    outputs=_output_components,
    title="Text-to-Speech Conversion",
)

# Start serving the interface as soon as this module is executed.
iface.launch()