Vijish committed on
Commit
e94f976
·
verified ·
1 Parent(s): eb0534b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -40
app.py CHANGED
@@ -5,13 +5,20 @@ from scipy.io import wavfile
5
  from voice_processing import tts, get_model_names, voice_mapping
6
  from io import BytesIO
7
  import asyncio
 
 
8
 
9
- # Constants for limits
10
- MAX_TEXT_FILES = 20 # Maximum number of text files processed concurrently
11
- MAX_WORDS = 5000 # Maximum number of words processed concurrently
12
- BATCH_SIZE = 5 # Number of texts to process in parallel
 
 
13
 
14
- async def process_tts_request(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
 
 
 
15
  edge_tts_voice = voice_mapping.get(selected_voice)
16
  if not edge_tts_voice:
17
  return {"error": f"Invalid voice '{selected_voice}'."}, None
@@ -38,32 +45,8 @@ async def process_tts_request(model_name, tts_text, selected_voice, slang_rate,
38
  else:
39
  audio_bytes = audio_output
40
 
41
- return {"info": info}, audio_bytes
42
-
43
- async def convert_tts(model_name, tts_texts, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
44
- # Enforce limits
45
- if len(tts_texts) > MAX_TEXT_FILES:
46
- return {"error": f"Number of text files should not exceed {MAX_TEXT_FILES}."}, None
47
-
48
- word_count = sum(len(tts_text.split()) for tts_text in tts_texts)
49
- if word_count > MAX_WORDS:
50
- return {"error": f"Total number of words should not exceed {MAX_WORDS}."}, None
51
-
52
- # Process texts in batches
53
- results = []
54
- for i in range(0, len(tts_texts), BATCH_SIZE):
55
- batch_texts = tts_texts[i:i+BATCH_SIZE]
56
- tasks = [
57
- process_tts_request(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload)
58
- for tts_text in batch_texts
59
- ]
60
- batch_results = await asyncio.gather(*tasks)
61
- results.extend(batch_results)
62
-
63
- info_list = [{"info": info} for info, _ in results]
64
- audio_uris = [f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}" for _, audio_bytes in results]
65
-
66
- return info_list, audio_uris
67
 
68
  def get_models():
69
  return get_model_names()
@@ -71,24 +54,36 @@ def get_models():
71
  def get_voices():
72
  return list(voice_mapping.keys())
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  iface = gr.Interface(
75
- fn=convert_tts,
76
  inputs=[
77
- gr.Dropdown(choices=get_models(), label="Model", interactive=True),
78
- gr.Textbox(label="Text", placeholder="Enter text here (one per line)", lines=10, interactive=True), # Allow multiple lines of text input
79
- gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
80
  gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
81
  gr.Checkbox(label="Use Uploaded Voice"),
82
  gr.File(label="Voice File")
83
  ],
84
  outputs=[
85
  gr.JSON(label="Info"),
86
- gr.JSON(label="Audio URIs")
 
87
  ],
88
- title="Text-to-Speech Conversion"
89
  )
90
 
91
  iface.launch()
92
-
93
-
94
-
 
5
  from voice_processing import tts, get_model_names, voice_mapping
6
  from io import BytesIO
7
  import asyncio
8
+ from concurrent.futures import ThreadPoolExecutor
9
+ import multiprocessing
10
 
11
+ # Determine the optimal number of threads
12
def get_optimal_threads(fraction=0.75):
    """Return the number of worker threads to give the thread pool.

    Args:
        fraction: Share of the detected CPU cores to use. Defaults to 0.75
            (75% of the available cores).

    Returns:
        ``int(cpu_count * fraction)``, but never less than 1.
    """
    try:
        cpu_count = multiprocessing.cpu_count()
    except NotImplementedError:
        # The core count cannot be determined on this platform; fall back
        # to a single worker instead of failing while the module loads.
        cpu_count = 1
    # On a single-core host int(1 * 0.75) is 0, and
    # ThreadPoolExecutor(max_workers=0) raises ValueError, so clamp to 1.
    return max(1, int(cpu_count * fraction))
17
 
18
+ # Initialize the ThreadPoolExecutor with the optimal number of threads
19
+ executor = ThreadPoolExecutor(max_workers=get_optimal_threads())
20
+
21
+ async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
22
  edge_tts_voice = voice_mapping.get(selected_voice)
23
  if not edge_tts_voice:
24
  return {"error": f"Invalid voice '{selected_voice}'."}, None
 
45
  else:
46
  audio_bytes = audio_output
47
 
48
+ audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
49
+ return {"info": info}, audio_data_uri
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def get_models():
52
  return get_model_names()
 
54
  def get_voices():
55
  return list(voice_mapping.keys())
56
 
57
def parallel_convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Run the ``convert_tts`` coroutine to completion from synchronous code.

    All arguments are forwarded to ``convert_tts`` unchanged.

    Returns:
        Whatever ``convert_tts`` returns: a ``(info_dict, audio_data_uri)``
        pair, or ``({"error": ...}, None)`` when the voice is invalid.

    Raises:
        RuntimeError: If called from a thread that already has a running
            event loop.
    """
    # asyncio.run() creates a fresh event loop, runs the coroutine and always
    # closes the loop afterwards. This function runs once per request, so a
    # loop that is opened but never closed would leak on every call.
    return asyncio.run(
        convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload)
    )
63
+
64
def run_parallel_conversion(inputs):
    """Run several TTS conversions concurrently on the shared thread pool.

    Args:
        inputs: Iterable of argument sequences; each one is unpacked into
            a ``parallel_convert_tts`` call.

    Returns:
        List with the result of each call, in the same order as ``inputs``.
    """
    # NOTE(review): this function is registered as the ``fn`` of a
    # gr.Interface with six input components. Gradio presumably passes one
    # positional argument per component rather than a single ``inputs``
    # value -- confirm against the gr.Interface documentation.

    # Submit every job first so they can all run concurrently.
    pending = []
    for request_args in inputs:
        pending.append(executor.submit(parallel_convert_tts, *request_args))

    # Collect in submission order; result() blocks until the job finishes
    # and re-raises any exception raised inside the worker.
    collected = []
    for job in pending:
        collected.append(job.result())
    return collected
70
+
71
  iface = gr.Interface(
72
+ fn=run_parallel_conversion,
73
  inputs=[
74
+ gr.Dropdown(choices=get_models(), label="Model", interactive=True, multiselect=True),
75
+ gr.Textbox(label="Text", placeholder="Enter text here", lines=2),
76
+ gr.Dropdown(choices=get_voices(), label="Voice", interactive=True, multiselect=True),
77
  gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
78
  gr.Checkbox(label="Use Uploaded Voice"),
79
  gr.File(label="Voice File")
80
  ],
81
  outputs=[
82
  gr.JSON(label="Info"),
83
+ gr.Textbox(label="Audio URI")
84
+
85
  ],
86
+ title="Parallel Text-to-Speech Conversion"
87
  )
88
 
89
  iface.launch()