Vijish committed on
Commit
8398686
·
verified ·
1 Parent(s): 5f79421

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -34
app.py CHANGED
@@ -5,18 +5,7 @@ from scipy.io import wavfile
5
  from voice_processing import tts, get_model_names, voice_mapping
6
  from io import BytesIO
7
  import asyncio
8
from concurrent.futures import ThreadPoolExecutor
import multiprocessing


def get_optimal_threads():
    """Return the worker-thread count for the shared pool.

    Uses ~75% of the available CPU cores, clamped to at least 1:
    ``int(cpu_count * 0.75)`` is 0 on a single-core machine, and
    ``ThreadPoolExecutor(max_workers=0)`` raises ``ValueError``.

    Returns:
        int: number of worker threads, always >= 1.
    """
    cpu_count = multiprocessing.cpu_count()
    return max(1, int(cpu_count * 0.75))


# Shared pool for running blocking per-request conversions off the main thread.
executor = ThreadPoolExecutor(max_workers=get_optimal_threads())
20
 
21
  async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
22
  edge_tts_voice = voice_mapping.get(selected_voice)
@@ -28,14 +17,12 @@ async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uplo
28
  with open(voice_upload.name, 'rb') as f:
29
  voice_upload_file = f.read()
30
 
31
- # Process the text input or uploaded voice
32
  info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
33
  model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
34
  )
35
 
36
  _, audio_output = tts_output_data
37
 
38
- # Return audio data as bytes
39
  audio_bytes = None
40
  if isinstance(audio_output, np.ndarray):
41
  byte_io = BytesIO()
@@ -48,39 +35,68 @@ async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uplo
48
  audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
49
  return {"info": info}, audio_data_uri
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
def get_models():
    """Return the names of all available TTS models (for the UI dropdown)."""
    names = get_model_names()
    return names
53
 
54
def get_voices():
    """Return the selectable voice display names (keys of voice_mapping)."""
    return [voice for voice in voice_mapping]
56
 
57
def parallel_convert_tts(input_data):
    """Run one TTS conversion synchronously inside a worker thread.

    Args:
        input_data: 6-tuple of (model_name, tts_text, selected_voice,
            slang_rate, use_uploaded_voice, voice_upload) forwarded
            unchanged to ``convert_tts``.

    Returns:
        Whatever ``convert_tts`` returns (an info dict and an audio data URI).
    """
    model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload = input_data
    # asyncio.run creates, runs, and *closes* a fresh event loop per call.
    # The previous new_event_loop()/run_until_complete() pair never closed
    # the loop, leaking one event loop (and its selector fd) per conversion.
    return asyncio.run(
        convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload)
    )
64
-
65
def run_parallel_conversion(inputs):
    """Fan each input tuple out to the shared thread pool; collect results in input order."""
    pending = [executor.submit(parallel_convert_tts, item) for item in inputs]
    # future.result() blocks until that conversion finishes (or re-raises its error).
    return [job.result() for job in pending]
71
-
72
# Batch-only UI: one JSON payload in, info + audio URIs out.
iface = gr.Interface(
    fn=run_parallel_conversion,
    inputs=[gr.JSON(label="Batch Inputs")],
    outputs=[gr.JSON(label="Info"), gr.JSON(label="Audio URIs")],
    title="Parallel Text-to-Speech Conversion"
)

iface.launch(share=True)
 
 
 
 
 
 
85
 
86
 
 
5
  from voice_processing import tts, get_model_names, voice_mapping
6
  from io import BytesIO
7
  import asyncio
8
+ import json
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
11
  edge_tts_voice = voice_mapping.get(selected_voice)
 
17
  with open(voice_upload.name, 'rb') as f:
18
  voice_upload_file = f.read()
19
 
 
20
  info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
21
  model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
22
  )
23
 
24
  _, audio_output = tts_output_data
25
 
 
26
  audio_bytes = None
27
  if isinstance(audio_output, np.ndarray):
28
  byte_io = BytesIO()
 
35
  audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
36
  return {"info": info}, audio_data_uri
37
 
38
async def batch_convert_tts(json_file):
    """Convert every entry of an uploaded JSON batch file concurrently.

    Args:
        json_file: uploaded file object whose ``.name`` is a path to a JSON
            array. Each element is a dict with keys ``model_name``, ``text``,
            ``voice``, and optional ``slang_rate`` (default 0.5),
            ``use_uploaded_voice`` (default False), ``voice_upload``
            (default None).

    Returns:
        list[dict]: one ``{"info": ..., "audio_uri": ...}`` per entry,
        in the same order as the input file.
    """
    with open(json_file.name, 'r') as fh:
        batch_data = json.load(fh)

    # Build all coroutines first so asyncio.gather runs them concurrently.
    tasks = [
        convert_tts(
            entry.get("model_name"),
            entry.get("text"),
            entry.get("voice"),
            entry.get("slang_rate", 0.5),
            entry.get("use_uploaded_voice", False),
            entry.get("voice_upload", None),
        )
        for entry in batch_data
    ]

    responses = await asyncio.gather(*tasks)
    return [{"info": info, "audio_uri": audio_uri} for info, audio_uri in responses]
61
+
62
def get_models():
    """Expose the available model names to the UI."""
    model_names = get_model_names()
    return model_names
64
 
65
def get_voices():
    """Expose the selectable voice names (voice_mapping keys) to the UI."""
    return [name for name in voice_mapping]
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
# ---- Gradio UI wiring -------------------------------------------------

# Single-conversion tab: pick a model/voice, type text, get one audio URI back.
single_inputs = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]
single_outputs = [
    gr.JSON(label="Info"),
    gr.Textbox(label="Audio URI"),
]
iface = gr.Interface(
    fn=convert_tts,
    inputs=single_inputs,
    outputs=single_outputs,
    title="Text-to-Speech Conversion",
    allow_flagging="never",
)

# Batch tab: one JSON file in, one list of results out.
batch_iface = gr.Interface(
    fn=batch_convert_tts,
    inputs=gr.File(label="JSON File"),
    outputs=gr.JSON(label="Batch Results"),
    title="Batch Text-to-Speech Conversion",
    allow_flagging="never",
)

app = gr.TabbedInterface(
    interface_list=[iface, batch_iface],
    tab_names=["Single Conversion", "Batch Conversion"],
)

app.launch()
100
+
101
 
102