File size: 2,397 Bytes
0787439 d5183ee 9eb2389 d5183ee 62c32a4 257975d 62c32a4 8d4ed80 62c32a4 0826e3b 62c32a4 686aada 0787439 62c32a4 686aada 62c32a4 d5183ee 62c32a4 70d89e0 62c32a4 2463bb1 62c32a4 d5183ee 62c32a4 0787439 686aada d5183ee 686aada d5183ee 686aada 0787439 686aada f635f72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names, voice_mapping
from io import BytesIO
import asyncio # Import asyncio
# Define an asynchronous function for the Gradio interface
async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Run one text-to-speech job and return its audio as a base64 WAV data URI.

    Args:
        model_name: Model identifier, forwarded unchanged to ``parallel_tts``.
        tts_text: Text to synthesize, forwarded unchanged to ``parallel_tts``.
        selected_voice: Key looked up in ``voice_mapping`` to obtain the
            voice value passed to ``parallel_tts``.
        slang_rate: Forwarded unchanged to ``parallel_tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is not ``None``,
            the uploaded file's bytes are read and forwarded.
        voice_upload: Value of the ``gr.File`` input: either an object
            exposing the file path as ``.name`` or the path itself, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(payload, audio_data_uri)`` tuple. ``payload`` is
        ``{"error": ...}`` for an unknown voice, otherwise ``{"info": info}``
        with the info value reported by ``parallel_tts``. ``audio_data_uri``
        is a ``data:audio/wav;base64,...`` string, or ``None`` when no audio
        is available.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        # Depending on the Gradio version / `type` setting, gr.File supplies
        # either a file wrapper with a `.name` path or a plain path string.
        upload_path = getattr(voice_upload, "name", voice_upload)
        with open(upload_path, "rb") as f:
            voice_upload_file = f.read()

    # parallel_tts takes a list of task tuples; a single task is submitted.
    task = (
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # Run the blocking call in the default executor so the event loop stays
    # responsive. get_running_loop() is the supported call inside a coroutine.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, parallel_tts, [task])
    info, _, audio = result[0]

    # NOTE(review): presumably parallel_tts reports a failed job with None in
    # place of the (sample_rate, audio) pair or of the audio itself -- confirm
    # against voice_processing. Either way there is nothing to encode.
    if audio is None:
        return {"info": info}, None
    tgt_sr, audio_output = audio
    if audio_output is None:
        return {"info": info}, None

    # Serialize sample arrays to an in-memory WAV file; anything else is
    # passed to the base64 encoder as-is.
    if isinstance(audio_output, np.ndarray):
        byte_io = BytesIO()
        wavfile.write(byte_io, tgt_sr, audio_output)
        audio_bytes = byte_io.getvalue()
    else:
        audio_bytes = audio_output

    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return {"info": info}, audio_data_uri
def get_models():
    """Return whatever ``get_model_names`` reports as the available models."""
    available_models = get_model_names()
    return available_models
def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    voice_names = [*voice_mapping.keys()]
    return voice_names
# Initialize the Gradio interface
# Input components, listed in the positional order of convert_tts's parameters.
tts_inputs = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]

# Output components, matching convert_tts's (payload, audio_data_uri) return.
tts_outputs = [
    gr.JSON(label="Info"),
    gr.Textbox(label="Audio URI"),
]

# Build the interface and enable queuing; queue() is called with no arguments
# (in particular, no 'concurrency_count').
iface = gr.Interface(
    fn=convert_tts,
    inputs=tts_inputs,
    outputs=tts_outputs,
    title="Text-to-Speech Conversion",
).queue()

# Start the app in debug mode; max_threads is set to 10 to control concurrency.
iface.launch(debug=True, max_threads=10)