"""Gradio front end for text-to-speech conversion.

Exposes a single `gr.Interface` whose handler, `convert_tts`, forwards the
request to `voice_processing.parallel_tts` in an executor and returns the
resulting audio as a base64 ``data:`` URI together with an info payload.
"""
import asyncio
import base64
import logging
from io import BytesIO

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import parallel_tts, get_model_names, voice_mapping

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def _read_uploaded_voice(voice_upload):
    """Return the raw bytes of the uploaded voice file.

    Accepts either an object exposing a ``name`` attribute holding the file
    path (what the original code relied on) or a plain path string.
    """
    # NOTE(review): some Gradio versions hand gr.File values to the handler as
    # a path string rather than a tempfile-like wrapper; support both.
    path = getattr(voice_upload, "name", voice_upload)
    with open(path, 'rb') as f:
        return f.read()


def _audio_to_bytes(tgt_sr, audio_output):
    """Convert the audio result into a bytes-like object ready for base64.

    A NumPy array is serialized as WAV at sample rate ``tgt_sr``; any other
    non-None value is passed through unchanged.

    Raises:
        ValueError: if ``audio_output`` is None.
    """
    if audio_output is None:
        # Fail with a readable message instead of the opaque TypeError that
        # base64.b64encode(None) would raise.
        raise ValueError("TTS processing returned no audio output.")
    if isinstance(audio_output, np.ndarray):
        byte_io = BytesIO()
        wavfile.write(byte_io, tgt_sr, audio_output)
        return byte_io.getvalue()
    return audio_output


async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Run one text-to-speech conversion and return it as a data URI.

    Args:
        model_name: Model identifier, passed through to ``parallel_tts``.
        tts_text: Text to synthesize.
        selected_voice: Key into ``voice_mapping``; must map to a truthy value.
        slang_rate: Value from the "Slang Rate" slider, passed through.
        use_uploaded_voice: Whether to read and forward ``voice_upload``.
        voice_upload: Uploaded file (or None); only read when
            ``use_uploaded_voice`` is truthy.

    Returns:
        ``({"info": info}, data_uri)`` on success, or
        ``({"error": message}, None)`` if anything fails. Exceptions are
        logged and never propagate to the caller.
    """
    try:
        edge_tts_voice = voice_mapping.get(selected_voice)
        if not edge_tts_voice:
            raise ValueError(f"Invalid voice '{selected_voice}'.")

        voice_upload_file = None
        if use_uploaded_voice and voice_upload is not None:
            voice_upload_file = _read_uploaded_voice(voice_upload)

        # Single task tuple; parallel_tts is called with a list of tasks.
        task = (
            model_name,
            tts_text,
            edge_tts_voice,
            slang_rate,
            use_uploaded_voice,
            voice_upload_file,
        )

        # Run parallel_tts in the default executor so the event loop is not
        # blocked. get_running_loop() is the supported way to obtain the loop
        # from inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])
        info, _, (tgt_sr, audio_output) = result[0]

        audio_bytes = _audio_to_bytes(tgt_sr, audio_output)
        audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
        return {"info": info}, audio_data_uri

    except Exception as e:
        # Top-level UI boundary: log the traceback and report the error in
        # the JSON output instead of raising.
        logger.exception("Error in convert_tts")
        return {"error": str(e)}, None


def get_models():
    """Return the model names offered in the "Model" dropdown."""
    return get_model_names()


def get_voices():
    """Return the voice names (keys of ``voice_mapping``) for the dropdown."""
    return list(voice_mapping.keys())


# Initialize the Gradio interface
# NOTE(review): `type="uri"` on gr.Audio and `queue(concurrency_count=...)`
# depend on the installed Gradio version - confirm both are accepted.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Textbox(label="Text", placeholder="Enter text here"),
        gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
        gr.Checkbox(label="Use Uploaded Voice"),
        gr.File(label="Voice File"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Generated Audio", type="uri"),
    ],
    title="Text-to-Speech Conversion",
).queue(concurrency_count=16)  # Adjust based on your server's capacity

# Launch the interface
if __name__ == "__main__":
    iface.launch(debug=True)