"""Gradio front end for voice conversion backed by ``voice_processing``."""

import asyncio
import base64
from io import BytesIO

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import parallel_tts, get_model_names


async def convert_tts(model_name, audio_file, slang_rate):
    """Convert an uploaded audio file and return it as a base64 WAV data URI.

    Args:
        model_name: Model selected in the "Model" dropdown (one of the names
            returned by ``get_model_names()``).
        audio_file: Path of the uploaded audio (the input component uses
            ``type="filepath"``), or ``None`` when nothing was uploaded.
        slang_rate: Value of the "Slang Rate" slider, in ``[0, 1]``.

    Returns:
        A ``(payload, audio)`` tuple. On success ``payload`` is
        ``{"info": ...}`` and ``audio`` is a ``data:audio/wav;base64,...``
        string. On any failure ``payload`` is ``{"error": <message>}`` and
        ``audio`` is ``None``. Exceptions are not propagated to the caller.
    """
    if audio_file is None:
        return {"error": "No audio file uploaded."}, None

    try:
        # Positional task tuple consumed by ``parallel_tts``; the meaning of
        # each slot is defined in voice_processing (not visible here).
        task = (model_name, None, None, slang_rate, True, audio_file)

        # ``parallel_tts`` is a blocking call, so run it in the default
        # executor to keep the event loop responsive.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])

        # Guard against an empty/None result as well as a malformed entry, so
        # the user sees "Processing failed" rather than an IndexError message.
        if not result or result[0] is None or len(result[0]) != 3:
            return {"error": "Processing failed"}, None

        info, _, (tgt_sr, audio_output) = result[0]

        if audio_output is None:
            return {"error": "No audio output generated"}, None

        if isinstance(audio_output, np.ndarray):
            # Serialise the samples into an in-memory WAV container.
            byte_io = BytesIO()
            wavfile.write(byte_io, tgt_sr, audio_output)
            audio_bytes = byte_io.getvalue()
        else:
            # Presumably already encoded audio bytes -- TODO confirm against
            # parallel_tts.
            audio_bytes = audio_output

        encoded = base64.b64encode(audio_bytes).decode("utf-8")
        # NOTE(review): the value is routed to a ``gr.Audio`` output; confirm
        # that the installed Gradio version accepts a data URI string there.
        audio_data_uri = f"data:audio/wav;base64,{encoded}"
        return {"info": info}, audio_data_uri

    except Exception as e:
        # Top-level UI boundary: report the failure in the JSON panel instead
        # of raising into Gradio.
        print(f"Error in convert_tts: {str(e)}")
        return {"error": str(e)}, None


def get_models():
    """Return the model names offered in the "Model" dropdown."""
    return get_model_names()


# Initialize the Gradio interface
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
).queue()

# Launch the interface; max_threads is set to control concurrency.
iface.launch(debug=True, max_threads=10)