"""Gradio front end exposing single and batch text-to-speech conversion."""

import asyncio
import base64
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import tts, get_model_names, voice_mapping

# Sample rate written into the WAV header when tts() yields a raw array.
# NOTE(review): the first element of the tts() output tuple is discarded in
# convert_tts(); if it carries the real sample rate it should replace this
# constant -- confirm against voice_processing.tts.
OUTPUT_SAMPLE_RATE = 40000


async def convert_tts(model_name, tts_text, selected_voice, slang_rate,
                      use_uploaded_voice, voice_upload):
    """Run one text-to-speech conversion and return the audio as a data URI.

    Args:
        model_name: Model identifier forwarded to ``tts``.
        tts_text: Text to synthesize.
        selected_voice: Key into ``voice_mapping``.
        slang_rate: Rate value forwarded unchanged to ``tts``.
        use_uploaded_voice: Whether ``voice_upload`` should be read and passed
            to ``tts``.
        voice_upload: Either a filesystem path (``str``) or an object exposing
            the path as ``.name`` (as Gradio file uploads do); may be ``None``.

    Returns:
        A ``(payload, audio_data_uri)`` tuple. ``payload`` is
        ``{"info": ...}`` on success or ``{"error": ...}`` for an unknown
        voice; ``audio_data_uri`` is a ``data:audio/wav;base64,...`` string, or
        ``None`` when no audio was produced.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        # Batch entries can only supply a plain path string, while the UI
        # supplies an object with ``.name``; accept both.
        upload_path = getattr(voice_upload, "name", voice_upload)
        with open(upload_path, "rb") as upload:
            voice_upload_file = upload.read()

    info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
        model_name,
        tts_text,
        edge_tts_voice,
        slang_rate,
        use_uploaded_voice,
        voice_upload_file,
    )

    # Without audio there is nothing to encode; surface ``info`` instead of
    # crashing on the unpack / base64 step below.
    if tts_output_data is None:
        return {"info": info}, None
    _, audio_output = tts_output_data
    if audio_output is None:
        return {"info": info}, None

    if isinstance(audio_output, np.ndarray):
        wav_buffer = BytesIO()
        wavfile.write(wav_buffer, OUTPUT_SAMPLE_RATE, audio_output)
        audio_bytes = wav_buffer.getvalue()
    else:
        # Anything that is not an array is passed to base64 as-is.
        audio_bytes = audio_output

    encoded = base64.b64encode(audio_bytes).decode("utf-8")
    return {"info": info}, f"data:audio/wav;base64,{encoded}"


def convert_tts_sync(*args):
    """Blocking wrapper around :func:`convert_tts`.

    ``asyncio.run`` creates a fresh event loop for the call and always closes
    it afterwards, so repeated calls (including from worker threads) do not
    accumulate unclosed loops.

    Args:
        *args: Positional arguments forwarded to :func:`convert_tts`.

    Returns:
        Whatever :func:`convert_tts` returns.
    """
    return asyncio.run(convert_tts(*args))


def batch_convert_tts(json_input):
    """Convert a JSON list of TTS requests concurrently.

    Each entry is a JSON object read with the keys ``model_name``, ``text``,
    ``voice``, ``slang_rate`` (default ``0.5``), ``use_uploaded_voice``
    (default ``False``) and ``voice_upload`` (default ``None``).

    Args:
        json_input: JSON text holding a list of request objects.

    Returns:
        A ``{"error": ...}`` dict when the input cannot be used at all;
        otherwise a JSON string (indent 4) with one result per entry, in the
        same order as the input. Each result is either
        ``{"info": ..., "audio_uri": ...}`` or ``{"error": ...}``.
    """
    try:
        batch_data = json.loads(json_input)
    except (TypeError, ValueError) as e:
        return {"error": f"Failed to parse JSON input: {str(e)}"}

    if not isinstance(batch_data, list):
        return {"error": "JSON input must be a list of conversion entries."}

    # Pre-sized so each result lands at its input position regardless of the
    # order in which the worker threads finish.
    results = [None] * len(batch_data)

    with ThreadPoolExecutor() as executor:
        future_to_index = {}
        for index, entry in enumerate(batch_data):
            if not isinstance(entry, dict):
                results[index] = {
                    "error": f"Entry {index} is not a JSON object."
                }
                continue
            future = executor.submit(
                convert_tts_sync,
                entry.get("model_name"),
                entry.get("text"),
                entry.get("voice"),
                entry.get("slang_rate", 0.5),
                entry.get("use_uploaded_voice", False),
                entry.get("voice_upload", None),
            )
            future_to_index[future] = index

        for future in as_completed(future_to_index):
            index = future_to_index[future]
            try:
                info, audio_uri = future.result()
            except Exception as e:
                # Per-entry boundary: one failed conversion must not abort
                # the rest of the batch.
                results[index] = {"error": str(e)}
            else:
                results[index] = {"info": info, "audio_uri": audio_uri}

    return json.dumps(results, indent=4)


def get_models():
    """Return the model names reported by ``get_model_names``."""
    return get_model_names()


def get_voices():
    """Return the selectable voice names (the keys of ``voice_mapping``)."""
    return list(voice_mapping.keys())


# Single-conversion tab.
iface = gr.Interface(
    fn=convert_tts_sync,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        gr.Textbox(label="Text", placeholder="Enter text here"),
        gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
        gr.Checkbox(label="Use Uploaded Voice"),
        gr.File(label="Voice File"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Textbox(label="Audio URI"),
    ],
    title="Text-to-Speech Conversion",
    allow_flagging="never",
)

# Batch tab: takes the JSON list described in batch_convert_tts().
batch_iface = gr.Interface(
    fn=batch_convert_tts,
    inputs=gr.Textbox(
        label="JSON Input",
        lines=20,
        placeholder='Paste your JSON input here',
    ),
    outputs=gr.JSON(label="Batch Results"),
    title="Batch Text-to-Speech Conversion",
    allow_flagging="never",
)

app = gr.TabbedInterface(
    interface_list=[iface, batch_iface],
    tab_names=["Single Conversion", "Batch Conversion"],
)

app.launch()