import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names, voice_mapping
from io import BytesIO
import asyncio
import logging

# Configure root logging at INFO level; each record carries a timestamp, the
# logger name and the level. `logger` is this module's own logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def _read_voice_upload(voice_upload):
    """Return the raw bytes of an uploaded voice sample.

    Accepts raw bytes, a plain path string, or a file wrapper that exposes
    its on-disk path as ``.name``.
    """
    if isinstance(voice_upload, (bytes, bytearray)):
        return bytes(voice_upload)
    path = getattr(voice_upload, "name", voice_upload)
    with open(path, "rb") as f:
        return f.read()


def _audio_to_wav_bytes(tgt_sr, audio_output):
    """Encode a NumPy array as WAV bytes; any other payload is returned as-is."""
    if not isinstance(audio_output, np.ndarray):
        return audio_output
    buffer = BytesIO()
    wavfile.write(buffer, tgt_sr, audio_output)
    return buffer.getvalue()


async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Synthesise speech for ``tts_text`` and return it as a WAV data URI.

    Args:
        model_name: Model identifier forwarded unchanged to ``parallel_tts``.
        tts_text: Text forwarded unchanged to ``parallel_tts``.
        selected_voice: Key into ``voice_mapping``; the mapped value is what
            gets passed to ``parallel_tts``.
        slang_rate: Forwarded unchanged to ``parallel_tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is not ``None``,
            the uploaded file's bytes are passed along with the task.
        voice_upload: Uploaded file (object with ``.name``, path string, or
            raw bytes), or ``None``.

    Returns:
        ``({"info": info}, data_uri)`` on success, or
        ``({"error": message}, None)`` if anything fails. Exceptions are
        logged and never propagated to the caller.
    """
    try:
        edge_tts_voice = voice_mapping.get(selected_voice)
        if not edge_tts_voice:
            raise ValueError(f"Invalid voice '{selected_voice}'.")

        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()

        voice_upload_file = None
        if use_uploaded_voice and voice_upload is not None:
            # Read the file off the event loop so other requests are not stalled.
            voice_upload_file = await loop.run_in_executor(
                None, _read_voice_upload, voice_upload
            )

        task = (
            model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
        )

        # parallel_tts is a blocking call, so it runs in the default executor.
        results = await loop.run_in_executor(None, parallel_tts, [task])
        info, _, audio = results[0]

        # NOTE(review): presumably parallel_tts signals failure by returning
        # None for the audio part — confirm against voice_processing. Either
        # way, surface `info` instead of an opaque unpacking TypeError.
        if audio is None:
            raise ValueError(f"TTS produced no audio: {info}")
        tgt_sr, audio_output = audio
        if audio_output is None:
            raise ValueError(f"TTS produced no audio: {info}")

        audio_bytes = _audio_to_wav_bytes(tgt_sr, audio_output)
        encoded = base64.b64encode(audio_bytes).decode('utf-8')
        audio_data_uri = f"data:audio/wav;base64,{encoded}"
        return {"info": info}, audio_data_uri

    except Exception as e:
        # UI boundary: log the traceback and report the failure in the JSON output.
        logger.exception("Error in convert_tts")
        return {"error": str(e)}, None

def get_models():
    """Return whatever ``get_model_names()`` reports as the available models."""
    model_names = get_model_names()
    return model_names

def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    # Iterating a mapping yields its keys, so no explicit .keys() is needed.
    voice_labels = list(voice_mapping)
    return voice_labels

# Build the Gradio interface. The six input components map positionally onto
# convert_tts's parameters; its two return values fill the outputs in order.
# NOTE(review): `type="uri"` on gr.Audio and `concurrency_count` on queue()
# are Gradio-version dependent — confirm both against the installed release.
_tts_inputs = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]
_tts_outputs = [
    gr.JSON(label="Info"),
    gr.Audio(label="Generated Audio", type="uri"),
]

iface = gr.Interface(
    fn=convert_tts,
    inputs=_tts_inputs,
    outputs=_tts_outputs,
    title="Text-to-Speech Conversion",
)
# Enable request queuing; adjust concurrency_count to your server's capacity.
iface = iface.queue(concurrency_count=16)

# Launch the interface only when this file is executed as a script, not when
# it is imported; debug=True is passed straight through to launch().
if __name__ == "__main__":
    iface.launch(debug=True)