import gradio as gr
import edge_tts
import io
import os
import time
import asyncio
from docx import Document

AUDIO_DIR = 'audio_files'  # Directory where generated audio files are stored
MAX_FILE_AGE = 24 * 60 * 60  # Maximum age of a stored audio file, in seconds (24 hours)

# Fetch every voice that edge-tts makes available
async def get_voices():
    """Return a mapping from a display label to the edge-tts voice short name.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
    to the ``ShortName`` of the same voice entry returned by
    ``edge_tts.list_voices()``.
    """
    label_to_short_name = {}
    for voice in await edge_tts.list_voices():
        short_name = voice['ShortName']
        label = f"{short_name} - {voice['Locale']} ({voice['Gender']})"
        label_to_short_name[label] = short_name
    return label_to_short_name

async def text_to_speech(text, lang):
    """Synthesize ``text`` with the voice ``lang`` and store it as an MP3 file.

    Args:
        text: The text to speak.
        lang: Value passed to ``edge_tts.Communicate`` as its ``voice``.

    Returns:
        A 2-tuple containing the saved file's path twice (the click handlers
        in this file bind the two values to an audio output and a file
        output).
    """
    communicate = edge_tts.Communicate(text, voice=lang)

    # The output directory must exist before anything is written into it.
    os.makedirs(AUDIO_DIR, exist_ok=True)

    # The file is named after the current timestamp.
    output_path = os.path.join(AUDIO_DIR, f"{time.time()}.mp3")

    await communicate.save(output_path)

    # Purge expired audio files once the new one has been written.
    delete_old_audio_files()
    return output_path, output_path

def delete_old_audio_files(audio_dir=None, max_age=None):
    """Delete regular files in the audio directory that are older than allowed.

    Cleanup is best-effort housekeeping: a problem with one entry must not
    fail the conversion that triggered it, so per-entry OS errors are skipped.

    Args:
        audio_dir: Directory to clean. Defaults to ``AUDIO_DIR``.
        max_age: Maximum allowed age in seconds, measured from the file's
            modification time. Defaults to ``MAX_FILE_AGE``.

    Returns:
        None.
    """
    if audio_dir is None:
        audio_dir = AUDIO_DIR
    if max_age is None:
        max_age = MAX_FILE_AGE

    now = time.time()
    try:
        entries = os.listdir(audio_dir)
    except FileNotFoundError:
        # Nothing has been generated yet, so there is nothing to clean up.
        return

    for file_name in entries:
        file_path = os.path.join(audio_dir, file_name)
        try:
            # Only plain files are candidates; os.remove() fails on directories.
            if not os.path.isfile(file_path):
                continue
            if now - os.path.getmtime(file_path) > max_age:
                os.remove(file_path)
        except OSError:
            # The file may have been removed by a concurrent cleanup, or may
            # be locked/in use; leave it for a later pass.
            continue

async def txt_to_speech(file, lang):
    """Read an uploaded plain-text file and convert its contents to speech.

    Args:
        file: Uploaded-file object whose ``name`` attribute is the path of
            the file on disk.
        lang: Voice identifier forwarded unchanged to ``text_to_speech``.

    Returns:
        Whatever ``text_to_speech`` returns for the file's text.

    Raises:
        UnicodeDecodeError: If the file is neither valid UTF-8 nor decodable
            with the platform default encoding.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform
        # default encoding. "utf-8-sig" also drops a leading byte-order mark
        # so it is not passed on as part of the text to speak.
        with open(file.name, 'r', encoding='utf-8-sig') as f:
            text = f.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to the platform default encoding.
        with open(file.name, 'r') as f:
            text = f.read()
    return await text_to_speech(text, lang)

async def docx_to_speech(file, lang):
    """Convert the paragraph text of an uploaded .docx file to speech.

    Args:
        file: Uploaded-file object whose ``name`` attribute is the path of
            the document on disk.
        lang: Voice identifier forwarded unchanged to ``text_to_speech``.

    Returns:
        Whatever ``text_to_speech`` returns for the extracted text.
    """
    document = Document(file.name)
    # Collect the text of every entry in ``document.paragraphs``, one per line.
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    full_text = "\n".join(paragraph_texts)
    return await text_to_speech(full_text, lang)

# Build the Gradio interface
async def create_interface():
    """Build the three-tab Gradio UI (text, .txt, .docx) and launch it.

    The voice list is fetched once via ``get_voices()``; every tab offers the
    same voice labels in its dropdown and translates the chosen label back to
    the voice short name before calling the matching converter.

    Returns:
        None. The call ends with ``iface.launch()``.
    """
    voices = await get_voices()  # display label -> voice short name
    voice_labels = list(voices)

    def make_handler(converter):
        """Wrap an async converter as a synchronous click handler."""
        def handler(source, voice_label):
            # With nothing selected the dropdown value is not a known label;
            # report that clearly instead of failing with KeyError(None).
            if voice_label not in voices:
                raise gr.Error("Please select a voice first.")
            # Drive the coroutine to completion from this synchronous handler.
            return asyncio.run(converter(source, voices[voice_label]))
        return handler

    with gr.Blocks() as iface:
        with gr.Tab("Text to Speech"):
            gr.Markdown("### Convert text to speech")
            text_input = gr.Textbox(lines=10, label="Enter your text here:")
            lang_input = gr.Dropdown(choices=voice_labels, label="Select language:")  # Voice dropdown

            audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
            gr.Button("Convert").click(fn=make_handler(text_to_speech),
                                        inputs=[text_input, lang_input],
                                        outputs=[audio_output, file_output])

        with gr.Tab("TXT to Speech"):
            gr.Markdown("### Convert .txt file to speech")
            file_input = gr.File(label="Upload your .txt file")
            lang_input_file = gr.Dropdown(choices=voice_labels, label="Select language:")  # Voice dropdown

            audio_output_file, file_output_file = gr.Audio(label="Audio"), gr.File(label="Audio File")
            gr.Button("Convert").click(fn=make_handler(txt_to_speech),
                                        inputs=[file_input, lang_input_file],
                                        outputs=[audio_output_file, file_output_file])

        with gr.Tab("DOCX to Speech"):
            gr.Markdown("### Convert .docx file to speech")
            docx_file_input = gr.File(label="Upload your .docx file")
            lang_input_docx = gr.Dropdown(choices=voice_labels, label="Select language:")  # Voice dropdown

            audio_output_docx, file_output_docx = gr.Audio(label="Audio"), gr.File(label="Audio File")
            gr.Button("Convert").click(fn=make_handler(docx_to_speech),
                                        inputs=[docx_file_input, lang_input_docx],
                                        outputs=[audio_output_docx, file_output_docx])

    # `launch(enable_queue=True)` is deprecated in Gradio 3.x and rejected by
    # Gradio 4.x; enabling the queue through `queue()` works on both.
    iface.queue()
    iface.launch()

# Run the application: build the interface and launch it
asyncio.run(create_interface())