# Spaces: Sleeping
import gradio as gr | |
import edge_tts | |
import io | |
import os | |
import time | |
import asyncio | |
from docx import Document | |
# Directory where generated audio files are stored.
AUDIO_DIR = "audio_files"
# Maximum time (in seconds) an audio file is kept before cleanup: 24 hours.
MAX_FILE_AGE = 60 * 60 * 24
async def text_to_speech(text, lang):
    """Synthesize *text* with an Edge TTS voice and store it as an MP3 file.

    Args:
        text: The text to convert to speech.
        lang: Edge TTS voice name passed as ``voice=`` to
            ``edge_tts.Communicate`` (e.g. ``"vi-VN-NamMinhNeural"``).

    Returns:
        A 2-tuple containing the path of the generated MP3 twice; the UI
        binds one value to an audio player and the other to a file output.
    """
    os.makedirs(AUDIO_DIR, exist_ok=True)  # make sure the target directory exists
    audio_file_name = f"{time.time()}.mp3"
    audio_file_path = os.path.join(AUDIO_DIR, audio_file_name)

    tts = edge_tts.Communicate(text, voice=lang)
    # Communicate.save() takes a destination *file path* (its optional second
    # argument is a metadata file path, not a MIME type), so write directly to
    # the final location instead of going through an in-memory buffer.
    await tts.save(audio_file_path)

    # Opportunistically purge expired files on every conversion.
    delete_old_audio_files()
    return audio_file_path, audio_file_path
def delete_old_audio_files(audio_dir=None, max_age=None):
    """Remove regular files older than *max_age* seconds from *audio_dir*.

    Args:
        audio_dir: Directory to clean. Defaults to ``AUDIO_DIR``.
        max_age: Maximum allowed age in seconds, measured against each file's
            modification time. Defaults to ``MAX_FILE_AGE``.

    A missing directory is treated as "nothing to clean". Sub-directories are
    skipped, and files that disappear while the scan is running are ignored.
    """
    if audio_dir is None:
        audio_dir = AUDIO_DIR
    if max_age is None:
        max_age = MAX_FILE_AGE

    now = time.time()
    try:
        entries = os.scandir(audio_dir)
    except FileNotFoundError:
        # Nothing has been generated yet.
        return

    with entries:
        for entry in entries:
            try:
                # os.remove() cannot delete directories; leave them alone.
                if not entry.is_file():
                    continue
                if now - entry.stat().st_mtime > max_age:
                    os.remove(entry.path)
            except FileNotFoundError:
                # Another request may have removed the file concurrently.
                continue
async def txt_to_speech(file, lang):
    """Read an uploaded text file and convert its contents to speech.

    Args:
        file: Uploaded-file object whose ``.name`` attribute is the path of
            the file on disk.
        lang: Voice name forwarded to ``text_to_speech``.

    Returns:
        Whatever ``text_to_speech`` returns for the file's contents.
    """
    # Decode explicitly instead of relying on the platform default encoding,
    # which breaks non-ASCII (e.g. Vietnamese) text on some systems.
    # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM if present.
    with open(file.name, 'r', encoding='utf-8-sig') as f:
        text = f.read()
    return await text_to_speech(text, lang)
async def docx_to_speech(file, lang):
    """Convert the paragraph text of an uploaded .docx file to speech.

    Args:
        file: Uploaded-file object whose ``.name`` attribute is the path of
            the document on disk.
        lang: Voice name forwarded to ``text_to_speech``.

    Returns:
        Whatever ``text_to_speech`` returns for the extracted text.
    """
    document = Document(file.name)
    # Collect the text of every paragraph, one paragraph per line.
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    full_text = "\n".join(paragraph_texts)
    return await text_to_speech(full_text, lang)
# Build the Gradio interface: one tab per input type (raw text, .txt, .docx).
# Voices offered in every tab; the first one is preselected so a conversion
# never runs with an empty voice.
VOICE_CHOICES = ["vi-VN-NamMinhNeural", "en-US-JennyNeural"]

with gr.Blocks() as iface:
    with gr.Tab("Text to Speech"):
        gr.Markdown("### Convert text to speech")
        text_input = gr.Textbox(lines=10, label="Enter your text here:")
        lang_input = gr.Dropdown(choices=VOICE_CHOICES, value=VOICE_CHOICES[0], label="Select language:")
        audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
        # Gradio accepts coroutine functions as handlers, so no asyncio.run()
        # wrapper is needed.
        gr.Button("Convert").click(fn=text_to_speech,
                                   inputs=[text_input, lang_input],
                                   outputs=[audio_output, file_output])
    with gr.Tab("TXT to Speech"):
        gr.Markdown("### Convert .txt file to speech")
        file_input = gr.File(label="Upload your .txt file")
        lang_input_file = gr.Dropdown(choices=VOICE_CHOICES, value=VOICE_CHOICES[0], label="Select language:")
        audio_output_file, file_output_file = gr.Audio(label="Audio"), gr.File(label="Audio File")
        gr.Button("Convert").click(fn=txt_to_speech,
                                   inputs=[file_input, lang_input_file],
                                   outputs=[audio_output_file, file_output_file])
    with gr.Tab("DOCX to Speech"):
        gr.Markdown("### Convert .docx file to speech")
        docx_file_input = gr.File(label="Upload your .docx file")
        lang_input_docx = gr.Dropdown(choices=VOICE_CHOICES, value=VOICE_CHOICES[0], label="Select language:")
        audio_output_docx, file_output_docx = gr.Audio(label="Audio"), gr.File(label="Audio File")
        gr.Button("Convert").click(fn=docx_to_speech,
                                   inputs=[docx_file_input, lang_input_docx],
                                   outputs=[audio_output_docx, file_output_docx])

# launch(enable_queue=...) is deprecated and no longer accepted by newer Gradio
# releases; enabling the queue via Blocks.queue() works across versions.
iface.queue()
iface.launch()