edge-TTS

Sleeping

App Files Files Community

edge-TTS / app.py

vuxuanhoan

Update app.py

cbac426 verified 6 months ago

raw

history blame

3.65 kB

	import gradio as gr
	import edge_tts
	import io
	import os
	import time
	import asyncio
	from docx import Document

	# Directory where generated audio files are stored.
	AUDIO_DIR = 'audio_files'
	# Maximum time, in seconds, that an audio file is kept (24 hours).
	MAX_FILE_AGE = 60 * 60 * 24

	async def text_to_speech(text, lang):
	    """Synthesize ``text`` with an edge-tts voice and store it as an MP3 file.

	    Args:
	        text: Text to speak; must contain at least one non-whitespace
	            character.
	        lang: edge-tts voice name, e.g. ``"vi-VN-NamMinhNeural"``.

	    Returns:
	        A ``(path, path)`` tuple containing the saved MP3 path twice, so the
	        same file can feed both an audio-player output and a file output.

	    Raises:
	        ValueError: If ``text`` is blank or no voice was given.
	    """
	    import uuid  # local import: only this function needs it

	    if not text or not text.strip():
	        raise ValueError("Text to convert must not be empty.")
	    if not lang:
	        raise ValueError("A voice must be selected.")

	    os.makedirs(AUDIO_DIR, exist_ok=True)  # make sure the output directory exists
	    # A random name cannot collide when two requests finish at the same
	    # instant, unlike a timestamp-based one.
	    audio_file_path = os.path.join(AUDIO_DIR, f"{uuid.uuid4().hex}.mp3")

	    # Communicate.save() expects a file *path* for the audio (its optional
	    # second argument is a metadata path), so write straight to the target
	    # file instead of passing an in-memory buffer and a MIME type.
	    tts = edge_tts.Communicate(text, voice=lang)
	    await tts.save(audio_file_path)

	    delete_old_audio_files()
	    return audio_file_path, audio_file_path

	def delete_old_audio_files(audio_dir=None, max_age=None):
	    """Delete regular files in the audio directory that exceed the age limit.

	    Args:
	        audio_dir: Directory to clean. Defaults to ``AUDIO_DIR``.
	        max_age: Age threshold in seconds, compared against each file's
	            modification time. Defaults to ``MAX_FILE_AGE``.

	    A missing directory, sub-directories, and files that vanish while the
	    directory is being scanned are skipped rather than raising.
	    """
	    if audio_dir is None:
	        audio_dir = AUDIO_DIR
	    if max_age is None:
	        max_age = MAX_FILE_AGE

	    try:
	        # Snapshot the listing first so deletions do not disturb iteration.
	        with os.scandir(audio_dir) as scan:
	            entries = list(scan)
	    except FileNotFoundError:
	        return  # nothing to clean up yet

	    now = time.time()
	    for entry in entries:
	        try:
	            if entry.is_file() and now - entry.stat().st_mtime > max_age:
	                os.remove(entry.path)
	        except FileNotFoundError:
	            # A concurrent cleanup already removed this file.
	            continue

	async def txt_to_speech(file, lang):
	    """Read an uploaded plain-text file and convert its contents to speech.

	    Args:
	        file: The uploaded file, either a path string or an object whose
	            ``name`` attribute holds the path.
	        lang: Voice name forwarded to ``text_to_speech``.

	    Returns:
	        Whatever ``text_to_speech`` returns for the file's text.

	    Raises:
	        ValueError: If no file was provided.
	    """
	    if file is None:
	        raise ValueError("No .txt file was uploaded.")
	    path = getattr(file, "name", file)
	    # Decode explicitly as UTF-8 (dropping a BOM if present) so non-ASCII
	    # text does not depend on the platform's default encoding.
	    with open(path, 'r', encoding='utf-8-sig') as f:
	        text = f.read()
	    return await text_to_speech(text, lang)

	async def docx_to_speech(file, lang):
	    """Extract paragraph text from an uploaded .docx file and speak it.

	    Args:
	        file: The uploaded file, either a path string or an object whose
	            ``name`` attribute holds the path.
	        lang: Voice name forwarded to ``text_to_speech``.

	    Returns:
	        Whatever ``text_to_speech`` returns for the extracted text.

	    Raises:
	        ValueError: If no file was provided.
	    """
	    if file is None:
	        raise ValueError("No .docx file was uploaded.")
	    doc = Document(getattr(file, "name", file))
	    # Join the text of every entry in doc.paragraphs, one per line.
	    text = "\n".join(para.text for para in doc.paragraphs)
	    return await text_to_speech(text, lang)

	# Build the Gradio interface: one tab per input type (typed text, .txt
	# upload, .docx upload), each producing an audio player and a file output.
	VOICES = ["vi-VN-NamMinhNeural", "en-US-JennyNeural"]  # voices offered in every tab

	with gr.Blocks() as iface:
	    with gr.Tab("Text to Speech"):
	        gr.Markdown("### Convert text to speech")
	        text_input = gr.Textbox(lines=10, label="Enter your text here:")
	        # Preselect a voice so "Convert" never submits None as the voice.
	        lang_input = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Select language:")

	        audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
	        # Gradio awaits coroutine handlers itself, so the async function is
	        # passed directly instead of being wrapped in asyncio.run().
	        gr.Button("Convert").click(fn=text_to_speech,
	                                   inputs=[text_input, lang_input],
	                                   outputs=[audio_output, file_output])

	    with gr.Tab("TXT to Speech"):
	        gr.Markdown("### Convert .txt file to speech")
	        file_input = gr.File(label="Upload your .txt file")
	        lang_input_file = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Select language:")

	        audio_output_file, file_output_file = gr.Audio(label="Audio"), gr.File(label="Audio File")
	        gr.Button("Convert").click(fn=txt_to_speech,
	                                   inputs=[file_input, lang_input_file],
	                                   outputs=[audio_output_file, file_output_file])

	    with gr.Tab("DOCX to Speech"):
	        gr.Markdown("### Convert .docx file to speech")
	        docx_file_input = gr.File(label="Upload your .docx file")
	        lang_input_docx = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Select language:")

	        audio_output_docx, file_output_docx = gr.Audio(label="Audio"), gr.File(label="Audio File")
	        gr.Button("Convert").click(fn=docx_to_speech,
	                                   inputs=[docx_file_input, lang_input_docx],
	                                   outputs=[audio_output_docx, file_output_docx])

	# The `enable_queue` argument of launch() is deprecated in Gradio 3.x and
	# removed in 4.x; enabling the queue through queue() works on both.
	iface.queue()
	iface.launch()