edge-TTS

Sleeping

App Files Files Community

vuxuanhoan committed on Oct 12, 2024

Commit

cd58124

verified ·

1 Parent(s): 20bc263

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -111

app.py CHANGED Viewed

@@ -1,121 +1,85 @@
 import gradio as gr
 import os
 import time
-import uuid
-import re
 import asyncio
-import torchaudio
-from huggingface_hub import HfApi, hf_hub_download, snapshot_download
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
-from vinorm import TTSnorm
-# download for mecab
-os.system("python -m unidic download")
-HF_TOKEN = os.environ.get("HF_TOKEN")
-api = HfApi(token=HF_TOKEN)
-# This will trigger downloading model
-print("Downloading if not downloaded viXTTS")
-checkpoint_dir = "model/"
-repo_id = "capleaf/viXTTS"
-use_deepspeed = False
-os.makedirs(checkpoint_dir, exist_ok=True)
-required_files = ["model.pth", "config.json", "vocab.json", "speakers_xtts.pth"]
-files_in_dir = os.listdir(checkpoint_dir)
-if not all(file in files_in_dir for file in required_files):
-    snapshot_download(
-        repo_id=repo_id,
-        repo_type="model",
-        local_dir=checkpoint_dir,
-    )
-    hf_hub_download(
-        repo_id="coqui/XTTS-v2",
-        filename="speakers_xtts.pth",
-        local_dir=checkpoint_dir,
-    )
-xtts_config = os.path.join(checkpoint_dir, "config.json")
-config = XttsConfig()
-config.load_json(xtts_config)
-MODEL = Xtts.init_from_config(config)
-MODEL.load_checkpoint(
-    config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed
-)
-if torch.cuda.is_available():
-    MODEL.cuda()
-supported_languages = config.languages
-if not "vi" in supported_languages:
-    supported_languages.append("vi")
-def normalize_vietnamese_text(text):
-    text = (
-        TTSnorm(text, unknown=False, lower=False, rule=True)
-        .replace("..", ".")
-        .replace("!.", "!")
-        .replace("?.", "?")
-        .replace(" .", ".")
-        .replace(" ,", ",")
-        .replace('"', "")
-        .replace("'", "")
-        .replace("AI", "Ây Ai")
-        .replace("A.I", "Ây Ai")
-    )
-    return text
-async def text_to_speech(text, lang, audio_file_path):
-    if lang not in supported_languages:
-        return None, "Language not supported."
-    if len(text) < 2:
-        return None, "Please provide a longer text."
-    if len(text) > 250:
-        return None, "Text is too long, please keep it under 250 characters."
-    if lang == "vi":
-        text = normalize_vietnamese_text(text)
-    try:
-        print("Generating new audio...")
-        out = MODEL.inference(
-            text,
-            lang,
-            gpt_cond_latent=None,
-            speaker_embedding=None,
-            repetition_penalty=5.0,
-            temperature=0.75,
-            enable_text_splitting=True,
-        )
-        torchaudio.save(audio_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
-        return audio_file_path, None
-    except Exception as e:
-        return None, f"Error during synthesis: {str(e)}"
-# Thư mục để lưu tệp âm thanh
-AUDIO_DIR = 'audio_files'
-os.makedirs(AUDIO_DIR, exist_ok=True)
-async def convert_text_to_speech(text, lang):
-    audio_file_name = f"{time.time()}.wav"
-    audio_file_path = os.path.join(AUDIO_DIR, audio_file_name)
-    return await text_to_speech(text, lang, audio_file_path)
 # Tạo giao diện Gradio
-with gr.Blocks() as iface:
-    with gr.Tab("Text to Speech"):
-        gr.Markdown("### Convert text to speech")
-        text_input = gr.Textbox(lines=10, label="Enter your text here:")
-        lang_input = gr.Dropdown(choices=supported_languages, label="Select language:")
-        audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
-        gr.Button("Convert").click(fn=lambda text, lang: asyncio.run(convert_text_to_speech(text, lang)),
-                                    inputs=[text_input, lang_input],
-                                    outputs=[audio_output, file_output])
-iface.launch(enable_queue=True)

 import gradio as gr
+import edge_tts
+import io
 import os
 import time
 import asyncio
+from docx import Document
+AUDIO_DIR = 'audio_files'  # Directory where generated audio files are stored
+MAX_FILE_AGE = 24 * 60 * 60  # Retention period for audio files, in seconds (24 hours)
+# Collect every voice that edge-tts reports as available.
+async def get_voices():
+    """Return a mapping of display label to voice short name.
+
+    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
+    to the ``ShortName`` of the same voice entry.
+    """
+    labelled = {}
+    for voice in await edge_tts.list_voices():
+        label = f"{voice['ShortName']} - {voice['Locale']} ({voice['Gender']})"
+        labelled[label] = voice['ShortName']
+    return labelled
+async def text_to_speech(text, lang):
+    """Synthesize ``text`` with the edge-tts voice ``lang`` and save it as MP3.
+
+    The file is written into ``AUDIO_DIR`` under a name derived from the
+    current timestamp; expired audio files are pruned afterwards.
+
+    Returns:
+        The saved file's path twice, one value for each output component
+        wired to this function.
+    """
+    communicate = edge_tts.Communicate(text, voice=lang)
+    # Make sure the output directory exists before writing into it.
+    os.makedirs(AUDIO_DIR, exist_ok=True)
+    output_path = os.path.join(AUDIO_DIR, f"{time.time()}.mp3")
+    # Write the synthesized audio straight to the target path.
+    await communicate.save(output_path)
+    # Remove audio files that have outlived the retention period.
+    delete_old_audio_files()
+    return output_path, output_path
+def delete_old_audio_files():
+    """Delete files in ``AUDIO_DIR`` last modified more than ``MAX_FILE_AGE`` seconds ago.
+
+    Cleanup is best-effort: a missing directory, or a file that disappears
+    while the directory is being scanned (for example because another request
+    is pruning at the same time), is skipped instead of raising. Entries that
+    are not regular files are left alone.
+    """
+    now = time.time()
+    try:
+        file_names = os.listdir(AUDIO_DIR)
+    except FileNotFoundError:
+        # Nothing has been generated yet, so there is nothing to prune.
+        return
+    for file_name in file_names:
+        file_path = os.path.join(AUDIO_DIR, file_name)
+        # os.remove() cannot delete directories; only prune regular files.
+        if not os.path.isfile(file_path):
+            continue
+        try:
+            if now - os.path.getmtime(file_path) > MAX_FILE_AGE:
+                os.remove(file_path)
+        except FileNotFoundError:
+            # The file vanished between listing and removal; already gone.
+            continue
+async def txt_to_speech(file, lang):
+    """Read an uploaded text file and convert its contents to speech.
+
+    Args:
+        file: Uploaded file object; its ``name`` attribute is used as the
+            path to read from.
+        lang: Voice identifier forwarded to ``text_to_speech``.
+
+    Returns:
+        Whatever ``text_to_speech`` returns for the file's contents.
+    """
+    # Decode explicitly as UTF-8 rather than relying on the locale default,
+    # which can mangle or reject non-ASCII text. The "utf-8-sig" codec also
+    # drops a leading byte-order mark so it is not sent to the synthesizer.
+    with open(file.name, 'r', encoding='utf-8-sig') as f:
+        text = f.read()
+    return await text_to_speech(text, lang)
+async def docx_to_speech(file, lang):
+    """Read an uploaded .docx file and convert its paragraph text to speech.
+
+    Args:
+        file: Uploaded file object; its ``name`` attribute is used as the
+            path of the document to open.
+        lang: Voice identifier forwarded to ``text_to_speech``.
+
+    Returns:
+        Whatever ``text_to_speech`` returns for the extracted text.
+    """
+    document = Document(file.name)
+    # Gather the text of every paragraph, one paragraph per line.
+    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
+    return await text_to_speech("\n".join(paragraph_texts), lang)
 # Tạo giao diện Gradio
+async def create_interface():
+    """Build the three-tab Gradio interface and launch it.
+
+    The available voices are fetched once through ``get_voices()``; the
+    resulting label -> short-name mapping fills the voice dropdown of every
+    tab. Each tab's "Convert" button maps the selected label back to its
+    voice short name and runs the matching coroutine with ``asyncio.run``.
+    """
+    voices = await get_voices()  # Display label -> voice short name
+    with gr.Blocks() as iface:
+        with gr.Tab("Text to Speech"):
+            gr.Markdown("### Convert text to speech")
+            text_input = gr.Textbox(lines=10, label="Enter your text here:")
+            lang_input = gr.Dropdown(choices=list(voices.keys()), label="Select language:")  # Voice dropdown
+            audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
+            gr.Button("Convert").click(fn=lambda text, lang: asyncio.run(text_to_speech(text, voices[lang])),
+                                        inputs=[text_input, lang_input],
+                                        outputs=[audio_output, file_output])
+        with gr.Tab("TXT to Speech"):
+            gr.Markdown("### Convert .txt file to speech")
+            file_input = gr.File(label="Upload your .txt file")
+            lang_input_file = gr.Dropdown(choices=list(voices.keys()), label="Select language:")  # Voice dropdown
+            audio_output_file, file_output_file = gr.Audio(label="Audio"), gr.File(label="Audio File")
+            gr.Button("Convert").click(fn=lambda file, lang: asyncio.run(txt_to_speech(file, voices[lang])),
+                                        inputs=[file_input, lang_input_file],
+                                        outputs=[audio_output_file, file_output_file])
+        with gr.Tab("DOCX to Speech"):
+            gr.Markdown("### Convert .docx file to speech")
+            docx_file_input = gr.File(label="Upload your .docx file")
+            lang_input_docx = gr.Dropdown(choices=list(voices.keys()), label="Select language:")  # Voice dropdown
+            audio_output_docx, file_output_docx = gr.Audio(label="Audio"), gr.File(label="Audio File")
+            gr.Button("Convert").click(fn=lambda file, lang: asyncio.run(docx_to_speech(file, voices[lang])),
+                                        inputs=[docx_file_input, lang_input_docx],
+                                        outputs=[audio_output_docx, file_output_docx])
+    # Turn the request queue on through Blocks.queue(): the ``enable_queue``
+    # keyword of launch() is deprecated and rejected by newer Gradio releases.
+    iface.queue()
+    iface.launch()
+# Run the application
+asyncio.run(create_interface())