Spaces:

TDN-M
/

GV-a

Sleeping

App Files Files Community

TDN-M committed on Jan 24

Commit

38b7ebd

verified ·

1 Parent(s): b7935b6

Update tts.py

Browse files

Files changed (1) hide show

tts.py +104 -125

tts.py CHANGED Viewed

@@ -1,137 +1,116 @@
-import asyncio
-import mimetypes
 import os
-import tempfile
-import glob
-import fitz  # PyMuPDF
-import random
-import gradio as gr
-from docx import Document
-from content_generation import create_content, CONTENT_TYPES
-from openai import OpenAI
-from gradio_client import Client, handle_file  # Thêm thư viện để gọi API
-from tts import generate_speech
-# Khởi tạo client OpenAI với API key từ biến môi trường
-client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
-# Đường dẫn đến thư mục chứa các file âm thanh
-VOICES_DIR = "voices"
-def create_docx(content, output_path):
     """
-    Tạo file docx từ nội dung.
     """
-    doc = Document()
-    doc.add_paragraph(content)
-    doc.save(output_path)
-def process_pdf(file_path):
-    """
-    Xử lý file PDF và trích xuất nội dung.
-    """
-    doc = fitz.open(file_path)
-    text = ""
-    for page in doc:
-        text += page.get_text()
     return text
-def process_docx(file_path):
-    """
-    Xử lý file DOCX và trích xuất nội dung.
-    """
-    doc = Document(file_path)
-    text = ""
-    for para in doc.paragraphs:
-        text += para.text
-    return text
-def text_to_speech(content, voice_file):
     """
-    Chuyển đổi nội dung thành giọng nói bằng hàm generate_speech từ tts.py.
     """
     try:
-        # Gọi hàm generate_speech để tạo file âm thanh
-        output_audio = generate_speech(content, language="vi", speaker_wav=voice_file)
-        return output_audio
     except Exception as e:
-        return f"Lỗi khi chuyển đổi văn bản thành giọng nói: {str(e)}"
-def convert_content_to_speech(content, voice_file):
-    """
-    Chuyển đổi nội dung thành giọng nói.
-    """
-    return text_to_speech(content, voice_file)
-def interface():
-    with gr.Blocks() as app:
-        gr.Markdown("# Ứng dụng Tạo Nội dung và Video")
-        with gr.Tab("Tạo Nội dung"):
-            with gr.Row():
-                with gr.Column():
-                    prompt = gr.Textbox(label="Nhập yêu cầu nội dung")
-                    file_upload = gr.File(label="Tải lên file kèm theo", type="filepath")
-                    content_type = gr.Radio(label="Chọn loại nội dung",
-                                            choices=CONTENT_TYPES,
-                                            value=None)  # Giá trị mặc định là không có gì được chọn
-                    voice_files = [os.path.join(VOICES_DIR, f) for f in os.listdir(VOICES_DIR) if f.endswith(".wav")]
-                    voice_selector = gr.Dropdown(label="Chọn giọng đọc", choices=voice_files)  # Dropdown để chọn file âm thanh
-                    content_button = gr.Button("Tạo Nội dung")
-                with gr.Column():
-                    content_output = gr.Textbox(label="Nội dung tạo ra", interactive=True)
-                    confirm_button = gr.Button("Xác nhận nội dung")
-                    download_docx = gr.File(label="Tải xuống file DOCX", interactive=False)
-                    status_message = gr.Label(label="Trạng thái")
-                    convert_to_speech_button = gr.Button("Chuyển đổi thành giọng nói")
-                    audio_output = gr.Audio(label="Synthesised Audio", autoplay=True)  # Phát tự động
-            def generate_content(prompt, file, content_type):
-                try:
-                    status = "Đang xử lý..."
-                    if file and os.path.exists(file):
-                        mime_type, _ = mimetypes.guess_type(file)
-                        if mime_type == "application/pdf":
-                            file_content = process_pdf(file)
-                            prompt = f"{prompt}\n\nDưới đây là nội dung của file tài liệu:\n\n{file_content}"
-                        elif mime_type in (
-                            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                            "application/msword"):
-                            file_content = process_docx(file)
-                            prompt = f"{prompt}\n\nDưới đây là nội dung của file tài liệu:\n\n{file_content}"
-                        else:
-                            raise ValueError("Định dạng file không được hỗ trợ.")
-                    if not content_type:
-                        raise ValueError("Vui lòng chọn một loại nội dung")
-                    script_content = create_content(prompt, content_type, "Tiếng Việt")
-                    docx_path = "script.docx"
-                    create_docx(script_content, docx_path)
-                    status = "Đã tạo nội dung thành công!"
-                    return script_content, docx_path, status
-                except Exception as e:
-                    status = f"Đã xảy ra lỗi: {str(e)}"
-                    return "", None, status
-            async def confirm_content(content):
-                docx_path = "script.docx"
-                create_docx(content, docx_path)
-            content_button.click(generate_content,
-                                 inputs=[prompt, file_upload, content_type],
-                                 outputs=[content_output, download_docx, status_message])
-            convert_to_speech_button.click(convert_content_to_speech,
-                                           inputs=[content_output, voice_selector],
-                                           outputs=[audio_output])
-    return app
-# Khởi chạy ứng dụng
 if __name__ == "__main__":
-    app = interface()
-    app.launch(share=True)

 import os
+import re
+import torch
+import torchaudio
+from huggingface_hub import snapshot_download, hf_hub_download
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+from vinorm import TTSnorm
+# Configure the checkpoint location and model source (module level: runs on import)
+checkpoint_dir = "model/"
+repo_id = "capleaf/viXTTS"
+use_deepspeed = False
+# Create the checkpoint directory if it does not exist yet
+os.makedirs(checkpoint_dir, exist_ok=True)
+# Check for the required files and download them if any one is missing
+required_files = ["model.pth", "config.json", "vocab.json", "speakers_xtts.pth"]
+files_in_dir = os.listdir(checkpoint_dir)
+if not all(file in files_in_dir for file in required_files):
+    snapshot_download(
+        repo_id=repo_id,
+        repo_type="model",
+        local_dir=checkpoint_dir,
+    )
+    hf_hub_download(
+        repo_id="coqui/XTTS-v2",
+        filename="speakers_xtts.pth",
+        local_dir=checkpoint_dir,
+    )
+# Load the configuration and the model
+xtts_config = os.path.join(checkpoint_dir, "config.json")
+config = XttsConfig()
+config.load_json(xtts_config)
+MODEL = Xtts.init_from_config(config)
+MODEL.load_checkpoint(config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed)
+# Make sure the model runs on the CPU
+MODEL.to("cpu")  # Move the model to the CPU
+# Supported languages (Vietnamese and English only)
+supported_languages = ["vi", "en"]
+def normalize_vietnamese_text(text):
     """
+    Normalize Vietnamese text: run TTSnorm, then tidy punctuation, strip quote characters, and respell AI / A.I as Ây Ai.
     """
+    text = (
+        TTSnorm(text, unknown=False, lower=False, rule=True)
+        .replace("..", ".")
+        .replace("!.", "!")
+        .replace("?.", "?")
+        .replace(" .", ".")
+        .replace(" ,", ",")
+        .replace('"', "")
+        .replace("'", "")
+        .replace("AI", "Ây Ai")  # NOTE(review): plain substring replace, so "AI" inside a longer uppercase word is rewritten too
+        .replace("A.I", "Ây Ai")
+    )
     return text
+def generate_speech(text, language="vi", speaker_wav=None, normalize_text=True):
     """
+    Synthesize speech from text, save it to output.wav and return that path; raises ValueError for an unsupported language or text shorter than 2 characters, and RuntimeError for any failure during synthesis.
     """
+    if language not in supported_languages:
+        raise ValueError(f"Ngôn ngữ {language} không được hỗ trợ. Chỉ hỗ trợ tiếng Việt (vi) và tiếng Anh (en).")
+    if len(text) < 2:
+        raise ValueError("Văn bản quá ngắn. Vui lòng nhập văn bản dài hơn.")
     try:
+        # Normalize the text when requested (applied to Vietnamese only)
+        if normalize_text and language == "vi":
+            text = normalize_vietnamese_text(text)
+        # Derive the conditioning latent and speaker embedding from the reference audio file
+        with torch.no_grad():  # Disable gradient tracking to save memory
+            gpt_cond_latent, speaker_embedding = MODEL.get_conditioning_latents(
+                audio_path=speaker_wav,  # NOTE(review): the default is None; confirm the model accepts a missing reference
+                gpt_cond_len=15,  # Reduced length to optimize for CPU
+                gpt_cond_chunk_len=4,
+                max_ref_length=30,  # Reduced length to optimize for CPU
+            )
+            # Synthesize the speech
+            out = MODEL.inference(
+                text,
+                language,
+                gpt_cond_latent,
+                speaker_embedding,
+                repetition_penalty=5.0,
+                temperature=0.75,
+                enable_text_splitting=True,
+            )
+        # Save the audio file (fixed name, so successive calls write to the same path)
+        output_file = "output.wav"
+        torchaudio.save(output_file, torch.tensor(out["wav"]).unsqueeze(0), 24000)
+        return output_file
     except Exception as e:
+        raise RuntimeError(f"Lỗi khi tạo giọng nói: {str(e)}")
 if __name__ == "__main__":
+    # Usage example
+    text = "Xin chào, đây là một đoạn văn bản được chuyển thành giọng nói."
+    speaker_wav = "voices/sample_voice.wav"  # Path to the sample voice file in the /voices directory
+    output_audio = generate_speech(text, language="vi", speaker_wav=speaker_wav)
+    print(f"File âm thanh đã được tạo: {output_audio}")