Spaces:

TDN-M
/

GV-a

Sleeping

App Files Community

TDN-M committed on Jan 24

Commit

b7935b6

verified ·

1 Parent(s): 93ff1fe

Update tts.py

Browse files

Files changed (1) hide show

tts.py +125 -104

tts.py CHANGED Viewed

@@ -1,116 +1,137 @@
 import os
-import re
-import torch
-import torchaudio
-from huggingface_hub import snapshot_download, hf_hub_download
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
-from vinorm import TTSnorm
-# Configure paths and download the model.
-# NOTE: everything in this section runs at import time.
-checkpoint_dir = "model/"
-repo_id = "capleaf/viXTTS"
-use_deepspeed = False
-# Create the directory if it does not exist yet
-os.makedirs(checkpoint_dir, exist_ok=True)
-# Check for the required files and download them if any one is missing
-required_files = ["model.pth", "config.json", "vocab.json", "speakers_xtts.pth"]
-files_in_dir = os.listdir(checkpoint_dir)
-if not all(file in files_in_dir for file in required_files):
-    snapshot_download(
-        repo_id=repo_id,
-        repo_type="model",
-        local_dir=checkpoint_dir,
-    )
-    hf_hub_download(
-        repo_id="coqui/XTTS-v2",
-        filename="speakers_xtts.pth",
-        local_dir=checkpoint_dir,
-    )
-# Load the configuration and the model
-xtts_config = os.path.join(checkpoint_dir, "config.json")
-config = XttsConfig()
-config.load_json(xtts_config)
-MODEL = Xtts.init_from_config(config)
-MODEL.load_checkpoint(config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed)
-# Use the GPU when one is available
-if torch.cuda.is_available():
-    MODEL.cuda()
-# Supported languages (Vietnamese and English only)
-supported_languages = ["vi", "en"]
-def normalize_vietnamese_text(text):
     """
-    Normalize Vietnamese text before synthesis.
-
-    Runs the text through TTSnorm, then applies a fixed chain of string
-    replacements: tidies doubled or misplaced punctuation, strips single and
-    double quotes, and rewrites "AI" / "A.I" as "Ây Ai".
-
-    NOTE(review): the "AI" rewrite is a plain substring replace, so it also
-    fires inside longer uppercase words — confirm that is acceptable.
     """
-    text = (
-        TTSnorm(text, unknown=False, lower=False, rule=True)
-        .replace("..", ".")
-        .replace("!.", "!")
-        .replace("?.", "?")
-        .replace(" .", ".")
-        .replace(" ,", ",")
-        .replace('"', "")
-        .replace("'", "")
-        .replace("AI", "Ây Ai")
-        .replace("A.I", "Ây Ai")
-    )
-    return text
-def generate_speech(text, language="vi", speaker_wav=None, normalize_text=True):
     """
-    Synthesize speech from text and save it to a WAV file.
-
-    Args:
-        text: Text to speak; must be at least 2 characters long.
-        language: Language code; must be listed in supported_languages.
-        speaker_wav: Passed as audio_path to MODEL.get_conditioning_latents.
-            NOTE(review): the default is None — confirm the model accepts that.
-        normalize_text: When true and language is "vi", the text is first
-            passed through normalize_vietnamese_text.
-
-    Returns:
-        The output path, always "output.wav" (overwritten on every call).
-
-    Raises:
-        ValueError: Unsupported language or text that is too short; raised
-            before the try block, so not wrapped.
-        RuntimeError: Any exception raised during synthesis or saving,
-            re-raised with the original message embedded (not chained).
     """
-    if language not in supported_languages:
-        raise ValueError(f"Ngôn ngữ {language} không được hỗ trợ. Chỉ hỗ trợ tiếng Việt (vi) và tiếng Anh (en).")
-    if len(text) < 2:
-        raise ValueError("Văn bản quá ngắn. Vui lòng nhập văn bản dài hơn.")
     try:
-        # Normalize the text when requested
-        if normalize_text and language == "vi":
-            text = normalize_vietnamese_text(text)
-        # Get the conditioning latent and speaker embedding from the sample audio file
-        gpt_cond_latent, speaker_embedding = MODEL.get_conditioning_latents(
-            audio_path=speaker_wav,
-            gpt_cond_len=30,
-            gpt_cond_chunk_len=4,
-            max_ref_length=60,
-        )
-        # Generate the speech
-        out = MODEL.inference(
-            text,
-            language,
-            gpt_cond_latent,
-            speaker_embedding,
-            repetition_penalty=5.0,
-            temperature=0.75,
-            enable_text_splitting=True,
-        )
-        # Save the audio file
-        output_file = "output.wav"
-        torchaudio.save(output_file, torch.tensor(out["wav"]).unsqueeze(0), 24000)
-        return output_file
     except Exception as e:
-        raise RuntimeError(f"Lỗi khi tạo giọng nói: {str(e)}")
 if __name__ == "__main__":
-    # Usage example: synthesize one Vietnamese sentence and print the output path
-    text = "Xin chào, đây là một đoạn văn bản được chuyển thành giọng nói."
-    speaker_wav = "voices/sample_voice.wav"  # Path to the sample voice audio file in the /voices directory
-    output_audio = generate_speech(text, language="vi", speaker_wav=speaker_wav)
-    print(f"File âm thanh đã được tạo: {output_audio}")

+import asyncio
+import mimetypes
 import os
+import tempfile
+import glob
+import fitz  # PyMuPDF
+import random
+import gradio as gr
+from docx import Document
+from content_generation import create_content, CONTENT_TYPES
+from openai import OpenAI
+from gradio_client import Client, handle_file  # Thêm thư viện để gọi API
+from tts import generate_speech
+# NOTE(review): the page header names this file tts.py, yet the import above
+# pulls generate_speech from tts — this looks like app code committed over
+# the TTS module; confirm the intended filename.
+# Create the OpenAI client with the API key taken from the environment
+client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
+# Path to the directory that holds the voice audio files
+VOICES_DIR = "voices"
+def create_docx(content, output_path):
     """
+    Create a DOCX file from the given content.
+
+    Builds a new Document, adds content as a single paragraph, and saves
+    the document to output_path.
     """
+    doc = Document()
+    doc.add_paragraph(content)
+    doc.save(output_path)
+def process_pdf(file_path):
     """
+    Extract the text of every page of a PDF file.
+
+    Args:
+        file_path: Path of the PDF to open with PyMuPDF (fitz).
+
+    Returns:
+        The text of all pages, concatenated in page order.
     """
+    # The context manager closes the document even if a page fails to
+    # extract, so the file handle is not leaked.
+    with fitz.open(file_path) as doc:
+        return "".join(page.get_text() for page in doc)
+def process_docx(file_path):
+    """
+    Extract the text of a DOCX file, one paragraph per line.
+
+    Args:
+        file_path: Path of the DOCX file to open with python-docx.
+
+    Returns:
+        The text of all paragraphs in document order, separated by newlines.
+    """
+    doc = Document(file_path)
+    # Join with newlines so the last word of one paragraph is not glued to
+    # the first word of the next.
+    return "\n".join(para.text for para in doc.paragraphs)
+def text_to_speech(content, voice_file):
+    """
+    Convert content to speech using generate_speech imported from tts.
+
+    Calls generate_speech with language="vi" and voice_file as speaker_wav,
+    and returns its result.
+
+    NOTE(review): on failure this returns the error message as a string
+    instead of raising. In this file the result is wired to a gr.Audio
+    output — confirm that component handles a message string sensibly.
+    """
     try:
+        # Call generate_speech to produce the audio file
+        output_audio = generate_speech(content, language="vi", speaker_wav=voice_file)
+        return output_audio
     except Exception as e:
+        return f"Lỗi khi chuyển đổi văn bản thành giọng nói: {str(e)}"
+def convert_content_to_speech(content, voice_file):
+    """
+    Convert content to speech.
+
+    Thin wrapper: forwards both arguments to text_to_speech and returns its
+    result unchanged.
+    """
+    return text_to_speech(content, voice_file)
+def interface():
+    """
+    Build and return the Gradio Blocks app.
+
+    The single tab has an input column (prompt, optional file upload,
+    content type, voice selector) and an output column (generated text,
+    DOCX download, status label, audio player). Two buttons are wired:
+    content_button runs generate_content and convert_to_speech_button runs
+    convert_content_to_speech.
+
+    NOTE(review): confirm_button is created and confirm_content is defined,
+    but no click handler connects them in this function — confirm whether
+    that wiring is missing.
+    """
+    with gr.Blocks() as app:
+        gr.Markdown("# Ứng dụng Tạo Nội dung và Video")
+        with gr.Tab("Tạo Nội dung"):
+            with gr.Row():
+                with gr.Column():
+                    prompt = gr.Textbox(label="Nhập yêu cầu nội dung")
+                    file_upload = gr.File(label="Tải lên file kèm theo", type="filepath")
+                    content_type = gr.Radio(label="Chọn loại nội dung",
+                                            choices=CONTENT_TYPES,
+                                            value=None)  # Default: nothing is selected
+                    # NOTE(review): os.listdir raises if VOICES_DIR does not exist — confirm it always does.
+                    voice_files = [os.path.join(VOICES_DIR, f) for f in os.listdir(VOICES_DIR) if f.endswith(".wav")]
+                    voice_selector = gr.Dropdown(label="Chọn giọng đọc", choices=voice_files)  # Dropdown for choosing the voice audio file
+                    content_button = gr.Button("Tạo Nội dung")
+                with gr.Column():
+                    content_output = gr.Textbox(label="Nội dung tạo ra", interactive=True)
+                    confirm_button = gr.Button("Xác nhận nội dung")
+                    download_docx = gr.File(label="Tải xuống file DOCX", interactive=False)
+                    status_message = gr.Label(label="Trạng thái")
+                    convert_to_speech_button = gr.Button("Chuyển đổi thành giọng nói")
+                    audio_output = gr.Audio(label="Synthesised Audio", autoplay=True)  # Autoplay
+            def generate_content(prompt, file, content_type):
+                # Returns (generated text, DOCX path, status); on any error
+                # returns ("", None, error status) instead of raising.
+                try:
+                    status = "Đang xử lý..."
+                    # When a file was uploaded, append its extracted text to the prompt
+                    if file and os.path.exists(file):
+                        mime_type, _ = mimetypes.guess_type(file)
+                        if mime_type == "application/pdf":
+                            file_content = process_pdf(file)
+                            prompt = f"{prompt}\n\nDưới đây là nội dung của file tài liệu:\n\n{file_content}"
+                        # NOTE(review): application/msword is also routed to
+                        # process_docx — confirm Document can read that format.
+                        elif mime_type in (
+                            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+                            "application/msword"):
+                            file_content = process_docx(file)
+                            prompt = f"{prompt}\n\nDưới đây là nội dung của file tài liệu:\n\n{file_content}"
+                        else:
+                            raise ValueError("Định dạng file không được hỗ trợ.")
+                    if not content_type:
+                        raise ValueError("Vui lòng chọn một loại nội dung")
+                    script_content = create_content(prompt, content_type, "Tiếng Việt")
+                    # Fixed output path: each call overwrites script.docx
+                    docx_path = "script.docx"
+                    create_docx(script_content, docx_path)
+                    status = "Đã tạo nội dung thành công!"
+                    return script_content, docx_path, status
+                except Exception as e:
+                    status = f"Đã xảy ra lỗi: {str(e)}"
+                    return "", None, status
+            async def confirm_content(content):
+                # Rewrites script.docx from the given content; returns nothing.
+                docx_path = "script.docx"
+                create_docx(content, docx_path)
+            content_button.click(generate_content,
+                                 inputs=[prompt, file_upload, content_type],
+                                 outputs=[content_output, download_docx, status_message])
+            convert_to_speech_button.click(convert_content_to_speech,
+                                           inputs=[content_output, voice_selector],
+                                           outputs=[audio_output])
+    return app
+# Launch the application
 if __name__ == "__main__":
+    app = interface()
+    # NOTE(review): share=True presumably publishes a public share link — confirm that is intended.
+    app.launch(share=True)