vuxuanhoan committed on
Commit
cd58124
·
verified ·
1 Parent(s): 20bc263

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -111
app.py CHANGED
@@ -1,121 +1,85 @@
1
  import gradio as gr
 
 
2
  import os
3
  import time
4
- import uuid
5
- import re
6
  import asyncio
7
- import torchaudio
8
- from huggingface_hub import HfApi, hf_hub_download, snapshot_download
9
- from TTS.tts.configs.xtts_config import XttsConfig
10
- from TTS.tts.models.xtts import Xtts
11
- from vinorm import TTSnorm
12
-
13
- # download for mecab
14
- os.system("python -m unidic download")
15
-
16
- HF_TOKEN = os.environ.get("HF_TOKEN")
17
- api = HfApi(token=HF_TOKEN)
18
-
19
- # This will trigger downloading model
20
- print("Downloading if not downloaded viXTTS")
21
- checkpoint_dir = "model/"
22
- repo_id = "capleaf/viXTTS"
23
- use_deepspeed = False
24
-
25
- os.makedirs(checkpoint_dir, exist_ok=True)
26
-
27
- required_files = ["model.pth", "config.json", "vocab.json", "speakers_xtts.pth"]
28
- files_in_dir = os.listdir(checkpoint_dir)
29
- if not all(file in files_in_dir for file in required_files):
30
- snapshot_download(
31
- repo_id=repo_id,
32
- repo_type="model",
33
- local_dir=checkpoint_dir,
34
- )
35
- hf_hub_download(
36
- repo_id="coqui/XTTS-v2",
37
- filename="speakers_xtts.pth",
38
- local_dir=checkpoint_dir,
39
- )
40
-
41
- xtts_config = os.path.join(checkpoint_dir, "config.json")
42
- config = XttsConfig()
43
- config.load_json(xtts_config)
44
- MODEL = Xtts.init_from_config(config)
45
- MODEL.load_checkpoint(
46
- config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed
47
- )
48
-
49
- if torch.cuda.is_available():
50
- MODEL.cuda()
51
-
52
- supported_languages = config.languages
53
- if not "vi" in supported_languages:
54
- supported_languages.append("vi")
55
-
56
- def normalize_vietnamese_text(text):
57
- text = (
58
- TTSnorm(text, unknown=False, lower=False, rule=True)
59
- .replace("..", ".")
60
- .replace("!.", "!")
61
- .replace("?.", "?")
62
- .replace(" .", ".")
63
- .replace(" ,", ",")
64
- .replace('"', "")
65
- .replace("'", "")
66
- .replace("AI", "Ây Ai")
67
- .replace("A.I", "Ây Ai")
68
- )
69
- return text
70
-
71
- async def text_to_speech(text, lang, audio_file_path):
72
- if lang not in supported_languages:
73
- return None, "Language not supported."
74
-
75
- if len(text) < 2:
76
- return None, "Please provide a longer text."
77
-
78
- if len(text) > 250:
79
- return None, "Text is too long, please keep it under 250 characters."
80
 
81
- if lang == "vi":
82
- text = normalize_vietnamese_text(text)
 
 
83
 
84
- try:
85
- print("Generating new audio...")
86
- out = MODEL.inference(
87
- text,
88
- lang,
89
- gpt_cond_latent=None,
90
- speaker_embedding=None,
91
- repetition_penalty=5.0,
92
- temperature=0.75,
93
- enable_text_splitting=True,
94
- )
95
- torchaudio.save(audio_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
96
- return audio_file_path, None
97
- except Exception as e:
98
- return None, f"Error during synthesis: {str(e)}"
99
 
100
- # Thư mục để lưu tệp âm thanh
101
- AUDIO_DIR = 'audio_files'
102
- os.makedirs(AUDIO_DIR, exist_ok=True)
 
103
 
104
- async def convert_text_to_speech(text, lang):
105
- audio_file_name = f"{time.time()}.wav"
106
- audio_file_path = os.path.join(AUDIO_DIR, audio_file_name)
107
- return await text_to_speech(text, lang, audio_file_path)
108
 
109
  # Tạo giao diện Gradio
110
- with gr.Blocks() as iface:
111
- with gr.Tab("Text to Speech"):
112
- gr.Markdown("### Convert text to speech")
113
- text_input = gr.Textbox(lines=10, label="Enter your text here:")
114
- lang_input = gr.Dropdown(choices=supported_languages, label="Select language:")
115
-
116
- audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
117
- gr.Button("Convert").click(fn=lambda text, lang: asyncio.run(convert_text_to_speech(text, lang)),
118
- inputs=[text_input, lang_input],
119
- outputs=[audio_output, file_output])
120
-
121
- iface.launch(enable_queue=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import edge_tts
3
+ import io
4
  import os
5
  import time
 
 
6
  import asyncio
7
+ from docx import Document
8
+
9
+ AUDIO_DIR = 'audio_files' # Directory where generated audio files are saved
10
+ MAX_FILE_AGE = 24 * 60 * 60 # Maximum age of a saved audio file, in seconds (24 hours)
11
+
12
# Fetch every voice reported by edge_tts.list_voices().
async def get_voices():
    """Return a mapping of display labels to edge-tts voice short names.

    Each label has the form ``"<ShortName> - <Locale> (<Gender>)"`` and maps
    to the voice's ``ShortName``, which is the value later passed as
    ``voice=`` to ``edge_tts.Communicate``.
    """
    available = await edge_tts.list_voices()
    label_to_short_name = {}
    for voice in available:
        label = f"{voice['ShortName']} - {voice['Locale']} ({voice['Gender']})"
        label_to_short_name[label] = voice['ShortName']
    return label_to_short_name
16
+
17
async def text_to_speech(text, lang):
    """Synthesize ``text`` to an MP3 file with edge-tts and return its path.

    Args:
        text: The text to read aloud. Must contain at least one
            non-whitespace character.
        lang: Despite the name, this is the edge-tts voice identifier; it is
            passed straight through as ``voice=`` to ``edge_tts.Communicate``.

    Returns:
        A ``(path, path)`` tuple holding the same file path twice, because
        the UI binds this function to two outputs (an audio player and a
        downloadable file).

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Local import so this block does not depend on a file-level import.
    import uuid

    # Fail fast with a clear message instead of letting the TTS request fail
    # later with a less helpful error.
    if not text or not text.strip():
        raise ValueError("No text to convert to speech.")

    communicate = edge_tts.Communicate(text, voice=lang)

    # Make sure the output directory exists before writing into it.
    os.makedirs(AUDIO_DIR, exist_ok=True)
    # A timestamp alone can collide when two requests arrive at the same
    # instant; the random suffix keeps every output file distinct.
    audio_file_name = f"{time.time()}_{uuid.uuid4().hex}.mp3"
    audio_file_path = os.path.join(AUDIO_DIR, audio_file_name)

    # Write the synthesized audio straight to the target path.
    await communicate.save(audio_file_path)
    # Opportunistically purge audio files that have outlived MAX_FILE_AGE.
    delete_old_audio_files()
    return audio_file_path, audio_file_path
29
 
30
def delete_old_audio_files(directory=None, max_age=None):
    """Delete regular files in ``directory`` older than ``max_age`` seconds.

    Age is measured from each file's modification time. Subdirectories are
    left alone, and a missing directory or a file that disappears while the
    sweep is running is ignored rather than raising, so concurrent sweeps do
    not fail each other.

    Args:
        directory: Directory to sweep. Defaults to the module-level
            ``AUDIO_DIR``.
        max_age: Maximum allowed age in seconds. Defaults to the
            module-level ``MAX_FILE_AGE``.
    """
    if directory is None:
        directory = AUDIO_DIR
    if max_age is None:
        max_age = MAX_FILE_AGE

    try:
        file_names = os.listdir(directory)
    except FileNotFoundError:
        # Nothing has been generated yet, so there is nothing to clean up.
        return

    now = time.time()
    for file_name in file_names:
        file_path = os.path.join(directory, file_name)
        # os.remove() cannot delete directories; only sweep regular files.
        if not os.path.isfile(file_path):
            continue
        try:
            if now - os.path.getmtime(file_path) > max_age:
                os.remove(file_path)
        except FileNotFoundError:
            # Another request removed the file between the check and now.
            continue
 
 
 
 
 
 
 
 
 
36
 
37
async def txt_to_speech(file, lang):
    """Read an uploaded plain-text file and convert its contents to speech.

    Args:
        file: The upload to read. Either an object exposing the file path as
            ``.name`` or the path itself as a string (which form arrives
            depends on the Gradio version/configuration -- verify against
            the installed one).
        lang: Voice identifier, forwarded unchanged to ``text_to_speech``.

    Returns:
        Whatever ``text_to_speech`` returns for the file's text.

    Raises:
        ValueError: If no file was uploaded (``file`` is ``None``).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    if file is None:
        raise ValueError("No .txt file was uploaded.")

    path = getattr(file, "name", file)
    # Decode explicitly as UTF-8 rather than relying on the platform default
    # encoding; "utf-8-sig" additionally drops a leading byte-order mark so it
    # does not end up in the text sent for synthesis.
    with open(path, 'r', encoding="utf-8-sig") as f:
        text = f.read()
    return await text_to_speech(text, lang)
41
 
42
async def docx_to_speech(file, lang):
    """Extract the paragraph text of an uploaded .docx and convert it to speech.

    Only ``doc.paragraphs`` is read; the paragraphs are joined with newlines
    before synthesis.

    Args:
        file: The upload to read. Either an object exposing the file path as
            ``.name`` or the path itself as a string (which form arrives
            depends on the Gradio version/configuration -- verify against
            the installed one).
        lang: Voice identifier, forwarded unchanged to ``text_to_speech``.

    Returns:
        Whatever ``text_to_speech`` returns for the document's text.

    Raises:
        ValueError: If no file was uploaded (``file`` is ``None``).
    """
    if file is None:
        raise ValueError("No .docx file was uploaded.")

    doc = Document(getattr(file, "name", file))
    # Collect the text of every paragraph, one per line.
    text = "\n".join(para.text for para in doc.paragraphs)
    return await text_to_speech(text, lang)
46
 
47
  # Build the Gradio interface
48
async def create_interface():
    """Build the three-tab Gradio UI (text, .txt, .docx) and launch it.

    The voice list is fetched once via ``get_voices()``; every tab offers the
    same voice dropdown and shows the result as both an audio player and a
    downloadable file.
    """
    voices = await get_voices()  # display label -> edge-tts short name
    voice_labels = list(voices.keys())

    def make_handler(converter):
        """Adapt ``converter`` to take the dropdown label instead of the voice id."""
        async def handler(source, voice_label):
            # The dropdown has no default, so the user may click "Convert"
            # before choosing a voice; report that instead of a KeyError.
            if voice_label not in voices:
                raise gr.Error("Please select a voice first.")
            return await converter(source, voices[voice_label])
        return handler

    def add_tab(title, heading, make_input, converter):
        """Add one tab: heading, source input, voice dropdown, outputs, button."""
        with gr.Tab(title):
            gr.Markdown(heading)
            source_input = make_input()
            voice_input = gr.Dropdown(choices=voice_labels, label="Select language:")

            audio_output, file_output = gr.Audio(label="Audio"), gr.File(label="Audio File")
            # Gradio awaits coroutine handlers itself, so no asyncio.run()
            # wrapper is needed around the converter.
            gr.Button("Convert").click(
                fn=make_handler(converter),
                inputs=[source_input, voice_input],
                outputs=[audio_output, file_output],
            )

    with gr.Blocks() as iface:
        add_tab(
            "Text to Speech",
            "### Convert text to speech",
            lambda: gr.Textbox(lines=10, label="Enter your text here:"),
            text_to_speech,
        )
        add_tab(
            "TXT to Speech",
            "### Convert .txt file to speech",
            lambda: gr.File(label="Upload your .txt file"),
            txt_to_speech,
        )
        add_tab(
            "DOCX to Speech",
            "### Convert .docx file to speech",
            lambda: gr.File(label="Upload your .docx file"),
            docx_to_speech,
        )

    # launch(enable_queue=True) is deprecated and rejected by newer Gradio
    # releases; enabling the queue through queue() is the supported way.
    iface.queue()
    iface.launch()
83
+
84
+ # Run the application: create_interface() builds the UI and launches it
85
+ asyncio.run(create_interface())