fixd
- app.py +13 -13
- logs/sentence_analyzer_2024-12-02.log +18 -0
app.py CHANGED

@@ -28,7 +28,7 @@ def gpu_decorator(func):
 
 # Importing the new F5TTS API
 from f5_tts.api import F5TTS
-from f5_tts.infer.utils_infer import preprocess_ref_audio_text
+from f5_tts.infer.utils_infer import preprocess_ref_audio_text
 
 import os
 from huggingface_hub import hf_hub_download
@@ -51,8 +51,8 @@ def load_f5tts():
     return F5TTS(
         model_type="F5-TTS",  # Adjust the model name if necessary
         ckpt_file=ckpt_path,
-        vocab_file="/home/user/app/data/Emilia_ZH_EN_pinyin/vocab.txt",
-        device="cuda" if torch.cuda.is_available() else "cpu",
+        vocab_file="/home/user/app/data/Emilia_ZH_EN_pinyin/vocab.txt",  # Path to the vocab.txt file
+        device="cuda" if torch.cuda.is_available() else "cpu",  # Select the device
         use_ema=True
     )
 
@@ -84,7 +84,7 @@ def infer(
         last_device = device_test
     if last_ema != use_ema:
         last_ema = use_ema
-    vocab_file = "/home/user/app/data/Emilia_ZH_EN_pinyin/vocab.txt"
+    vocab_file = "/home/user/app/data/Emilia_ZH_EN_pinyin/vocab.txt"
     tts_api = F5TTS(
         model_type=exp_name, ckpt_file=file_checkpoint, vocab_file=vocab_file, device=device_test, use_ema=use_ema
     )
@@ -202,16 +202,16 @@ with gr.Blocks(css=custom_css) as app:
             # Process each chunk
             audio_segments = []
             for chunk in chunks:
-                # Using the correct infer function here
-                audio_file,
-                "Emilia_ZH_EN_pinyin",
-                "/home/user/app/model_1200000.safetensors",
-                "F5-TTS",
+                # Using the correct infer function here, ignoring device_used
+                audio_file, _, seed_used = infer(
+                    "Emilia_ZH_EN_pinyin",  # Replace with your project name
+                    "/home/user/app/model_1200000.safetensors",  # Replace with the path to your checkpoint
+                    "F5-TTS",  # Or "E2-TTS" depending on your model
                     ref_text_input,
                     ref_audio_input,
                     chunk,
                     nfe_slider,
-                True,
+                    True,  # use_ema - adjust if necessary
                     speed_slider,
                     seed_input,
                     remove_silence,
@@ -253,8 +253,8 @@ with gr.Blocks(css=custom_css) as app:
         ],
         outputs=[
             audio_output,
-            ref_text_input,
-            seed_output,
+            ref_text_input,
+            seed_output,
         ],
     )
 
@@ -279,4 +279,4 @@ if __name__ == "__main__":
     if not USING_SPACES:
         main()
     else:
-        app.queue().launch()
+        app.queue().launch()
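The heart of this fix is the per-chunk call into infer() in the @@ -202 hunk above: the return value is now unpacked so that the device actually used is discarded and the seed is kept. Below is a minimal sketch of that calling pattern, assuming infer() and the Gradio input values from app.py are in scope; the synthesize_chunks wrapper name and the collection of results are illustrative only, and the argument list stops where the hunk does.

def synthesize_chunks(chunks, ref_text_input, ref_audio_input,
                      nfe_slider, speed_slider, seed_input, remove_silence):
    # Collect one generated audio file per text chunk.
    audio_segments = []
    seed_used = None
    for chunk in chunks:
        # Call infer() and ignore its second return value (the device used).
        audio_file, _, seed_used = infer(
            "Emilia_ZH_EN_pinyin",                       # project / vocab name
            "/home/user/app/model_1200000.safetensors",  # checkpoint path
            "F5-TTS",                                    # or "E2-TTS"
            ref_text_input,
            ref_audio_input,
            chunk,
            nfe_slider,
            True,                                        # use_ema
            speed_slider,
            seed_input,
            remove_silence,
            # ...remaining arguments continue past the end of the hunk above
        )
        audio_segments.append(audio_file)
    return audio_segments, seed_used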
logs/sentence_analyzer_2024-12-02.log CHANGED

@@ -26,3 +26,21 @@
 2024-12-02 19:25:59,990 - SentenceAnalyzer - INFO - SentenceAnalyzer initialized successfully
 2024-12-02 19:28:56,156 - SentenceAnalyzer - DEBUG - Logger set up successfully
 2024-12-02 19:28:56,156 - SentenceAnalyzer - INFO - SentenceAnalyzer initialized successfully
+2024-12-02 19:30:06,733 - SentenceAnalyzer - DEBUG - Logger set up successfully
+2024-12-02 19:30:06,734 - SentenceAnalyzer - INFO - SentenceAnalyzer initialized successfully
+2024-12-02 19:30:28,918 - SentenceAnalyzer - DEBUG - Starting sentence splitting
+2024-12-02 19:30:28,918 - SentenceAnalyzer - DEBUG - Normalized text using NFC
+2024-12-02 19:30:28,919 - SentenceAnalyzer - DEBUG - Removed page numbers and chapter titles
+2024-12-02 19:30:28,919 - SentenceAnalyzer - DEBUG - Replaced hyphenated line breaks
+2024-12-02 19:30:28,919 - SentenceAnalyzer - DEBUG - Replaced multiple newlines with a space
+2024-12-02 19:30:28,919 - SentenceAnalyzer - DEBUG - Normalized whitespace
+2024-12-02 19:30:28,941 - SentenceAnalyzer - DEBUG - Split text into 1 sentences using NLTK
+2024-12-02 19:30:28,941 - SentenceAnalyzer - INFO - Split text into 1 sentences after cleanup
+2024-12-02 19:31:01,028 - SentenceAnalyzer - DEBUG - Starting sentence splitting
+2024-12-02 19:31:01,028 - SentenceAnalyzer - DEBUG - Normalized text using NFC
+2024-12-02 19:31:01,029 - SentenceAnalyzer - DEBUG - Removed page numbers and chapter titles
+2024-12-02 19:31:01,029 - SentenceAnalyzer - DEBUG - Replaced hyphenated line breaks
+2024-12-02 19:31:01,029 - SentenceAnalyzer - DEBUG - Replaced multiple newlines with a space
+2024-12-02 19:31:01,029 - SentenceAnalyzer - DEBUG - Normalized whitespace
+2024-12-02 19:31:01,051 - SentenceAnalyzer - DEBUG - Split text into 1 sentences using NLTK
+2024-12-02 19:31:01,051 - SentenceAnalyzer - INFO - Split text into 1 sentences after cleanup