Spaces:
Sleeping
Sleeping
tab-lization and fix bugs
Browse files
app.py
CHANGED
@@ -16,29 +16,12 @@ import pdb
|
|
16 |
# local import
|
17 |
import sys
|
18 |
from espnet2.bin.tts_inference import Text2Speech
|
19 |
-
from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
|
20 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
21 |
|
22 |
sys.path.append("src")
|
23 |
|
24 |
import gradio as gr
|
25 |
-
|
26 |
-
# ASR part
|
27 |
-
|
28 |
-
audio_files = [
|
29 |
-
str(x)
|
30 |
-
for x in sorted(
|
31 |
-
Path(
|
32 |
-
"/home/kevingeng/Disk2/laronix/laronix_automos/data/20230103_video"
|
33 |
-
).glob("**/*wav")
|
34 |
-
)
|
35 |
-
]
|
36 |
-
# audio_files = [str(x) for x in sorted(Path("./data/Patient_sil_trim_16k_normed_5_snr_40/Rainbow").glob("**/*wav"))]
|
37 |
-
# transcriber = pipeline(
|
38 |
-
# "automatic-speech-recognition",
|
39 |
-
# model="KevinGeng/PAL_John_128_train_dev_test_seed_1",
|
40 |
-
# )
|
41 |
-
|
42 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
43 |
|
44 |
processor = AutoProcessor.from_pretrained("KevinGeng/whipser_medium_en_PAL300_step25")
|
@@ -51,8 +34,7 @@ transcriber = pipeline("automatic-speech-recognition", model="KevinGeng/whipser_
|
|
51 |
# @title English multi-speaker pretrained model { run: "auto" }
|
52 |
lang = "English"
|
53 |
vits_tag = "kan-bayashi/libritts_xvector_vits"
|
54 |
-
ft2_tag = "kan-bayashi/
|
55 |
-
# ft2_tag = "kan-bayashi/libritts_xvector_conformer_fastspeech2"
|
56 |
transformer_tag = "kan-bayashi/libritts_xvector_transformer"
|
57 |
|
58 |
# !!! vits needs no vocoder !!!
|
@@ -378,7 +360,7 @@ with gr.Blocks(
|
|
378 |
analytics_enabled=False,
|
379 |
css=".gradio-container {background-color: #78BD91}",
|
380 |
) as demo:
|
381 |
-
#
|
382 |
with gr.Tab("Open Version"):
|
383 |
with gr.Column(elem_id="Column"):
|
384 |
input_format = gr.Radio(
|
|
|
16 |
# local import
|
17 |
import sys
|
18 |
from espnet2.bin.tts_inference import Text2Speech
|
19 |
+
from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
|
20 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
21 |
|
22 |
sys.path.append("src")
|
23 |
|
24 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
26 |
|
27 |
processor = AutoProcessor.from_pretrained("KevinGeng/whipser_medium_en_PAL300_step25")
|
|
|
34 |
# @title English multi-speaker pretrained model { run: "auto" }
|
35 |
lang = "English"
|
36 |
vits_tag = "kan-bayashi/libritts_xvector_vits"
|
37 |
+
ft2_tag = "kan-bayashi/libritts_xvector_conformer_fastspeech2"
|
|
|
38 |
transformer_tag = "kan-bayashi/libritts_xvector_transformer"
|
39 |
|
40 |
# !!! vits needs no vocoder !!!
|
|
|
360 |
analytics_enabled=False,
|
361 |
css=".gradio-container {background-color: #78BD91}",
|
362 |
) as demo:
|
363 |
+
# Public Version
|
364 |
with gr.Tab("Open Version"):
|
365 |
with gr.Column(elem_id="Column"):
|
366 |
input_format = gr.Radio(
|