Spaces:
Runtime error
Runtime error
jaekookang
commited on
Commit
•
49041a5
1
Parent(s):
a07e2df
first upload
Browse files- .gitignore +8 -0
- examples/gentleman.wav +0 -0
- examples/jaekoo_numbers.wav +0 -0
- examples/maybe_next_time.wav +0 -0
- examples/old_oily_rag.wav +0 -0
- gradio_asr_en_libri100.py +82 -0
.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*~
|
2 |
+
__pycache__
|
3 |
+
*.log
|
4 |
+
*.db
|
5 |
+
*.nohup
|
6 |
+
.vscode
|
7 |
+
keyble_ssl/*
|
8 |
+
README_github.md
|
examples/gentleman.wav
ADDED
Binary file (153 kB). View file
|
|
examples/jaekoo_numbers.wav
ADDED
Binary file (218 kB). View file
|
|
examples/maybe_next_time.wav
ADDED
Binary file (25.7 kB). View file
|
|
examples/old_oily_rag.wav
ADDED
Binary file (67.8 kB). View file
|
|
gradio_asr_en_libri100.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''Librispeech 100h English ASR demo
|
2 |
+
|
3 |
+
@ML2
|
4 |
+
|
5 |
+
2022-02-11
|
6 |
+
'''
|
7 |
+
|
8 |
+
import os
|
9 |
+
from glob import glob
|
10 |
+
from loguru import logger
|
11 |
+
import soundfile as sf
|
12 |
+
import gradio as gr
|
13 |
+
|
14 |
+
from espnet_model_zoo.downloader import ModelDownloader
|
15 |
+
from espnet2.bin.asr_inference import Speech2Text
|
16 |
+
|
17 |
+
|
18 |
+
# ---------- Settings ----------
# GPU selection: '-1' hides every CUDA device, which selects the CPU below.
GPU_ID = '-1'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID
DEVICE = 'cuda' if GPU_ID != '-1' else 'cpu'

# Address and port the Gradio server binds to.
SERVER_PORT = 42208
SERVER_NAME = "0.0.0.0"

SSL_DIR = './keyble_ssl'
# Cache directory handed to ModelDownloader. The default is the original
# developer-local path; set the MODEL_DIR environment variable to relocate the
# cache on machines where that path does not exist or is not writable.
MODEL_DIR = os.environ.get('MODEL_DIR', '/home/jkang/HDD4T/jkang/huggingface')

# Example audio clips offered in the UI, in sorted (deterministic) order.
EXAMPLE_DIR = './examples'
examples = sorted(glob(os.path.join(EXAMPLE_DIR, '*.wav')))
|
31 |
+
|
32 |
+
# ---------- Logging ----------
# Append to app.log so that earlier runs are kept across restarts.
logger.add('app.log', mode='a')
logger.info('============================= App restarted =============================')

# ---------- Model ----------
# Fetch (or reuse from MODEL_DIR) the pretrained model files, then build the
# inference object once at import time so every request shares it.
logger.info('download model')
d = ModelDownloader(MODEL_DIR)
out = d.download_and_unpack("jkang/espnet2_librispeech_100_conformer")
logger.info('model downloaded')
model = Speech2Text.from_pretrained(
    asr_train_config=out['asr_train_config'],
    asr_model_file=out['asr_model_file'],
    # Honour the GPU_ID/DEVICE setting; previously DEVICE was computed but
    # never handed to the model.
    device=DEVICE,
)
logger.info('model loaded')
|
46 |
+
|
47 |
+
def predict(wav_file):
    '''Transcribe one audio file with the module-level ASR model.

    Parameters
    ----------
    wav_file : str
        Path of the audio file to decode.

    Returns
    -------
    str
        Text of the best hypothesis, or an empty string when no file was
        given or the model produced no hypothesis.
    '''
    if not wav_file:
        # Nothing to read; return an empty result rather than failing in sf.read.
        logger.warning('no wav file given')
        return ''

    speech, _rate = sf.read(wav_file)
    logger.info('wav file loaded')
    # NOTE(review): the sample rate is not checked or converted here;
    # presumably the model expects the rate it was trained on -- confirm.

    if speech.ndim > 1:
        # Multi-channel audio is read as (frames, channels); average the
        # channels so the model receives a single 1-D waveform.
        speech = speech.mean(axis=1)

    nbests = model(speech)
    if not nbests:
        logger.warning('no hypothesis returned')
        return ''

    # Each n-best entry starts with the decoded text; keep the top one.
    text, *_ = nbests[0]
    logger.info('predicted')
    return text
|
54 |
+
|
55 |
+
# Gradio UI: one uploaded audio file in, one text box out, served by predict().
# The user-facing strings are Korean:
#   title       -> "English speech recognition demo (espnet libri100) -- prototype"
#   description -> "Upload an English speech file and its text content is shown
#                   as the result."
#   input label -> "English speech"; output label -> "Speech recognition
#                   decoding result"
iface = gr.Interface(
    predict,
    title='영어 음성인식 데모 (espnet libri100) -- 프로토타입',
    description='영어 음성 파일을 업로드하면 텍스트 내용을 결과로 보여줍니다.',
    inputs=[
        gr.inputs.Audio(label='영어 음성', source='upload', type='filepath')
    ],
    outputs=[
        gr.outputs.Textbox(label='음성 인식 디코딩결과'),
    ],
    examples=examples,
    article='<p style="text-align:center">i-Scream AI</p>',
)
|
68 |
+
|
69 |
+
if __name__ == '__main__':
    # Options for the Gradio server. The TLS options (ssl_keyfile /
    # ssl_certfile) were commented out in the original call and stay disabled.
    launch_kwargs = dict(
        debug=True,
        server_name=SERVER_NAME,
        server_port=SERVER_PORT,
        enable_queue=True,
    )
    try:
        iface.launch(**launch_kwargs)
    except KeyboardInterrupt as interrupt:
        # Ctrl-C is the expected way to stop the server; just echo it.
        print(interrupt)
    finally:
        # Always release the server, however launch() ended.
        iface.close()
|