txya900619 committed on
Commit
784aace
·
1 Parent(s): e43ab87

feat: upload inference script

Browse files
Files changed (3) hide show
  1. app.py +69 -0
  2. configs/models.yaml +2 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ import torch
5
+ from huggingface_hub import snapshot_download
6
+ from nemo.collections.asr.models import ASRModel
7
+ from omegaconf import OmegaConf
8
+
9
# Pick the inference device and its matching tensor dtype together:
# CUDA with half precision when a GPU is available, otherwise CPU with
# full precision.
device, torch_dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)
11
+
12
+
13
def load_model(model_id: str):
    """Fetch a NeMo ASR checkpoint from the Hugging Face Hub and prepare it.

    The repository identified by ``model_id`` is downloaded with
    ``snapshot_download``; the ``model.nemo`` file inside that snapshot is
    restored as an ``ASRModel``, switched to eval mode and moved to the
    module-level ``device``.

    Args:
        model_id: Hub repository id, e.g. ``"owner/repo"``.

    Returns:
        The restored model, in eval mode, on ``device``.
    """
    checkpoint_path = os.path.join(snapshot_download(model_id), "model.nemo")

    restored = ASRModel.restore_from(checkpoint_path)
    restored.eval()

    return restored.to(device)
21
+
22
+
23
# Expose load_model to the YAML config so an entry can be written as
# ``${load_model:<hub repo id>}``.
OmegaConf.register_new_resolver("load_model", load_model)

# Locate the config next to this script instead of relative to the current
# working directory, so the app also starts when launched from elsewhere.
_MODELS_CONFIG_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "configs", "models.yaml"
)

# to_object resolves interpolations, so each ``${load_model:...}`` entry is
# replaced by the value load_model returns, right here at import time.
models_config = OmegaConf.to_object(OmegaConf.load(_MODELS_CONFIG_PATH))
26
+
27
+
28
def automatic_speech_recognition(model_id: str, audio_file: str) -> str:
    """Transcribe one audio file with the selected model.

    Args:
        model_id: Key into ``models_config`` selecting which model to use.
        audio_file: Path of the audio file to transcribe. May be ``None`` or
            empty when the user submits without providing audio.

    Returns:
        The transcript as plain text, or ``""`` when there is no audio or the
        model produced no result.

    Raises:
        KeyError: If ``model_id`` is not present in ``models_config``.
    """
    # Nothing to transcribe: return an empty transcript instead of handing
    # None to the model.
    if not audio_file:
        return ""

    model = models_config[model_id]["model"]

    # Pass a one-element batch; the result holds one entry per input file.
    results = model.transcribe([audio_file])

    # NOTE(review): some NeMo model families return a
    # (best_hypotheses, all_hypotheses) tuple - keep only the best ones.
    if isinstance(results, tuple):
        results = results[0]
    if not results:
        return ""

    # Depending on the NeMo version an entry is either a plain string or a
    # hypothesis object carrying the transcript in ``.text``; unwrap it so the
    # text box shows the transcript rather than a list/object repr.
    first = results[0]
    return getattr(first, "text", first)
32
+
33
+
34
# Top-level Gradio container for the speech-recognition demo page.
demo = gr.Blocks(title="康統語音辨識系統")

with demo:
    # Model selector; the first model listed in the config is preselected.
    model_ids = models_config.keys()
    default_model_id = list(model_ids)[0]
    model_drop_down = gr.Dropdown(
        model_ids,
        value=default_model_id,
        label="模型",
    )

    gr.Markdown(
        """
        # 康統語音辨識系統
        """
    )

    # Audio input (upload or microphone); type="filepath" matches the
    # callback's `audio_file: str` parameter.
    audio_input = gr.Audio(
        label="上傳或錄音",
        type="filepath",
        waveform_options=gr.WaveformOptions(sample_rate=16000),
    )
    # Read-only text box that displays the recognition result.
    transcript_output = gr.Text(interactive=False, label="辨識結果")

    # Wire the selector and the audio input to the transcription callback.
    gr.Interface(
        automatic_speech_recognition,
        inputs=[model_drop_down, audio_input],
        outputs=[transcript_output],
        allow_flagging="auto",
    )

demo.launch()
configs/models.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ parakeet-0.6b-custom-tokenizer:
2
+ model: ${load_model:kenkone/parakeet-ctc-0.6b-custom-tokenizer}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ omegaconf
2
+ git+https://github.com/NVIDIA/NeMo.git@main#egg=nemo_toolkit[asr]
3
+ IPython
4
+ numpy==1.*