admin committed · Commit 2def64b · Parent(s): ff6c649
- app.py +23 -24
- model.py +1 -2
- requirements.txt +3 -3
app.py
CHANGED
@@ -15,6 +15,18 @@ from model import net, MODEL_DIR
 
 
 MODEL = net()
+TRANS = {
+    "PearlRiver": "Pearl River",
+    "YoungChang": "YOUNG CHANG",
+    "Steinway-T": "STEINWAY Theater",
+    "Hsinghai": "HSINGHAI",
+    "Kawai": "KAWAI",
+    "Steinway": "STEINWAY",
+    "Kawai-G": "KAWAI Grand",
+    "Yamaha": "YAMAHA",
+}
+CLASSES = list(TRANS.keys())
+CACHE_DIR = "./__pycache__/tmp"
 
 
 def most_common_element(input_list):
@@ -24,7 +36,7 @@ def most_common_element(input_list):
 
 
 def wav_to_mel(audio_path: str, width=0.18):
-    os.makedirs(
+    os.makedirs(CACHE_DIR, exist_ok=True)
     try:
         y, sr = librosa.load(audio_path, sr=48000)
         non_silent = y
@@ -40,7 +52,7 @@ def wav_to_mel(audio_path: str, width=0.18):
         librosa.display.specshow(log_mel_spec[:, i : i + step])
         plt.axis("off")
         plt.savefig(
-            f"
+            f"{CACHE_DIR}/{os.path.basename(audio_path)[:-4]}_{i}.jpg",
             bbox_inches="tight",
             pad_inches=0.0,
         )
@@ -62,12 +74,12 @@ def embed_img(img_path, input_size=224):
     return transform(img).unsqueeze(0)
 
 
-def inference(wav_path, folder_path="./tmp"):
+def inference(wav_path, folder_path=CACHE_DIR):
     if os.path.exists(folder_path):
         shutil.rmtree(folder_path)
 
     if not wav_path:
-        return None, "
+        return None, "Please input an audio!"
 
     wav_to_mel(wav_path)
     outputs = []
@@ -82,48 +94,35 @@ def inference(wav_path, folder_path="./tmp"):
 
     max_count_item = most_common_element(outputs)
     shutil.rmtree(folder_path)
-    return os.path.basename(wav_path),
+    return os.path.basename(wav_path), TRANS[CLASSES[max_count_item]]
 
 
 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
-    translate = {
-        "PearlRiver": "Pearl River",
-        "YoungChang": "YOUNG CHANG",
-        "Steinway-T": "STEINWAY Theater",
-        "Hsinghai": "HSINGHAI",
-        "Kawai": "KAWAI",
-        "Steinway": "STEINWAY",
-        "Kawai-G": "KAWAI Grand",
-        "Yamaha": "YAMAHA",
-    }
-    classes = list(translate.keys())
     example_wavs = []
-    for cls in classes:
+    for cls in CLASSES:
         example_wavs.append(f"{MODEL_DIR}/examples/{cls}.wav")
 
     with gr.Blocks() as demo:
         gr.Interface(
             fn=inference,
-            inputs=gr.Audio(
-                type="filepath", label="上传钢琴录音 Upload a piano recording"
-            ),
+            inputs=gr.Audio(type="filepath", label="Upload a piano recording"),
             outputs=[
-                gr.Textbox(label="
+                gr.Textbox(label="Audio filename", show_copy_button=True),
                 gr.Textbox(
-                    label="
+                    label="Piano classification result",
                     show_copy_button=True,
                 ),
             ],
             examples=example_wavs,
             cache_examples=False,
             allow_flagging="never",
-            title="
+            title="It is recommended to keep the duration of recording around 3s, too long will affect the recognition efficiency.",
         )
 
         gr.Markdown(
             """
-            #
+            # Cite
             ```bibtex
             @article{Zhou2023AHE,
                 author = {Monan Zhou and Shangda Wu and Shaohua Ji and Zijin Li and Wei Li},
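The diff calls `most_common_element` over the per-slice predictions but never shows its body: `inference` appears to classify each saved spectrogram slice separately and then keep the most frequent class index. As a point of reference, a minimal majority-vote helper in that spirit could look like the sketch below (my assumption, not the committed implementation).

```python
# Hypothetical sketch of a majority-vote helper in the spirit of
# most_common_element; the committed implementation is not shown in this diff.
from collections import Counter


def most_common_element(input_list):
    # Return the value that occurs most often among the per-slice predictions.
    return Counter(input_list).most_common(1)[0][0]
```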
model.py
CHANGED
@@ -14,7 +14,6 @@ def Classifier(cls_num=8, output_size=512, linear_output=False):
     l1 = int(q * cls_num)
     l2 = int(q * l1)
     l3 = int(q * l2)
-
     if linear_output:
         return torch.nn.Sequential(
             nn.Dropout(),
@@ -45,7 +44,7 @@ def Classifier(cls_num=8, output_size=512, linear_output=False):
     )
 
 
-def net(weights=MODEL_DIR
+def net(weights=f"{MODEL_DIR}/save.pt"):
     model = squeezenet1_1(pretrained=False)
     model.classifier = Classifier()
     model.load_state_dict(torch.load(weights, map_location=torch.device("cpu")))
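For context, here is a rough usage sketch of the rebuilt `net()` on a single 224x224 image tensor, the shape that `embed_img()` in app.py produces; the dummy input and the call pattern are my own assumptions, not part of this commit.

```python
# Assumed usage sketch (not part of this commit): run net() on one image tensor.
import torch

from model import net

model = net()  # SqueezeNet1_1 backbone with the custom Classifier() head, weights loaded on CPU
model.eval()

dummy = torch.randn(1, 3, 224, 224)  # stand-in for an embed_img(...) output
with torch.no_grad():
    logits = model(dummy)
    class_index = int(torch.argmax(logits, dim=1))  # index into CLASSES / TRANS in app.py
```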
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
-librosa
 torch
+pillow
+librosa
 matplotlib
 torchvision
-
-modelscope==1.15
+modelscope[framework]==1.18
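After the reshuffle, a quick way to confirm the pinned set resolves in the target environment is an import check like the one below (a sketch of my own, not part of the commit; note that pillow imports as PIL).

```python
# Post-install sanity check (assumed workflow, not part of this commit):
# verify each requirements.txt dependency imports and print its version.
import importlib

for name in ("torch", "PIL", "librosa", "matplotlib", "torchvision", "modelscope"):
    module = importlib.import_module(name)
    print(f"{name}: {getattr(module, '__version__', 'imported')}")
```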