admin committed on
Commit 2def64b · 1 Parent(s): ff6c649
Files changed (3)
  1. app.py +23 -24
  2. model.py +1 -2
  3. requirements.txt +3 -3
app.py CHANGED
@@ -15,6 +15,18 @@ from model import net, MODEL_DIR
 
 
 MODEL = net()
+TRANS = {
+    "PearlRiver": "Pearl River",
+    "YoungChang": "YOUNG CHANG",
+    "Steinway-T": "STEINWAY Theater",
+    "Hsinghai": "HSINGHAI",
+    "Kawai": "KAWAI",
+    "Steinway": "STEINWAY",
+    "Kawai-G": "KAWAI Grand",
+    "Yamaha": "YAMAHA",
+}
+CLASSES = list(TRANS.keys())
+CACHE_DIR = "./__pycache__/tmp"
 
 
 def most_common_element(input_list):
@@ -24,7 +36,7 @@ def most_common_element(input_list):
 
 
 def wav_to_mel(audio_path: str, width=0.18):
-    os.makedirs("./tmp")
+    os.makedirs(CACHE_DIR, exist_ok=True)
     try:
         y, sr = librosa.load(audio_path, sr=48000)
         non_silent = y
@@ -40,7 +52,7 @@ def wav_to_mel(audio_path: str, width=0.18):
             librosa.display.specshow(log_mel_spec[:, i : i + step])
             plt.axis("off")
             plt.savefig(
-                f"./tmp/{os.path.basename(audio_path)[:-4]}_{i}.jpg",
+                f"{CACHE_DIR}/{os.path.basename(audio_path)[:-4]}_{i}.jpg",
                 bbox_inches="tight",
                 pad_inches=0.0,
             )
@@ -62,12 +74,12 @@ def embed_img(img_path, input_size=224):
     return transform(img).unsqueeze(0)
 
 
-def inference(wav_path, folder_path="./tmp"):
+def inference(wav_path, folder_path=CACHE_DIR):
     if os.path.exists(folder_path):
         shutil.rmtree(folder_path)
 
     if not wav_path:
-        return None, "请输入音频 Please input an audio!"
+        return None, "Please input an audio!"
 
     wav_to_mel(wav_path)
     outputs = []
@@ -82,48 +94,35 @@ def inference(wav_path, folder_path="./tmp"):
 
     max_count_item = most_common_element(outputs)
     shutil.rmtree(folder_path)
-    return os.path.basename(wav_path), translate[classes[max_count_item]]
+    return os.path.basename(wav_path), TRANS[CLASSES[max_count_item]]
 
 
 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
-    translate = {
-        "PearlRiver": "Pearl River",
-        "YoungChang": "YOUNG CHANG",
-        "Steinway-T": "STEINWAY Theater",
-        "Hsinghai": "HSINGHAI",
-        "Kawai": "KAWAI",
-        "Steinway": "STEINWAY",
-        "Kawai-G": "KAWAI Grand",
-        "Yamaha": "YAMAHA",
-    }
-    classes = list(translate.keys())
     example_wavs = []
-    for cls in classes:
+    for cls in CLASSES:
         example_wavs.append(f"{MODEL_DIR}/examples/{cls}.wav")
 
     with gr.Blocks() as demo:
         gr.Interface(
             fn=inference,
-            inputs=gr.Audio(
-                type="filepath", label="上传钢琴录音 Upload a piano recording"
-            ),
+            inputs=gr.Audio(type="filepath", label="Upload a piano recording"),
             outputs=[
-                gr.Textbox(label="音频文件名 Audio filename", show_copy_button=True),
+                gr.Textbox(label="Audio filename", show_copy_button=True),
                 gr.Textbox(
-                    label="钢琴分类结果 Piano classification result",
+                    label="Piano classification result",
                     show_copy_button=True,
                 ),
             ],
             examples=example_wavs,
             cache_examples=False,
             allow_flagging="never",
-            title="建议录音时长保持在 3s 左右, 过长会影响识别效率<br>It is recommended to keep the duration of recording around 3s, too long will affect the recognition efficiency.",
+            title="It is recommended to keep the duration of recording around 3s, too long will affect the recognition efficiency.",
         )
 
         gr.Markdown(
             """
-            # 引用 Cite
+            # Cite
             ```bibtex
             @article{Zhou2023AHE,
                 author = {Monan Zhou and Shangda Wu and Shaohua Ji and Zijin Li and Wei Li},
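
Note: the app.py changes hoist the brand-label map (TRANS), the class list (CLASSES), and the cache path (CACHE_DIR) to module level, drop the Chinese-only UI strings, and make cache-directory creation idempotent. Below is a minimal standalone sketch of the makedirs behaviour the new code relies on; the constant is copied from the diff, and the script itself is illustrative rather than part of the repo.

```python
import os
import shutil

# Same cache path the commit introduces in app.py.
CACHE_DIR = "./__pycache__/tmp"

# Old code called os.makedirs("./tmp"), which raises FileExistsError if the
# directory survived a previous run. exist_ok=True makes the call idempotent.
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)  # second call is a no-op, no exception

# inference() still clears the cache directory around each run, as in the diff.
shutil.rmtree(CACHE_DIR)
```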
model.py CHANGED
@@ -14,7 +14,6 @@ def Classifier(cls_num=8, output_size=512, linear_output=False):
     l1 = int(q * cls_num)
     l2 = int(q * l1)
     l3 = int(q * l2)
-
     if linear_output:
         return torch.nn.Sequential(
             nn.Dropout(),
@@ -45,7 +44,7 @@ def Classifier(cls_num=8, output_size=512, linear_output=False):
     )
 
 
-def net(weights=MODEL_DIR + "/save.pt"):
+def net(weights=f"{MODEL_DIR}/save.pt"):
     model = squeezenet1_1(pretrained=False)
     model.classifier = Classifier()
     model.load_state_dict(torch.load(weights, map_location=torch.device("cpu")))
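
Note: the only content change in model.py swaps string concatenation for an f-string in the default weights path; the two spellings produce the same string, so net() behaves exactly as before. A quick illustrative check (the MODEL_DIR value below is a placeholder, not the repo's real constant):

```python
MODEL_DIR = "/path/to/model_dir"  # placeholder value for illustration only

# Both spellings of the default argument build the identical path string.
assert MODEL_DIR + "/save.pt" == f"{MODEL_DIR}/save.pt"
```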
requirements.txt CHANGED
@@ -1,6 +1,6 @@
-librosa
 torch
+pillow
+librosa
 matplotlib
 torchvision
-pillow
-modelscope==1.15
+modelscope[framework]==1.18
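
Note: librosa and pillow are only reordered, not changed. The modelscope pin moves from 1.15 to 1.18 and now requests the framework extra; the bracket syntax (as in `pip install "modelscope[framework]==1.18"`) installs the package together with its optional framework dependency group.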