admin committed on
Commit 2def64b · 1 Parent(s): ff6c649
Files changed (3)
  1. app.py +23 -24
  2. model.py +1 -2
  3. requirements.txt +3 -3
app.py CHANGED
@@ -15,6 +15,18 @@ from model import net, MODEL_DIR
 
 
 MODEL = net()
+TRANS = {
+    "PearlRiver": "Pearl River",
+    "YoungChang": "YOUNG CHANG",
+    "Steinway-T": "STEINWAY Theater",
+    "Hsinghai": "HSINGHAI",
+    "Kawai": "KAWAI",
+    "Steinway": "STEINWAY",
+    "Kawai-G": "KAWAI Grand",
+    "Yamaha": "YAMAHA",
+}
+CLASSES = list(TRANS.keys())
+CACHE_DIR = "./__pycache__/tmp"
 
 
 def most_common_element(input_list):
@@ -24,7 +36,7 @@ def most_common_element(input_list):
 
 
 def wav_to_mel(audio_path: str, width=0.18):
-    os.makedirs("./tmp")
+    os.makedirs(CACHE_DIR, exist_ok=True)
     try:
         y, sr = librosa.load(audio_path, sr=48000)
         non_silent = y
@@ -40,7 +52,7 @@ def wav_to_mel(audio_path: str, width=0.18):
             librosa.display.specshow(log_mel_spec[:, i : i + step])
             plt.axis("off")
             plt.savefig(
-                f"./tmp/{os.path.basename(audio_path)[:-4]}_{i}.jpg",
+                f"{CACHE_DIR}/{os.path.basename(audio_path)[:-4]}_{i}.jpg",
                 bbox_inches="tight",
                 pad_inches=0.0,
             )
@@ -62,12 +74,12 @@ def embed_img(img_path, input_size=224):
     return transform(img).unsqueeze(0)
 
 
-def inference(wav_path, folder_path="./tmp"):
+def inference(wav_path, folder_path=CACHE_DIR):
     if os.path.exists(folder_path):
         shutil.rmtree(folder_path)
 
     if not wav_path:
-        return None, "请输入音频 Please input an audio!"
+        return None, "Please input an audio!"
 
     wav_to_mel(wav_path)
     outputs = []
@@ -82,48 +94,35 @@ def inference(wav_path, folder_path="./tmp"):
 
     max_count_item = most_common_element(outputs)
     shutil.rmtree(folder_path)
-    return os.path.basename(wav_path), translate[classes[max_count_item]]
+    return os.path.basename(wav_path), TRANS[CLASSES[max_count_item]]
 
 
 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
-    translate = {
-        "PearlRiver": "Pearl River",
-        "YoungChang": "YOUNG CHANG",
-        "Steinway-T": "STEINWAY Theater",
-        "Hsinghai": "HSINGHAI",
-        "Kawai": "KAWAI",
-        "Steinway": "STEINWAY",
-        "Kawai-G": "KAWAI Grand",
-        "Yamaha": "YAMAHA",
-    }
-    classes = list(translate.keys())
     example_wavs = []
-    for cls in classes:
+    for cls in CLASSES:
         example_wavs.append(f"{MODEL_DIR}/examples/{cls}.wav")
 
     with gr.Blocks() as demo:
         gr.Interface(
             fn=inference,
-            inputs=gr.Audio(
-                type="filepath", label="上传钢琴录音 Upload a piano recording"
-            ),
+            inputs=gr.Audio(type="filepath", label="Upload a piano recording"),
             outputs=[
-                gr.Textbox(label="音频文件名 Audio filename", show_copy_button=True),
+                gr.Textbox(label="Audio filename", show_copy_button=True),
                 gr.Textbox(
-                    label="钢琴分类结果 Piano classification result",
+                    label="Piano classification result",
                     show_copy_button=True,
                 ),
             ],
             examples=example_wavs,
             cache_examples=False,
             allow_flagging="never",
-            title="建议录音时长保持在 3s 左右, 过长会影响识别效率<br>It is recommended to keep the duration of recording around 3s, too long will affect the recognition efficiency.",
+            title="It is recommended to keep the duration of recording around 3s, too long will affect the recognition efficiency.",
         )
 
         gr.Markdown(
             """
-            # 引用 Cite
+            # Cite
             ```bibtex
             @article{Zhou2023AHE,
                 author = {Monan Zhou and Shangda Wu and Shaohua Ji and Zijin Li and Wei Li},
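
Note: the app.py changes hoist the brand-label map (TRANS), the class list (CLASSES), and the cache path (CACHE_DIR) to module level, drop the Chinese-only UI strings, and make cache-directory creation idempotent. Below is a minimal standalone sketch of the makedirs behaviour the new code relies on; the constant is copied from the diff, and the script itself is illustrative rather than part of the repo.

```python
import os
import shutil

# Same cache path the commit introduces in app.py.
CACHE_DIR = "./__pycache__/tmp"

# Old code called os.makedirs("./tmp"), which raises FileExistsError if the
# directory survived a previous run. exist_ok=True makes the call idempotent.
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(CACHE_DIR, exist_ok=True)  # second call is a no-op, no exception

# inference() still clears the cache directory around each run, as in the diff.
shutil.rmtree(CACHE_DIR)
```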
model.py CHANGED
@@ -14,7 +14,6 @@ def Classifier(cls_num=8, output_size=512, linear_output=False):
     l1 = int(q * cls_num)
     l2 = int(q * l1)
     l3 = int(q * l2)
-
     if linear_output:
         return torch.nn.Sequential(
             nn.Dropout(),
@@ -45,7 +44,7 @@ def Classifier(cls_num=8, output_size=512, linear_output=False):
     )
 
 
-def net(weights=MODEL_DIR + "/save.pt"):
+def net(weights=f"{MODEL_DIR}/save.pt"):
     model = squeezenet1_1(pretrained=False)
     model.classifier = Classifier()
     model.load_state_dict(torch.load(weights, map_location=torch.device("cpu")))
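
Note: the only content change in model.py swaps string concatenation for an f-string in the default weights path; the two spellings produce the same string, so net() behaves exactly as before. A quick illustrative check (the MODEL_DIR value below is a placeholder, not the repo's real constant):

```python
MODEL_DIR = "/path/to/model_dir"  # placeholder value for illustration only

# Both spellings of the default argument build the identical path string.
assert MODEL_DIR + "/save.pt" == f"{MODEL_DIR}/save.pt"
```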
requirements.txt CHANGED
@@ -1,6 +1,6 @@
-librosa
 torch
+pillow
+librosa
 matplotlib
 torchvision
-pillow
-modelscope==1.15
+modelscope[framework]==1.18
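
Note: librosa and pillow are only reordered, not changed. The modelscope pin moves from 1.15 to 1.18 and now requests the framework extra; the bracket syntax (as in `pip install "modelscope[framework]==1.18"`) installs the package together with its optional framework dependency group.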