{ "_name_or_path": "MIT/ast-finetuned-audioset-10-10-0.450", "architectures": [ "ASTForAudioClassification" ], "attention_probs_dropout_prob": 0.0, "frequency_stride": 10, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "dog", "1": "rooster", "2": "pig", "3": "cow", "4": "frog", "5": "cat", "6": "hen", "7": "insects", "8": "sheep", "9": "crow", "10": "rain", "11": "sea_waves", "12": "crackling_fire", "13": "crickets", "14": "chirping_birds", "15": "water_drops", "16": "wind", "17": "pouring_water", "18": "toilet_flush", "19": "thunderstorm", "20": "crying_baby", "21": "sneezing", "22": "clapping", "23": "breathing", "24": "coughing", "25": "footsteps", "26": "laughing", "27": "brushing_teeth", "28": "snoring", "29": "drinking_sipping", "30": "door_wood_knock", "31": "mouse_click", "32": "keyboard_typing", "33": "door_wood_creaks", "34": "can_opening", "35": "washing_machine", "36": "vacuum_cleaner", "37": "clock_alarm", "38": "clock_tick", "39": "glass_breaking", "40": "helicopter", "41": "chainsaw", "42": "siren", "43": "car_horn", "44": "engine", "45": "train", "46": "church_bells", "47": "airplane", "48": "fireworks", "49": "hand_saw" }, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "airplane": 47, "breathing": 23, "brushing_teeth": 27, "can_opening": 34, "car_horn": 43, "cat": 5, "chainsaw": 41, "chirping_birds": 14, "church_bells": 46, "clapping": 22, "clock_alarm": 37, "clock_tick": 38, "coughing": 24, "cow": 3, "crackling_fire": 12, "crickets": 13, "crow": 9, "crying_baby": 20, "dog": 0, "door_wood_creaks": 33, "door_wood_knock": 30, "drinking_sipping": 29, "engine": 44, "fireworks": 48, "footsteps": 25, "frog": 4, "glass_breaking": 39, "hand_saw": 49, "helicopter": 40, "hen": 6, "insects": 7, "keyboard_typing": 32, "laughing": 26, "mouse_click": 31, "pig": 2, "pouring_water": 17, "rain": 10, "rooster": 1, "sea_waves": 11, "sheep": 8, "siren": 42, "sneezing": 21, "snoring": 28, "thunderstorm": 19, "toilet_flush": 18, "train": 45, "vacuum_cleaner": 36, "washing_machine": 35, "water_drops": 15, "wind": 16 }, "layer_norm_eps": 1e-12, "max_length": 1024, "model_type": "audio-spectrogram-transformer", "num_attention_heads": 12, "num_hidden_layers": 12, "num_mel_bins": 128, "patch_size": 16, "problem_type": "single_label_classification", "qkv_bias": true, "time_stride": 10, "torch_dtype": "float32", "transformers_version": "4.39.3" }