amroa committed
Commit d73fb39
1 Parent(s): dcf7d14

update weights

Files changed (4)
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +12 -2
  3. birdvec.py +95 -0
  4. fetch_img.py +0 -3
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -23,7 +23,7 @@ from fetch_img import download_images, scientific_to_species_code
from audio_class_predictor import predict_class
from bird_ast_model import birdast_preprocess, birdast_inference
from bird_ast_seq_model import birdast_seq_preprocess, birdast_seq_inference
-
+ from birdvec import birdvec_preprocess, birdvec_inference
from utils import plot_wave, plot_mel, download_model, bandpass_filter

# Define the default parameters
@@ -60,10 +60,20 @@ birdast_seq_assets = {
    "inference_fn": birdast_seq_inference,
}

+ birdvec_assets = {
+     "model_weights": [
+         f"https://huggingface.co/amroa/BirdVec/resolve/main/fold{i}/best-model{i}.ckpt" for i in range(3)
+     ],
+     "label_mapping": "https://huggingface.co/amroa/BirdVec/resolve/main/new_label_map.csv",
+     "preprocess_fn": birdvec_preprocess,
+     "inference_fn": birdvec_inference,
+ }
+
# maintain a dictionary of assets
ASSET_DICT = {
    "BirdAST": birdast_assets,
    "BirdAST_Seq": birdast_seq_assets,
+     "BirdWav2Vec": birdvec_assets,
}

@@ -251,7 +261,7 @@ with gr.Blocks(theme = seafoam, css = css, js = js) as demo:
    gr.Markdown(DESCRIPTION)

    # add dropdown for model selection
- model_names = ['BirdAST', 'BirdAST_Seq'] #, 'EfficientNet']
+ model_names = ['BirdAST', 'BirdAST_Seq', 'BirdWav2Vec'] #, 'EfficientNet']
    model_dropdown = gr.Dropdown(label="Choose a model", choices=model_names)
    download_status = gr.Textbox(label="Model Status", lines=3, value='', interactive=False)  # Non-interactive textbox for status
    model_dropdown.change(handle_model_selection, inputs=[model_dropdown, download_status], outputs=download_status)
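
Note: handle_model_selection and download_model are defined elsewhere in app.py and utils.py and are not shown in this diff. As a rough sketch of how an ASSET_DICT entry is consumed (the driver function and the download_model(url, local_path) signature are assumptions, not code from this repo):

import os

def run_model(model_name, audio_array, sr=16_000):
    # Look up the preprocess/inference pair registered for this model
    assets = ASSET_DICT[model_name]

    # Fetch each fold checkpoint unless a local copy already exists;
    # download_model(url, local_path) is an assumed signature
    local_weights = []
    for url in assets["model_weights"]:
        local_path = os.path.basename(url)
        if not os.path.exists(local_path):
            download_model(url, local_path)
        local_weights.append(local_path)

    # Every registered model exposes the same two-step interface
    features = assets["preprocess_fn"](audio_array, sr=sr)
    return assets["inference_fn"](local_weights, features)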
birdvec.py ADDED
@@ -0,0 +1,95 @@
+ import torch
+ import torch.nn.functional as F
+ import pytorch_lightning as pl
+ from transformers import AutoConfig, AutoFeatureExtractor, AutoModelForAudioClassification
+
+ DEFAULT_SR = 16_000
+ DEFAULT_BACKBONE = "MIT/ast-finetuned-audioset-10-10-0.4593"
+ DEFAULT_N_CLASSES = 728
+ MODEL_STR = "dima806/bird_sounds_classification"  # alternative backbone: "facebook/wav2vec2-base-960h"
+ RATE_HZ = 16_000
+ # Maximum audio interval length to consider, in seconds
+ MAX_SECONDS = 10
+ # Maximum audio interval length in samples (rate * seconds)
+ MAX_LENGTH = RATE_HZ * MAX_SECONDS
+
+ # Feature extractor that turns raw audio into model input values
+ FEATURE_EXTRACTOR = AutoFeatureExtractor.from_pretrained(MODEL_STR)
+
+
+ def birdvec_preprocess(audio_array, sr=DEFAULT_SR):
+     """
+     Preprocess an audio array for the BirdWav2Vec model.
+
+     audio_array: np.array, audio array of the recording, shape (n_samples,)
+     sr: int, sampling rate of the audio array (default: 16_000)
+
+     Note:
+     1. The audio array should be normalized to [-1, 1].
+     2. Audio longer than 10 seconds is truncated.
+     """
+     # Extract features (pass the caller's sampling rate through, not the default)
+     features = FEATURE_EXTRACTOR(audio_array, sampling_rate=sr, max_length=MAX_LENGTH, truncation=True, return_tensors="pt")
+     return features.input_values
+
+
+ def birdvec_inference(
+     model_weights,
+     input_values,
+     device='cpu',
+     backbone_name=None,
+     n_classes=DEFAULT_N_CLASSES,
+     activation=None,
+     n_mlp_layers=None,
+ ):
+     """
+     Run ensemble inference with the BirdWav2Vec model.
+
+     model_weights: list, paths of the per-fold checkpoint files
+     input_values: torch.Tensor, output of birdvec_preprocess, shape (batch_size, n_samples)
+     device: str, device to run inference on (default: 'cpu')
+     n_classes: int, number of classes (default: 728)
+     backbone_name, activation, n_mlp_layers: unused here; kept so the
+         signature matches the other models' inference functions
+
+     Returns:
+     predictions: np.array, array of predictions, shape (n_models, batch_size, n_classes)
+     """
+     predict_collects = []
+     for _weights in model_weights:
+         model = BirdSongClassifier.load_from_checkpoint(_weights, map_location=device, class_weights=None)
+         if device != 'cpu':
+             model.to(device)
+         model.eval()
+
+         with torch.no_grad():
+             if device != 'cpu':
+                 input_values = input_values.to(device)
+             output = model(input_values)
+             logits = output['logits']
+             probs = F.softmax(logits, dim=-1)
+             predict_collects.append(probs)
+
+     if device != 'cpu':
+         predict_collects = [pred.cpu() for pred in predict_collects]
+
+     # stack (not cat) so the result has the documented (n_models, batch_size, n_classes) shape
+     predict_collects = torch.stack(predict_collects, dim=0).numpy()
+
+     return predict_collects
+
+
+ class BirdSongClassifier(pl.LightningModule):
+     def __init__(self, class_weights):
+         super().__init__()
+         config = AutoConfig.from_pretrained(MODEL_STR)
+         config.num_labels = DEFAULT_N_CLASSES
+         self.model = AutoModelForAudioClassification.from_config(config)
+
+     def forward(self, x):
+         return self.model(x)
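
For reference, a minimal end-to-end sketch of the new module (the file name, local checkpoint paths, and librosa-based loading are illustrative assumptions, not part of this commit):

import librosa
from birdvec import birdvec_preprocess, birdvec_inference

# Load a recording at the 16 kHz rate the feature extractor expects;
# librosa returns a float waveform normalized to [-1, 1]
audio, sr = librosa.load("recording.wav", sr=16_000)

# Shape (1, n_samples), truncated to at most 10 seconds
input_values = birdvec_preprocess(audio, sr=sr)

# Hypothetical local copies of the three fold checkpoints
weights = [f"fold{i}/best-model{i}.ckpt" for i in range(3)]

# Class probabilities with shape (n_models, batch_size, n_classes) = (3, 1, 728)
probs = birdvec_inference(weights, input_values)

# Average across folds and take the most likely class index
print(probs.mean(axis=0).argmax(axis=-1))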
fetch_img.py CHANGED
@@ -13,9 +13,6 @@ REQ_FMT = {
    "url": 'https://api.ebird.org/v2/ref/taxonomy/ebird',
    "params" : {
        'species': 'CHANGE THIS TO SPECIES CODE'
-     },
-     "headers" : {
-         'X-eBirdApiToken': 'id1a0e3q2lt3'
    }
}
bird_df = pd.read_csv("ebird_taxonomy_v2023.csv")
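
This hunk removes a hard-coded eBird API token from REQ_FMT (the removed value remains recoverable from the repository history, so it should be treated as exposed). If authenticated requests are needed again, one option is to read the token from the environment; a minimal sketch, where EBIRD_API_TOKEN is an assumed variable name:

import os

# Read the eBird API token from the environment instead of hard-coding it
token = os.environ.get("EBIRD_API_TOKEN")
headers = {'X-eBirdApiToken': token} if token else {}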