averaging over 5 folds
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +13 -5
- example.jpg +0 -0
__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
app.py
CHANGED
@@ -25,7 +25,7 @@ from transformers import ASTFeatureExtractor
 FEATURE_EXTRACTOR = ASTFeatureExtractor()
 
 def plot_mel(sr, x):
-    mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=
+    mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, fmax=10000)
     mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
     mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min()) # normalize spectrogram to [0,1]
     mel_spec_db = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1) # Convert to 3-channel
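The only change in plot_mel is pinning the spectrogram parameters to n_mels=128, fmax=10000. For reference, a minimal sketch of what that preprocessing produces on a synthetic signal (the tone and sample rate below are made up for illustration):

```python
import numpy as np
import librosa

# Hypothetical 1-second 440 Hz tone, just to exercise the same steps as plot_mel.
sr = 32000
x = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)

mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, fmax=10000)
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)   # log scale, 0 dB at the peak
mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())  # normalize to [0, 1]
image = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1)  # grayscale -> 3-channel

print(image.shape)  # (128, n_frames, 3)
```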
@@ -54,7 +54,9 @@ def plot_wave(sr, x):
 
 def predict(audio, start, end):
     sr, x = audio
-
+
+    x = np.array(x, dtype=np.float32)/32768.0
+    x = x[start*sr : end*sr]
     res = preprocess_for_inference(x, sr)
 
     if start >= end:
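The added lines assume the Gradio audio input arrives as a (sample_rate, int16 array) pair: dividing by 32768.0 maps 16-bit PCM into [-1, 1], and the slice keeps only the start..end window, with start and end treated as seconds. A small self-contained sketch of the same conversion (all values below are dummies):

```python
import numpy as np

sr = 32000                                                     # hypothetical sample rate
x_int16 = (np.random.randn(10 * sr) * 3000).astype(np.int16)   # fake 10 s of 16-bit PCM
start, end = 2, 5                                              # selection in seconds

x = np.array(x_int16, dtype=np.float32) / 32768.0              # int16 -> float32 in [-1, 1)
x = x[start * sr : end * sr]                                   # crop to the selected window

print(x.dtype, float(x.min()) >= -1.0, float(x.max()) < 1.0, len(x) / sr)
# float32 True True 3.0
```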
@@ -102,8 +104,10 @@ label_mapping = pd.read_csv('BirdAST_Baseline_5folds_label_map.csv')
 species_id_to_name = {row['species_id']: row['scientific_name'] for index, row in label_mapping.iterrows()}
 
 def preprocess_for_inference(audio_arr, sr):
+    print(sr)
     spec = FEATURE_EXTRACTOR(audio_arr, sampling_rate=sr, padding="max_length", return_tensors="pt")
     input_values = spec['input_values'] # Get the input values prepared for model input
+
 
     # Initialize a list to store predictions from all models
     model_outputs = []
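The new print(sr) reads like a sanity check on the incoming sample rate before feature extraction. Not part of this commit, but worth noting: AST feature extraction is normally set up for 16 kHz audio, so input arriving at a different rate would presumably need resampling first. A hedged sketch (target_sr is an assumed constant and to_extractor_rate a hypothetical helper, not code from the repo):

```python
import numpy as np
import librosa

def to_extractor_rate(audio_arr: np.ndarray, sr: int, target_sr: int = 16000):
    """Resample to the rate the feature extractor expects (assumed to be 16 kHz here)."""
    if sr != target_sr:
        audio_arr = librosa.resample(audio_arr, orig_sr=sr, target_sr=target_sr)
    return audio_arr, target_sr
```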
@@ -114,16 +118,20 @@ def preprocess_for_inference(audio_arr, sr):
         output = model(input_values)
         predict_score = F.softmax(output['logits'], dim=1)
         model_outputs.append(predict_score)
+        print(predict_score[0, 434])
+
 
     # Average the predictions across all models
-    avg_predictions = torch.mean(torch.
+    avg_predictions = torch.mean(torch.cat(model_outputs), dim=0) #.values
+    print(avg_predictions[434])
 
     # Get the top 10 predictions based on the average prediction scores
-    topk_values, topk_indices = torch.topk(avg_predictions, 10
+    topk_values, topk_indices = torch.topk(avg_predictions, 10)
+    print(topk_values.shape, topk_indices.shape)
 
     # Initialize results list to store the species names and their associated probabilities
     results = []
-    for idx, scores in zip(topk_indices
+    for idx, scores in zip(topk_indices, topk_values):
         species_name = species_id_to_name[idx.item()]
         probability = scores.item()
         results.append([species_name, probability])
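This hunk is the core of the commit: each fold model returns softmax scores of shape (1, num_classes), torch.cat stacks the five of them into (5, num_classes), and the mean over dim=0 gives a single averaged distribution that feeds torch.topk. The print(...[434]) lines look like temporary debug output for one particular class id. A minimal sketch of the averaging step, with random tensors standing in for the fold outputs (num_classes below is a made-up size):

```python
import torch
import torch.nn.functional as F

num_classes, num_folds = 500, 5                     # hypothetical sizes

# Stand-ins for the per-fold model outputs: softmax over logits, shape (1, num_classes) each.
model_outputs = [F.softmax(torch.randn(1, num_classes), dim=1) for _ in range(num_folds)]

stacked = torch.cat(model_outputs)                  # (5, num_classes)
avg_predictions = torch.mean(stacked, dim=0)        # (num_classes,) ensemble average

topk_values, topk_indices = torch.topk(avg_predictions, 10)
print(stacked.shape, avg_predictions.shape)         # torch.Size([5, 500]) torch.Size([500])
print(topk_indices.tolist())                        # ids of the 10 most probable classes
```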
example.jpg
ADDED