amroa committed on
Commit
6ded705
·
1 Parent(s): faad4ba

averaging over 5 folds

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +13 -5
  3. example.jpg +0 -0
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -25,7 +25,7 @@ from transformers import ASTFeatureExtractor
25
  FEATURE_EXTRACTOR = ASTFeatureExtractor()
26
 
27
  def plot_mel(sr, x):
28
- mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=224, fmax=10000)
29
  mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
30
  mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min()) # normalize spectrogram to [0,1]
31
  mel_spec_db = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1) # Convert to 3-channel
@@ -54,7 +54,9 @@ def plot_wave(sr, x):
54
 
55
  def predict(audio, start, end):
56
  sr, x = audio
57
- x = np.array(x, dtype=np.float64)/32768.0
 
 
58
  res = preprocess_for_inference(x, sr)
59
 
60
  if start >= end:
@@ -102,8 +104,10 @@ label_mapping = pd.read_csv('BirdAST_Baseline_5folds_label_map.csv')
102
  species_id_to_name = {row['species_id']: row['scientific_name'] for index, row in label_mapping.iterrows()}
103
 
104
  def preprocess_for_inference(audio_arr, sr):
 
105
  spec = FEATURE_EXTRACTOR(audio_arr, sampling_rate=sr, padding="max_length", return_tensors="pt")
106
  input_values = spec['input_values'] # Get the input values prepared for model input
 
107
 
108
  # Initialize a list to store predictions from all models
109
  model_outputs = []
@@ -114,16 +118,20 @@ def preprocess_for_inference(audio_arr, sr):
114
  output = model(input_values)
115
  predict_score = F.softmax(output['logits'], dim=1)
116
  model_outputs.append(predict_score)
 
 
117
 
118
  # Average the predictions across all models
119
- avg_predictions = torch.mean(torch.stack(model_outputs), dim=0)
 
120
 
121
  # Get the top 10 predictions based on the average prediction scores
122
- topk_values, topk_indices = torch.topk(avg_predictions, 10, dim=1)
 
123
 
124
  # Initialize results list to store the species names and their associated probabilities
125
  results = []
126
- for idx, scores in zip(topk_indices[0], topk_values[0]):
127
  species_name = species_id_to_name[idx.item()]
128
  probability = scores.item()
129
  results.append([species_name, probability])
 
25
  FEATURE_EXTRACTOR = ASTFeatureExtractor()
26
 
27
  def plot_mel(sr, x):
28
+ mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, fmax=10000)
29
  mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
30
  mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min()) # normalize spectrogram to [0,1]
31
  mel_spec_db = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1) # Convert to 3-channel
 
54
 
55
  def predict(audio, start, end):
56
  sr, x = audio
57
+
58
+ x = np.array(x, dtype=np.float32)/32768.0
59
+ x = x[start*sr : end*sr]
60
  res = preprocess_for_inference(x, sr)
61
 
62
  if start >= end:
 
104
  species_id_to_name = {row['species_id']: row['scientific_name'] for index, row in label_mapping.iterrows()}
105
 
106
  def preprocess_for_inference(audio_arr, sr):
107
+ print(sr)
108
  spec = FEATURE_EXTRACTOR(audio_arr, sampling_rate=sr, padding="max_length", return_tensors="pt")
109
  input_values = spec['input_values'] # Get the input values prepared for model input
110
+
111
 
112
  # Initialize a list to store predictions from all models
113
  model_outputs = []
 
118
  output = model(input_values)
119
  predict_score = F.softmax(output['logits'], dim=1)
120
  model_outputs.append(predict_score)
121
+ print(predict_score[0, 434])
122
+
123
 
124
  # Average the predictions across all models
125
+ avg_predictions = torch.mean(torch.cat(model_outputs), dim=0) #.values
126
+ print(avg_predictions[434])
127
 
128
  # Get the top 10 predictions based on the average prediction scores
129
+ topk_values, topk_indices = torch.topk(avg_predictions, 10)
130
+ print(topk_values.shape, topk_indices.shape)
131
 
132
  # Initialize results list to store the species names and their associated probabilities
133
  results = []
134
+ for idx, scores in zip(topk_indices, topk_values):
135
  species_name = species_id_to_name[idx.item()]
136
  probability = scores.item()
137
  results.append([species_name, probability])
example.jpg ADDED