averaging over 5 folds
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +13 -5
- example.jpg +0 -0
__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
app.py
CHANGED
@@ -25,7 +25,7 @@ from transformers import ASTFeatureExtractor
 FEATURE_EXTRACTOR = ASTFeatureExtractor()
 
 def plot_mel(sr, x):
-    mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=
+    mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, fmax=10000)
     mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
     mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min()) # normalize spectrogram to [0,1]
     mel_spec_db = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1) # Convert to 3-channel
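The only change in plot_mel is pinning the spectrogram parameters to n_mels=128, fmax=10000. For reference, a minimal sketch of what that preprocessing produces on a synthetic signal (the tone and sample rate below are made up for illustration):

```python
import numpy as np
import librosa

# Hypothetical 1-second 440 Hz tone, just to exercise the same steps as plot_mel.
sr = 32000
x = np.sin(2 * np.pi * 440 * np.arange(sr) / sr).astype(np.float32)

mel_spec = librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128, fmax=10000)
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)   # log scale, 0 dB at the peak
mel_spec_db = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())  # normalize to [0, 1]
image = np.stack([mel_spec_db, mel_spec_db, mel_spec_db], axis=-1)  # grayscale -> 3-channel

print(image.shape)  # (128, n_frames, 3)
```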
@@ -54,7 +54,9 @@ def plot_wave(sr, x):
 
 def predict(audio, start, end):
     sr, x = audio
-
+
+    x = np.array(x, dtype=np.float32)/32768.0
+    x = x[start*sr : end*sr]
     res = preprocess_for_inference(x, sr)
 
     if start >= end:
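The added lines assume the Gradio audio input arrives as a (sample_rate, int16 array) pair: dividing by 32768.0 maps 16-bit PCM into [-1, 1], and the slice keeps only the start..end window, with start and end treated as seconds. A small self-contained sketch of the same conversion (all values below are dummies):

```python
import numpy as np

sr = 32000                                                     # hypothetical sample rate
x_int16 = (np.random.randn(10 * sr) * 3000).astype(np.int16)   # fake 10 s of 16-bit PCM
start, end = 2, 5                                              # selection in seconds

x = np.array(x_int16, dtype=np.float32) / 32768.0              # int16 -> float32 in [-1, 1)
x = x[start * sr : end * sr]                                   # crop to the selected window

print(x.dtype, float(x.min()) >= -1.0, float(x.max()) < 1.0, len(x) / sr)
# float32 True True 3.0
```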
@@ -102,8 +104,10 @@ label_mapping = pd.read_csv('BirdAST_Baseline_5folds_label_map.csv')
 species_id_to_name = {row['species_id']: row['scientific_name'] for index, row in label_mapping.iterrows()}
 
 def preprocess_for_inference(audio_arr, sr):
+    print(sr)
     spec = FEATURE_EXTRACTOR(audio_arr, sampling_rate=sr, padding="max_length", return_tensors="pt")
     input_values = spec['input_values'] # Get the input values prepared for model input
+
 
     # Initialize a list to store predictions from all models
     model_outputs = []
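The new print(sr) reads like a sanity check on the incoming sample rate before feature extraction. Not part of this commit, but worth noting: AST feature extraction is normally set up for 16 kHz audio, so input arriving at a different rate would presumably need resampling first. A hedged sketch (target_sr is an assumed constant and to_extractor_rate a hypothetical helper, not code from the repo):

```python
import numpy as np
import librosa

def to_extractor_rate(audio_arr: np.ndarray, sr: int, target_sr: int = 16000):
    """Resample to the rate the feature extractor expects (assumed to be 16 kHz here)."""
    if sr != target_sr:
        audio_arr = librosa.resample(audio_arr, orig_sr=sr, target_sr=target_sr)
    return audio_arr, target_sr
```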
@@ -114,16 +118,20 @@ def preprocess_for_inference(audio_arr, sr):
         output = model(input_values)
         predict_score = F.softmax(output['logits'], dim=1)
         model_outputs.append(predict_score)
+        print(predict_score[0, 434])
+
 
     # Average the predictions across all models
-    avg_predictions = torch.mean(torch.
+    avg_predictions = torch.mean(torch.cat(model_outputs), dim=0) #.values
+    print(avg_predictions[434])
 
     # Get the top 10 predictions based on the average prediction scores
-    topk_values, topk_indices = torch.topk(avg_predictions, 10
+    topk_values, topk_indices = torch.topk(avg_predictions, 10)
+    print(topk_values.shape, topk_indices.shape)
 
     # Initialize results list to store the species names and their associated probabilities
     results = []
-    for idx, scores in zip(topk_indices
+    for idx, scores in zip(topk_indices, topk_values):
         species_name = species_id_to_name[idx.item()]
         probability = scores.item()
         results.append([species_name, probability])
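This hunk is the core of the commit: each fold model returns softmax scores of shape (1, num_classes), torch.cat stacks the five of them into (5, num_classes), and the mean over dim=0 gives a single averaged distribution that feeds torch.topk. The print(...[434]) lines look like temporary debug output for one particular class id. A minimal sketch of the averaging step, with random tensors standing in for the fold outputs (num_classes below is a made-up size):

```python
import torch
import torch.nn.functional as F

num_classes, num_folds = 500, 5                     # hypothetical sizes

# Stand-ins for the per-fold model outputs: softmax over logits, shape (1, num_classes) each.
model_outputs = [F.softmax(torch.randn(1, num_classes), dim=1) for _ in range(num_folds)]

stacked = torch.cat(model_outputs)                  # (5, num_classes)
avg_predictions = torch.mean(stacked, dim=0)        # (num_classes,) ensemble average

topk_values, topk_indices = torch.topk(avg_predictions, 10)
print(stacked.shape, avg_predictions.shape)         # torch.Size([5, 500]) torch.Size([500])
print(topk_indices.tolist())                        # ids of the 10 most probable classes
```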
example.jpg
ADDED