# NOTE: the original dump carried a status header here ("Spaces:" / "Runtime error" x2),
# i.e. this submission script was reported as failing at runtime when exported.
# The script below trains a multi-label CNN and writes submission.csv.
# Standard library
import ast
import json

# Third-party: numerics, data handling, plotting, progress
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# Third-party: modeling
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
def label_map(category, n_classes=290):
    """Convert a stringified list of 1-based category ids to a multi-hot vector.

    Parameters
    ----------
    category : str
        String representation of a list of category ids, e.g. "[1, 7]"
        (parsed with ``ast.literal_eval``).
    n_classes : int, optional
        Length of the returned vector (default 290).

    Returns
    -------
    list[int]
        Vector of length ``n_classes`` with 1 at index ``id - 1`` for each id.
    """
    multi_hot = [0 for _ in range(n_classes)]
    for cat in ast.literal_eval(category):
        # ids are 1-based in the CSV; shift down to a 0-based index
        multi_hot[int(cat) - 1] = 1
    return multi_hot
if __name__ == "__main__":
    # ---------------------------------------------------------------
    # Load training data: ids -> image paths, categories -> multi-hot
    # ---------------------------------------------------------------
    image_dir = "images/"
    train_df = pd.read_csv("multilabel_classification/train.csv")
    train_df['categories'] = train_df['categories'].apply(label_map)
    train_df["file_name"] = [image_dir + img_id + ".png" for img_id in train_df["id"]]

    SIZE = 256  # images are resized to SIZE x SIZE x 3
    X_dataset = []
    for path in train_df["file_name"]:
        img = keras.utils.load_img(path, target_size=(SIZE, SIZE, 3))
        # scale pixel values to [0, 1]
        X_dataset.append(keras.utils.img_to_array(img) / 255.)
    X = np.array(X_dataset)
    y = np.array(train_df["categories"].to_list())
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20, test_size=0.3)

    # ---------------------------------------------------------------
    # Define model: 4 conv blocks (Conv -> Pool -> BatchNorm -> Dropout)
    # followed by two dense layers and a 290-way sigmoid head.
    # ---------------------------------------------------------------
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=(5, 5), activation="relu", input_shape=(SIZE, SIZE, 3)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Conv2D(filters=64, kernel_size=(5, 5), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    # sigmoid (not softmax): labels are independent in multi-label classification
    model.add(Dense(290, activation='sigmoid'))

    # ---------------------------------------------------------------
    # Train model
    # ---------------------------------------------------------------
    EPOCH = 1
    BATCH_SIZE = 64
    # binary_crossentropy is computed per label, so this is a multi-label
    # setup rather than a true binary classification problem.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=EPOCH, validation_data=(X_test, y_test), batch_size=BATCH_SIZE)

    # ---------------------------------------------------------------
    # Generate predictions on the evaluation set and save submission.csv
    # ---------------------------------------------------------------
    with open("object_detection/eval.json") as f:
        valid_json = json.load(f)["images"]
    valid_df = pd.DataFrame(valid_json)

    threshold = 0.5  # a label is predicted when its sigmoid output >= threshold
    predict_list = []
    for idx in range(len(valid_df)):
        # BUG FIX: the original indexed valid_df['file_name'][0] here, so every
        # iteration predicted on the *first* image instead of the idx-th one.
        img = keras.utils.load_img(image_dir + valid_df['file_name'][idx], target_size=(SIZE, SIZE, 3))
        img = keras.utils.img_to_array(img) / 255.
        img = np.expand_dims(img, axis=0)  # add batch dimension
        proba = model.predict(img)[0]  # per-class probabilities
        # convert 0-based indices above threshold back to 1-based category ids
        predict = sorted(j + 1 for j in range(len(proba)) if proba[j] >= threshold)
        predict_list.append(predict)

    # id column: file name without its 4-char ".png" extension
    valid_id = [x[:-4] for x in valid_df["file_name"].to_list()]
    valid_osd = [1] * len(valid_id)
    submit_data = [[valid_id[k], predict_list[k], valid_osd[k]] for k in range(len(valid_id))]
    pd.DataFrame(data=submit_data, columns=["id", "categories", "osd"]).to_csv("submission.csv", index=False)