In [None]:
!pip install huggingface_hub
!pip install datasets
!pip install keras

In [None]:
from huggingface_hub import notebook_login
from datasets import load_dataset
import pandas as pd
from datasets import load_dataset
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from PIL import Image
import numpy as np

In [None]:
# Log this notebook session in to the Hugging Face Hub.
# NOTE(review): presumably required so that the 'competitions/aiornot' dataset
# loaded in the next cell can be downloaded — confirm.
notebook_login()

In [None]:
# Load the "train" split of the competition dataset from the Hugging Face Hub
# and hold out 15% of it for validation (no separate test set is created here).
# A fixed seed keeps the split identical across kernel restarts, so metrics and
# the misclassified samples inspected later are reproducible.
SPLIT_SEED = 42
_splits = load_dataset('competitions/aiornot', split="train").train_test_split(
    test_size=0.15, seed=SPLIT_SEED
)
# train_test_split returns a DatasetDict keyed by "train" and "test"; index it
# by name instead of relying on the ordering of .values().
train_ds, val_ds = _splits["train"], _splits["test"]

In [None]:
# Number of samples taken from the head of each split.
data_sz = 1000

# Slice each split exactly once. Slicing a Dataset returns every column of the
# selected rows, so slicing separately for 'image' and for 'label' would load
# (and decode) the images twice.
train_batch = train_ds[:data_sz]
val_batch = val_ds[:data_sz]

# One-hot encode the labels. num_classes is pinned to 2 (the size of the
# model's softmax output) so the result is always (n, 2), even if a subset
# happens to contain only one class.
Y_train = to_categorical(train_batch['label'], num_classes=2)
Y_val = to_categorical(val_batch['label'], num_classes=2)

# Convert the input images to one NumPy array per split.
# NOTE(review): np.stack requires every image to have the same shape —
# assumes the dataset images are uniformly sized; confirm.
X_train = np.stack([np.array(image) for image in train_batch['image']])
X_val = np.stack([np.array(image) for image in val_batch['image']])

# The raw batches (including the decoded images) are no longer needed.
del train_batch, val_batch

In [None]:
# Pick the device to build the model on. GPU 3 is kept as the first choice
# (the device this notebook was originally pinned to); machines that do not
# expose four GPUs fall back to the first GPU, or to the CPU, instead of
# depending on a device that is not there.
available_gpus = tf.config.list_logical_devices('GPU')
if len(available_gpus) > 3:
    device_name = '/device:GPU:3'
elif available_gpus:
    device_name = '/device:GPU:0'
else:
    device_name = '/device:CPU:0'

with tf.device(device_name):
    # Load the VGG16 convolutional base pre-trained on ImageNet, without its
    # original classification head.
    base_model = VGG16(weights='imagenet', include_top=False)

    # Add a global spatial average pooling layer on top of the base output.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)

    # Add a fully-connected layer.
    x = Dense(1024, activation='relu')(x)

    # Add a softmax output layer with one unit per target class.
    num_classes = 2
    predictions = Dense(num_classes, activation='softmax')(x)

    # Create the final model: VGG16 base + new classification head.
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze all layers of the VGG16 base so only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    # Compile the model. `learning_rate` is the supported keyword; the old
    # `lr` alias is deprecated and rejected by recent Keras releases.
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

# Train the new head on the prepared subset, validating after every epoch.
# NOTE(review): the images are passed as raw pixel arrays; the ImageNet weights
# are normally paired with vgg16.preprocess_input — confirm whether that
# preprocessing should be applied.
model.fit(X_train, Y_train, epochs=10, validation_data=(X_val, Y_val))

In [None]:
# Score the validation images with the trained model; every row of y_pred
# holds the softmax outputs for one sample.
y_pred = model.predict(X_val)

# Reduce both the predicted scores and the one-hot ground truth to class ids.
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(Y_val, axis=1)

# Positions (within the validation subset) where the two disagree.
misclassified_indices = np.where(y_true_classes != y_pred_classes)[0]

# Get the misclassified samples
# x_misclassified = X_val[misclassified_indices]
# y_misclassified_true = Y_val[misclassified_indices]
# y_misclassified_pred = y_pred[misclassified_indices]

In [None]:
# Helper for inspecting a misclassified sample: shows its label, the model
# output and the image itself.
def checkMiss(idx):
    """Print the true label and the prediction for one sample, then show its image.

    Reads the notebook-level arrays ``Y_val``, ``y_pred`` and ``X_val``.

    Args:
        idx: Position of the sample within those arrays.
    """
    expected = Y_val[idx]
    print("\ncorrect:", expected)
    predicted = y_pred[idx]
    print("miss:", predicted)
    # NOTE(review): Image.show() hands the picture to an external viewer and
    # may not render inline in a notebook — confirm it works in this setup.
    Image.fromarray(X_val[idx]).show()

In [None]:
# View up to 10 misclassified samples to see what could be improved.
# Slicing (rather than indexing with range(10)) avoids an IndexError when the
# model made fewer than 10 mistakes on the validation subset.
for miss_idx in misclassified_indices[:10]:
    checkMiss(miss_idx)