## Download dataset and connect your Google drive
For that you need a `kaggle.json` file, which you can get from [here](https://www.kaggle.com/settings/account): in the API section, use the ```"Create New Token"``` option, which downloads a ```kaggle.json``` file. Upload that file to the working directory.

In [None]:
# Install the Kaggle CLI and register the API token, then fetch the dataset.
!pip install -q kaggle
!mkdir -p ~/.kaggle
# kaggle.json must already be present in the current working directory.
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write.
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d divaxshah/cities-all
# Unpack the downloaded archive.
!unzip /content/cities-all.zip

In [None]:
!pip install split-folders tensorflow[torch] seaborn numpy matplotlib

In [None]:
# Mount Google Drive; later cells save models under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

### **Importing of Necessary Libraries**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import random
import cv2
import os
import PIL
import pathlib
import splitfolders

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16

### **Dataset Loading and Splitting**
Split-folders library was used to split the dataset into three parts: Training set(70%), Validation set(15%), and Test set(15%).

In [None]:
# Root folder of the extracted dataset (one sub-folder per city), as a Path.
base_ds = pathlib.Path('/content/Citeisall')

In [None]:
# Copy the dataset into /content/imgs using a 70/15/15 ratio and a fixed seed;
# later cells read the result from imgs/train, imgs/val and imgs/test.
splitfolders.ratio(base_ds, output='/content/imgs', seed=123, ratio=(.7,.15,.15), group_prefix=None)

Copying files: 12500 files [00:13, 928.79 files/s]


In [None]:
def _jpg_names(folder):
    """Return the entries of `folder` whose names end with '.jpg'."""
    return [entry for entry in os.listdir(folder) if entry.endswith('.jpg')]


# File names found in each city's folder of the original (unsplit) dataset.
Ahmedabad = _jpg_names(f'{base_ds}/Ahmedabad')
Delhi = _jpg_names(f'{base_ds}/Delhi')
Kerala = _jpg_names(f'{base_ds}/Kerala')
Kolkata = _jpg_names(f'{base_ds}/Kolkata')
# NOTE(review): this folder is spelled 'Mumabi' and is listed without the
# '.jpg' filter used for the other cities -- confirm both are intentional.
Mumbai = list(os.listdir(f'{base_ds}/Mumabi'))
city = [Ahmedabad, Delhi, Kerala, Kolkata, Mumbai]

# Class names are the alphabetically sorted sub-folder names of the training split.
city_classes = sorted(os.listdir('imgs/train'))

### **Dataset Exploration**
This section shows the total number of images in the dataset, the number of classes, and how the images are distributed across the classes.

In [None]:
# Overall dataset size (JPEG files one level below the dataset root) and class count.
image_count = sum(1 for _ in base_ds.glob('*/*.jpg'))
print(f'Total images: {image_count}')
print(f'Total number of classes: {len(city_classes)}')

# Per-class totals: `city` is indexed in step with the sorted class names.
city_count = []
for position, class_name in enumerate(city_classes):
    n_images = len(city[position])
    print(f'Total {class_name} images: {n_images}')
    city_count.append(n_images)

# Bar chart of the number of images per class.
sns.set_style('darkgrid')
sns.barplot(x=city_classes, y=city_count, palette="Blues_d")
plt.show()

### Sample Images
Each image from the dataset has a dimension of 250 by 250 and a color type of RGB

In [None]:
# Load one test image and report its dimensions.
sample_path = '/content/imgs/test/Ahmedabad/Ahmedabad-Test (1).jpg'
sample_img = cv2.imread(sample_path)
if sample_img is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError(f'Could not read image: {sample_path}')
# OpenCV decodes to BGR channel order while matplotlib expects RGB; convert so
# the colours are displayed correctly. The array shape is unchanged.
sample_img = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)
plt.imshow(sample_img)
print(f'Image dimensions: {sample_img.shape}')

In [None]:
def load_random_img(dir, label):
    """Show one randomly chosen image per class, side by side in a single row.

    Args:
        dir: Directory that contains one sub-folder per class.
        label: Class names to display (sub-folder names of `dir`). A single
            class may be given as a plain string.

    Raises:
        ValueError: If the randomly chosen file cannot be decoded as an image.
    """
    class_names = [label] if isinstance(label, str) else list(label)
    plt.figure(figsize=(10, 10))
    for position, class_name in enumerate(class_names, start=1):
        # One column per requested class instead of a fixed count of five.
        plt.subplot(1, len(class_names), position)
        class_dir = os.path.join(dir, class_name)
        image_path = os.path.join(class_dir, random.choice(os.listdir(class_dir)))
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot read.
            raise ValueError(f'Could not read image: {image_path}')
        plt.title(class_name)
        # OpenCV loads BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.grid(None)
        plt.axis('off')

In [None]:
# Draw three figures, each with a fresh random sample per class.
for _ in range(3):
    load_random_img(base_ds, city_classes)

In [None]:
# Batch size used by the train/val/test directory iterators below.
batch_size = 128
# Images are resized to this height and width when loaded.
img_height, img_width = 175, 175
# Model input shape: height, width and 3 colour channels.
input_shape = (img_height, img_width, 3)

### **Data Pre-processing**

In [None]:
# Shared generator for all splits: only rescales pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Directory iterators for the three splits written by split-folders.
# `subset` is not passed: it only selects a slice of the files when the
# generator was created with `validation_split`, which `datagen` is not.
train_ds = datagen.flow_from_directory(
    'imgs/train',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# Validation and test iterators are not shuffled, so predictions stay aligned
# with the iterator's `.classes` array used by the evaluation cells.
val_ds = datagen.flow_from_directory(
    'imgs/val',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

test_ds = datagen.flow_from_directory(
    'imgs/test',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

In [None]:
def plot_train_history(history):
    """Plot training and validation curves for accuracy and loss side by side.

    Args:
        history: Object whose `.history` mapping holds the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series.
    """
    plt.figure(figsize=(15, 5))
    curves = history.history
    # Left panel: accuracy, right panel: loss; both share the same layout.
    for panel, metric in enumerate(('accuracy', 'loss'), start=1):
        plt.subplot(1, 2, panel)
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title('Model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

## **Vanilla CNN Model**

In [None]:
def _conv_stage(filters, drop_rate, **first_conv_kwargs):
    """Return the layers of one stage: two 3x3 conv + batch-norm pairs, 2x2 max-pooling, dropout.

    Extra keyword arguments are forwarded to the stage's first Conv2D only.
    """
    return [
        layers.Conv2D(filters, (3, 3), activation='relu', **first_conv_kwargs),
        layers.BatchNormalization(),
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(axis=3),
        layers.MaxPooling2D(pool_size=(2, 2), padding='same'),
        layers.Dropout(drop_rate),
    ]


# Three convolutional stages (32, 64, 128 filters) followed by a dense head.
# Only the very first convolution receives the input shape and uses the
# default padding; every other convolution uses padding='same'.
model_vanilla = tf.keras.Sequential(
    _conv_stage(32, 0.3, input_shape=input_shape)
    + _conv_stage(64, 0.3, padding='same')
    + _conv_stage(128, 0.5, padding='same')
    + [
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.25),
        # Five softmax outputs, one per city class.
        layers.Dense(5, activation='softmax'),
    ]
)

In [None]:
# Compile with the Adam optimizer and categorical cross-entropy (labels are
# produced with class_mode='categorical'), tracking accuracy; then print the layer summary.
model_vanilla.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_vanilla.summary()

## **Callbacks**

In [None]:
# Directory that receives the best-weights checkpoints.
models_dir = "saved_models"
os.makedirs(models_dir, exist_ok=True)

# Keep only the weights of the epoch with the highest validation accuracy.
checkpointer = ModelCheckpoint(filepath='saved_models/model_vanilla.hdf5',
                               monitor='val_accuracy', mode='max',
                               verbose=1, save_best_only=True)
# Stop once the validation loss has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
# Multiply the learning rate by 0.2 after 2 epochs without val_loss improvement.
# The floor has to sit below the optimizer's starting rate (Adam defaults to
# 0.001); with min_lr=0.001 the callback could never lower the rate.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)
callbacks = [early_stopping, reduce_lr, checkpointer]

In [None]:
# Train the vanilla CNN for up to 40 epochs; the callbacks may end training earlier.
history1 = model_vanilla.fit(train_ds, epochs = 40, validation_data = val_ds, callbacks=callbacks)

In [None]:
# Restore the best checkpoint first so the exported model contains those
# weights rather than the weights of the last training epoch (this matches how
# the VGG16 models are exported further down).
model_vanilla.load_weights('saved_models/model_vanilla.hdf5')
model_vanilla.save("model1")
plot_train_history(history1)

## **Model Evaluation of Vanilla CNN**

In [None]:
# Evaluate on the test split; later cells read score1[0] as loss and score1[1] as accuracy.
score1 = model_vanilla.evaluate(test_ds, verbose=1)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Model outputs for the test split; the next cell takes the argmax over axis 1.
Y_pred = model_vanilla.predict(test_ds)

In [None]:
# Turn the per-class scores into predicted class indices.
y_pred = np.argmax(Y_pred, axis=1)
# scikit-learn expects (y_true, y_pred): rows are true classes and columns are
# predicted classes, which is what the axis labels below state.
confusion_mtx = confusion_matrix(test_ds.classes, y_pred)
# Class names in index order, taken from the iterator that produced the labels.
class_names = list(test_ds.class_indices)

f, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(confusion_mtx, annot=True,
            linewidths=0.01,
            linecolor="white",
            cmap="rocket",
            fmt='d',  # the matrix holds integer counts
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax)

plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Per-class precision/recall/F1 for the vanilla CNN as a DataFrame (one row per class/average).
report1 = classification_report(test_ds.classes, y_pred, target_names=city_classes, output_dict=True)
df1 = pd.DataFrame(report1).transpose()
df1

## **Transfer Learning**

In [None]:
# Frozen ImageNet-pretrained VGG16 convolutional base (no classifier top).
vgg16 = VGG16(include_top=False, weights="imagenet", input_shape=input_shape)
vgg16.trainable = False

# New head on top of the base: global average pooling, a 1024-unit dense
# layer and a 5-way softmax output. The base is called with training=False.
inputs = keras.Input(shape=input_shape)
x = vgg16(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dense(5, activation='softmax')(x)
model_vgg16 = keras.Model(inputs=inputs, outputs=x)

In [None]:
# Compile the transfer-learning model with Adam and categorical cross-entropy, then print its summary.
model_vgg16.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_vgg16.summary()

In [None]:
# Keep only the weights of the epoch with the highest validation accuracy.
checkpointer = ModelCheckpoint(filepath='saved_models/model_vgg16.hdf5',
                               monitor='val_accuracy', mode='max',
                               verbose=1, save_best_only=True)
# Stop once the validation loss has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
# Multiply the learning rate by 0.2 after 2 epochs without val_loss improvement.
# The floor must be below the starting rate or the callback can never act:
# Adam defaults to 0.001, and this list is reused for fine-tuning at 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-7)
callbacks = [early_stopping, reduce_lr, checkpointer]

In [None]:
# Train the new head on top of the frozen VGG16 base for up to 40 epochs.
history2 = model_vgg16.fit(train_ds, epochs = 40, validation_data = val_ds, callbacks=callbacks)

In [None]:
# Restore the weights written by the ModelCheckpoint callback (best val_accuracy).
model_vgg16.load_weights('saved_models/model_vgg16.hdf5')

In [None]:
# Accuracy and loss curves of the transfer-learning run.
plot_train_history(history2)

In [None]:
# Evaluate the transfer-learning model on the test split and compare it with
# the vanilla CNN (index 0 is the loss, index 1 the accuracy), then export it.
score2 = model_vgg16.evaluate(test_ds, verbose=1)
print(f'Model 1 Vanilla Loss: {score1[0]}, Accuracy: {score1[1]*100}')
print(f'Model 2 VGG16 Loss: {score2[0]}, Accuracy: {score2[1]*100}')
model_vgg16.save("model2")

## **Fine Tuning**

In [None]:
# Unfreeze the VGG16 base and recompile with Adam at a learning rate of 1e-5.
vgg16.trainable = True
model_vgg16.compile(optimizer=keras.optimizers.Adam(1e-5),
 loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Continue training the whole model, reusing the callbacks list defined for the VGG16 run.
history3 = model_vgg16.fit(train_ds, epochs = 40, validation_data = val_ds, callbacks=callbacks)

In [None]:
# Restore the checkpointed weights, then export the model as "model3".
model_vgg16.load_weights('saved_models/model_vgg16.hdf5')
model_vgg16.save("model3")

## **Final Evaluation**

In [None]:
# Evaluate the fine-tuned model and print the loss/accuracy of all three runs.
score3 = model_vgg16.evaluate(test_ds, verbose=1)
print(f'Model 1 Vanilla Loss: {score1[0]}, Accuracy: {score1[1]*100}')
print(f'Model 2 VGG16 Loss: {score2[0]}, Accuracy: {score2[1]*100}')
print(f'Model 2 VGG16 Fine-tuned Loss: {score3[0]}, Accuracy: {score3[1]*100}')

In [None]:
# Model outputs of the fine-tuned model for the test split (overwrites the earlier Y_pred).
Y_pred = model_vgg16.predict(test_ds)

In [None]:
# Turn the per-class scores into predicted class indices.
y_pred = np.argmax(Y_pred, axis=1)
# scikit-learn expects (y_true, y_pred): rows are true classes and columns are
# predicted classes, which is what the axis labels below state.
confusion_mtx = confusion_matrix(test_ds.classes, y_pred)
# Class names in index order, taken from the iterator that produced the labels.
class_names = list(test_ds.class_indices)

f, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(confusion_mtx, annot=True,
            linewidths=0.01,
            linecolor="white",
            cmap="rocket",
            fmt='d',  # the matrix holds integer counts
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax)

plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Per-class precision/recall/F1 for the fine-tuned VGG16 model. The table is
# built from report2 (this model's report), not from the vanilla model's report1.
report2 = classification_report(test_ds.classes, y_pred, target_names=city_classes, output_dict=True)
df2 = pd.DataFrame(report2).transpose()
df2

In [None]:
# Preview up to 25 images of one training batch with true and predicted classes.
plt.figure(figsize=(100, 100))
x, label = train_ds.next()
# Guard against a batch that holds fewer than 25 images.
n_show = min(25, len(x))
# The model already ends in a softmax layer, so its output is used directly
# (no second softmax); one forward pass covers all displayed images.
probs = np.asarray(model_vgg16(x[:n_show]))
for i in range(n_show):
    plt.subplot(5, 5, i + 1)
    plt.imshow(x[i])
    # Labels are one-hot rows, so the argmax is the true class index.
    true_label = city_classes[np.argmax(label[i])]
    score_label = city_classes[np.argmax(probs[i])]
    plt.title(f'Truth: {true_label}\nPrediction:{score_label}')
    plt.axis(False)

In [None]:
# Export the fine-tuned model to Google Drive.
model_vgg16.save("/content/drive/MyDrive/model")
# Assuming your model is named model_vgg16
# Second copy with the TensorFlow SavedModel format requested explicitly.
model_vgg16.save("/content/drive/MyDrive/tensorflow", save_format='tf')
# The evaluation cell further down calls
# load_model('/content/drive/MyDrive/model.h5'); write that HDF5 file here so
# the load has something to read.
model_vgg16.save("/content/drive/MyDrive/model.h5")


In [None]:
# Re-evaluate the model on the test split using a fresh iterator (batch size 32).
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    directory='/content/imgs/test',
    class_mode='categorical',
    batch_size=32,
    target_size=(175, 175),
)

# evaluate() yields the loss followed by the accuracy metric.
eval_result = model_vgg16.evaluate(test_generator, steps=len(test_generator))
test_loss, test_accuracy = eval_result
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Found 1875 images belonging to 5 classes.
Test loss: 1.4102574586868286
Test accuracy: 0.6362666487693787


## **Testing single image**

In [None]:
# import tensorflow.keras as keras
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing import image
# import numpy as np

# model = load_model('/content/model3')



# # Load and preprocess the input image
# img_path = '/content/Citeisall/Kolkata/Kolkata-Test (10).jpg'
# img = image.load_img(img_path, target_size=(175,175))
# img = image.img_to_array(img)
# img = np.expand_dims(img, axis=0)
# img = img / 255.0

# # Make predictions on the input image
# predictions = model.predict(img)
# class_labels = ['Ahmedabad', 'Delhi', 'Kerala', 'Kolkata', 'Mumbai']

# # Set the threshold for minimum accuracy
# threshold = 0.0

# # Get the predicted class label and accuracy
# predicted_class_index = np.argmax(predictions)
# predicted_class_label = class_labels[predicted_class_index]
# accuracy = predictions[0][predicted_class_index]

# # Check if accuracy is below the threshold for all classes
# if all(accuracy < threshold for accuracy in predictions[0]):
# print("This location is not in our database.")
# else:
# print('Predicted class:', predicted_class_label)
# print('Accuracy:', accuracy)




Predicted class: Kolkata
Accuracy: 0.8562558


## **Visualization**

---



In [None]:
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns

# # Sample data from the classification report you provided
# labels = ["Ahmedabad", "Delhi", "Kerala", "Kolkata", "Mumbai"]
# precision = [0.85, 0.60, 0.64, 0.58, 0.55]
# recall = [0.84, 0.65, 0.66, 0.58, 0.49]
# f1 = [0.85, 0.62, 0.65, 0.58, 0.52]

# # Bar Plot
# plt.figure(figsize=(10, 5))
# barWidth = 0.25
# r1 = np.arange(len(precision))
# r2 = [x + barWidth for x in r1]
# r3 = [x + barWidth for x in r2]
# plt.bar(r1, precision, color='b', width=barWidth, edgecolor='grey', label='precision')
# plt.bar(r2, recall, color='r', width=barWidth, edgecolor='grey', label='recall')
# plt.bar(r3, f1, color='g', width=barWidth, edgecolor='grey', label='f1-score')
# plt.xlabel('Cities', fontweight='bold')
# plt.xticks([r + barWidth for r in range(len(precision))], labels)
# plt.legend()
# plt.show()

# # Heatmap
# df = {
# 'precision': precision,
# 'recall': recall,
# 'f1-score': f1
# }
# plt.figure(figsize=(10, 5))
# sns.heatmap([precision, recall, f1], yticklabels=["precision", "recall", "f1-score"], xticklabels=labels, cmap="YlGnBu", annot=True, fmt='.2g')
# plt.show()

# # Spider (Radar) Plot
# angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
# precision += precision[:1]
# recall += recall[:1]
# f1 += f1[:1]
# angles += angles[:1]
# plt.figure(figsize=(10, 5))
# ax = plt.subplot(111, polar=True)
# ax.fill(angles, precision, color='b', alpha=0.25)
# ax.fill(angles, recall, color='r', alpha=0.25)
# ax.fill(angles, f1, color='g', alpha=0.25)
# ax.set_theta_offset(np.pi / 2)
# ax.set_theta_direction(-1)
# plt.xticks(angles[:-1], labels)
# ax.set_rlabel_position(30)
# plt.yticks([0.2, 0.4, 0.6, 0.8], ["0.2", "0.4", "0.6", "0.8"], color="grey", size=12)
# plt.ylim(0, 1)
# ax.plot(angles, precision, color='b', linewidth=2, linestyle='solid', label='precision')
# ax.plot(angles, recall, color='r', linewidth=2, linestyle='solid', label='recall')
# ax.plot(angles, f1, color='g', linewidth=2, linestyle='solid', label='f1-score')
# ax.fill(angles, precision, color='b', alpha=0.4)
# ax.fill(angles, recall, color='r', alpha=0.4)
# ax.fill(angles, f1, color='g', alpha=0.4)
# plt.legend(loc="upper right", bbox_to_anchor=(0.1, 0.1))
# plt.show()


In [None]:
# Import necessary libraries
import numpy as np
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import pandas as pd

# Restore the exported model from Google Drive.
model = load_model('/content/drive/MyDrive/model.h5')

# Test iterator: same 1/255 rescaling and 175x175 size as used for training,
# one image per batch and no shuffling so predictions follow file order.
test_dir = '/content/imgs/test'
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(175, 175),
    batch_size=1,
    class_mode='categorical',
    shuffle=False,
)

# One prediction step per image (batch size is 1), then pick the top class.
predictions = model.predict(test_generator, steps=test_generator.n, verbose=1)
predicted_classes = predictions.argmax(axis=1)

# Ground-truth indices and the class names in index order.
true_classes = test_generator.classes
class_labels = list(test_generator.class_indices)

# Classification report as a DataFrame with one row per class/average.
report = classification_report(
    true_classes,
    predicted_classes,
    target_names=class_labels,
    output_dict=True,
)
report_df = pd.DataFrame(report).T

# Bar chart of the per-class metrics only (summary rows removed).
metric_columns = ['precision', 'recall', 'f1-score']
summary_rows = ['accuracy', 'macro avg', 'weighted avg']
per_class_metrics = report_df[metric_columns].drop(summary_rows)
per_class_metrics.plot(kind='bar', figsize=(15, 7))
plt.title('Classification Report Metrics')
plt.ylabel('Score')
plt.xticks(rotation=45)
plt.ylim(0, 1)
plt.grid(axis='y')
plt.tight_layout()
plt.show()

# Import necessary libraries
# import numpy as np
# from keras.models import load_model
# from keras.preprocessing.image import ImageDataGenerator
# from sklearn.metrics import classification_report, confusion_matrix
# import matplotlib.pyplot as plt
# import seaborn as sns

# # Load the pre-trained model
# model = load_model('/content/drive/MyDrive/model.h5')

# # Preprocess the test data
# test_datagen = ImageDataGenerator(rescale=1./255) # Assuming you rescaled your images during training
# test_dir = '/content/imgs/test'
# test_generator = test_datagen.flow_from_directory(
# test_dir,
# target_size=(175, 175), # Adjust if you used a different input size during training
# batch_size=1,
# class_mode='categorical',
# shuffle=False
# )

# # Predict classes using the model
# predictions = model.predict(test_generator, steps=test_generator.n, verbose=1)
# predicted_classes = np.argmax(predictions, axis=1)

# # Get true labels and class labels
# true_classes = test_generator.classes
# class_labels = list(test_generator.class_indices.keys())

# # Generate the classification report
# report = classification_report(true_classes, predicted_classes, target_names=class_labels)
# print(report)

# # Generate the confusion matrix
# confusion_mtx = confusion_matrix(true_classes, predicted_classes)

# # Plot the heatmap using Seaborn
# plt.figure(figsize=(10, 8))
# sns.heatmap(confusion_mtx, annot=True, fmt='d', cmap='Blues',
# xticklabels=class_labels,
# yticklabels=class_labels)
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.title('Confusion Matrix')
# plt.show()
