Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,10 @@
|
|
|
|
1 |
import pickle
|
2 |
import torch
|
3 |
import torch.nn as nn
|
4 |
from torchvision import transforms, models
|
5 |
from PIL import Image
|
|
|
6 |
import gradio as gr
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
@@ -12,16 +14,16 @@ from torchcam.utils import overlay_mask
|
|
12 |
from torchvision.transforms.functional import to_pil_image
|
13 |
from sklearn.metrics.pairwise import cosine_similarity
|
14 |
|
15 |
-
# Detectron2
|
16 |
from detectron2.engine import DefaultPredictor
|
17 |
from detectron2.config import get_cfg
|
18 |
from detectron2.utils.visualizer import Visualizer, ColorMode
|
19 |
from detectron2.data import MetadataCatalog
|
20 |
from detectron2 import model_zoo
|
21 |
|
22 |
-
from PIL import Image, ImageDraw, ImageFont
|
23 |
|
24 |
|
|
|
25 |
cfg = get_cfg()
|
26 |
cfg.MODEL.DEVICE = "cpu"
|
27 |
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
|
@@ -38,7 +40,7 @@ class Classifier(nn.Module):
|
|
38 |
|
39 |
def forward(self, x):
|
40 |
x = self.Linear1(x)
|
41 |
-
|
42 |
return x
|
43 |
|
44 |
class FeatureExtractor(nn.Module):
|
@@ -62,6 +64,7 @@ classifier = Classifier(input_channels, num_classes)
|
|
62 |
# Load the trained classifier weights
|
63 |
classifier.load_state_dict(torch.load('class_model_state.pt'))
|
64 |
|
|
|
65 |
class CombinedModel(nn.Module):
|
66 |
def __init__(self, feature_extractor, classifier):
|
67 |
super(CombinedModel, self).__init__()
|
@@ -85,55 +88,60 @@ imgTransforms = transforms.Compose([
|
|
85 |
transforms.Resize(256),
|
86 |
transforms.CenterCrop(224),
|
87 |
transforms.ToTensor(),
|
88 |
-
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # use mean and standard deviation computed for ImageNet dataset for normalization
|
89 |
|
|
|
90 |
resnet_18_trained = torch.load("resnet_18_trained.pth")
|
91 |
resnet_18_trained.eval()
|
92 |
|
|
|
93 |
features_mod = nn.Sequential(*(list(resnet_18_trained.children())[:-1]))
|
94 |
features_mod.eval()
|
95 |
|
|
|
96 |
with open('saved_dictionary.pkl', 'rb') as f:
|
97 |
loaded_dict = pickle.load(f)
|
98 |
|
99 |
-
#
|
100 |
def detect(input_img):
|
101 |
-
outputs = predictor(input_img)
|
102 |
-
pred_classes = outputs["instances"].pred_classes
|
103 |
-
pred_masks = outputs["instances"].pred_masks
|
104 |
|
105 |
-
automobile_indices = (pred_classes == 2).nonzero(as_tuple=True)[0].tolist()
|
106 |
|
107 |
-
if len(automobile_indices) > 0:
|
|
|
|
|
108 |
car_index = max(automobile_indices, key=lambda i: pred_masks[i].sum())
|
109 |
|
110 |
-
#
|
111 |
coord = np.where(pred_masks[car_index].cpu().numpy())
|
112 |
min_y, max_y = coord[0].min(), coord[0].max()
|
113 |
min_x, max_x = coord[1].min(), coord[1].max()
|
114 |
|
115 |
-
# Crop the car from the image
|
116 |
mask = pred_masks[car_index].cpu().numpy()
|
117 |
cropped_car = input_img[min_y:max_y+1, min_x:max_x+1]
|
118 |
cropped_mask = mask[min_y:max_y+1, min_x:max_x+1]
|
119 |
|
120 |
-
# Create
|
121 |
white_back = np.ones_like(cropped_car) * 255
|
122 |
|
123 |
-
#
|
124 |
white_back[cropped_mask] = cropped_car[cropped_mask]
|
125 |
|
126 |
-
#
|
127 |
cropped_image_pil = Image.fromarray(white_back.astype(np.uint8))
|
128 |
-
|
129 |
-
# Predict year category
|
130 |
inp = imgTransforms(cropped_image_pil).unsqueeze(0)
|
131 |
|
|
|
132 |
with torch.no_grad():
|
133 |
out = combined_model(inp)
|
134 |
prediction = F.softmax(out, dim=1)
|
135 |
pred_year_cat = round(torch.argmax(prediction, dim = 1).tolist()[0])
|
136 |
|
|
|
137 |
year_categories = torch.tensor([0,1,2,3,4], dtype=torch.float32).to(torch.device('cpu'))
|
138 |
|
139 |
# Calculate modernity score out of test_outputs by multiplying the outputs probabilities with the according year categories
|
@@ -141,25 +149,28 @@ def detect(input_img):
|
|
141 |
modernity_sc = modernity_scores.tolist()[0]
|
142 |
modernity = round(float(modernity_sc), 2)
|
143 |
|
144 |
-
#Create Year
|
145 |
with SmoothGradCAMpp(combined_model) as cam_extractor:
|
146 |
# Preprocess data and feed it to the model
|
147 |
out = combined_model(inp)
|
148 |
# Retrieve the CAM by passing the class index and the model output
|
149 |
modernity_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
|
150 |
-
|
|
|
151 |
heatmap_modernity = overlay_mask(cropped_image_pil, to_pil_image(modernity_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
|
152 |
|
153 |
-
# Mapping dictionary
|
154 |
year_category_map = {
|
155 |
0: '2000-2003',
|
156 |
1: '2004-2008',
|
157 |
2: '2009-2011',
|
158 |
3: '2012-2014',
|
159 |
4: '2015-2017'}
|
160 |
-
|
|
|
161 |
pred_year_cat = year_category_map[pred_year_cat]
|
162 |
|
|
|
163 |
with torch.no_grad():
|
164 |
output = resnet_18_trained(inp)
|
165 |
body_type = round(torch.argmax(output, dim = 1).tolist()[0])
|
@@ -171,8 +182,10 @@ def detect(input_img):
|
|
171 |
# Retrieve the CAM by passing the class index and the model output
|
172 |
body_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
|
173 |
|
|
|
174 |
heatmap_body = overlay_mask(cropped_image_pil, to_pil_image(body_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
|
175 |
|
|
|
176 |
body_type_map = {
|
177 |
0: 'Hatchback',
|
178 |
1: 'SUV',
|
@@ -184,15 +197,20 @@ def detect(input_img):
|
|
184 |
7: 'Estate',
|
185 |
8: 'Unknown'}
|
186 |
|
|
|
187 |
body_type = body_type_map[body_type]
|
188 |
|
|
|
189 |
with torch.no_grad():
|
190 |
features = features_mod(inp).view(1,-1)
|
191 |
|
|
|
192 |
year_body_cat = pred_year_cat + '_' + body_type
|
193 |
|
|
|
194 |
mean_features = loaded_dict[year_body_cat]
|
195 |
|
|
|
196 |
cosin_sim = cosine_similarity(features, mean_features.unsqueeze(0)).item()
|
197 |
cosin = round(float(cosin_sim), 2)
|
198 |
|
@@ -217,7 +235,7 @@ demo = gr.Interface(
|
|
217 |
gr.Image(label="Heatmap for Production Year Category", type="pil"),
|
218 |
gr.Image(label="Heatmap for Car Body Type Category", type="pil")],
|
219 |
title='Car detection and Modernity and Typicality Scores of Cars',
|
220 |
-
description='Select one of the provided example images or upload your own image. The model will then search fo a car in the image. If one or more cars are being detected, the car with the
|
221 |
examples=[["Old_car.jpg"], ['Auto-ueberholt-Fahrrad-scaled.jpeg'], ['160923-nabu-autobahn-helge-may7.jpeg']]
|
222 |
)
|
223 |
|
|
|
1 |
+
# Import packages
|
2 |
import pickle
|
3 |
import torch
|
4 |
import torch.nn as nn
|
5 |
from torchvision import transforms, models
|
6 |
from PIL import Image
|
7 |
+
from PIL import Image, ImageDraw, ImageFont
|
8 |
import gradio as gr
|
9 |
import numpy as np
|
10 |
import pandas as pd
|
|
|
14 |
from torchvision.transforms.functional import to_pil_image
|
15 |
from sklearn.metrics.pairwise import cosine_similarity
|
16 |
|
17 |
+
# Import specific Detectron2 packages
|
18 |
from detectron2.engine import DefaultPredictor
|
19 |
from detectron2.config import get_cfg
|
20 |
from detectron2.utils.visualizer import Visualizer, ColorMode
|
21 |
from detectron2.data import MetadataCatalog
|
22 |
from detectron2 import model_zoo
|
23 |
|
|
|
24 |
|
25 |
|
26 |
+
# Load detectron2 model and set device to CPU
|
27 |
cfg = get_cfg()
|
28 |
cfg.MODEL.DEVICE = "cpu"
|
29 |
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
|
|
|
40 |
|
41 |
def forward(self, x):
|
42 |
x = self.Linear1(x)
|
43 |
+
|
44 |
return x
|
45 |
|
46 |
class FeatureExtractor(nn.Module):
|
|
|
64 |
# Load the trained classifier weights
|
65 |
classifier.load_state_dict(torch.load('class_model_state.pt'))
|
66 |
|
67 |
+
# Combine feature extractor and fully connected layer
|
68 |
class CombinedModel(nn.Module):
|
69 |
def __init__(self, feature_extractor, classifier):
|
70 |
super(CombinedModel, self).__init__()
|
|
|
88 |
transforms.Resize(256),
|
89 |
transforms.CenterCrop(224),
|
90 |
transforms.ToTensor(),
|
91 |
+
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # use mean and standard deviation computed for ImageNet dataset for normalization
|
92 |
|
93 |
+
# Load body type model from Assignment 3 (predicts only 9 classes, as already noted in Assignment 3)
|
94 |
resnet_18_trained = torch.load("resnet_18_trained.pth")
|
95 |
resnet_18_trained.eval()
|
96 |
|
97 |
+
# Load feature extractor for the cosine similarity
|
98 |
features_mod = nn.Sequential(*(list(resnet_18_trained.children())[:-1]))
|
99 |
features_mod.eval()
|
100 |
|
101 |
+
# Load morphs dictionary from Assignment 3
|
102 |
with open('saved_dictionary.pkl', 'rb') as f:
|
103 |
loaded_dict = pickle.load(f)
|
104 |
|
105 |
+
# Detect function for the App Interface
|
106 |
def detect(input_img):
|
107 |
+
outputs = predictor(input_img) # detect objects in image with detectron2 model
|
108 |
+
pred_classes = outputs["instances"].pred_classes # get predicted classes
|
109 |
+
pred_masks = outputs["instances"].pred_masks # get predicted masks for cropping the image
|
110 |
|
111 |
+
automobile_indices = (pred_classes == 2).nonzero(as_tuple=True)[0].tolist() # Filter out indices of car objects in the image (Based on what I have found in the internet cars do have the predicted class == 2)
|
112 |
|
113 |
+
if len(automobile_indices) > 0: # if there is any car
|
114 |
+
|
115 |
+
# Find car with the largest number of pixels
|
116 |
car_index = max(automobile_indices, key=lambda i: pred_masks[i].sum())
|
117 |
|
118 |
+
# Get image coordinates of the largest car image
|
119 |
coord = np.where(pred_masks[car_index].cpu().numpy())
|
120 |
min_y, max_y = coord[0].min(), coord[0].max()
|
121 |
min_x, max_x = coord[1].min(), coord[1].max()
|
122 |
|
123 |
+
# Crop the car from the image and define cropped mask
|
124 |
mask = pred_masks[car_index].cpu().numpy()
|
125 |
cropped_car = input_img[min_y:max_y+1, min_x:max_x+1]
|
126 |
cropped_mask = mask[min_y:max_y+1, min_x:max_x+1]
|
127 |
|
128 |
+
# Create white background with dimensions of the cropped car image
|
129 |
white_back = np.ones_like(cropped_car) * 255
|
130 |
|
131 |
+
# Put cropped car on the white background
|
132 |
white_back[cropped_mask] = cropped_car[cropped_mask]
|
133 |
|
134 |
+
# Transform cropped image for model predictions
|
135 |
cropped_image_pil = Image.fromarray(white_back.astype(np.uint8))
|
|
|
|
|
136 |
inp = imgTransforms(cropped_image_pil).unsqueeze(0)
|
137 |
|
138 |
+
# Predict year category and get probabilities for modernity score calculation
|
139 |
with torch.no_grad():
|
140 |
out = combined_model(inp)
|
141 |
prediction = F.softmax(out, dim=1)
|
142 |
pred_year_cat = round(torch.argmax(prediction, dim = 1).tolist()[0])
|
143 |
|
144 |
+
|
145 |
year_categories = torch.tensor([0,1,2,3,4], dtype=torch.float32).to(torch.device('cpu'))
|
146 |
|
147 |
# Calculate the modernity score from the model outputs by multiplying the output probabilities by the corresponding year categories
|
|
|
149 |
modernity_sc = modernity_scores.tolist()[0]
|
150 |
modernity = round(float(modernity_sc), 2)
|
151 |
|
152 |
+
# Create year category heatmap
|
153 |
with SmoothGradCAMpp(combined_model) as cam_extractor:
|
154 |
# Preprocess data and feed it to the model
|
155 |
out = combined_model(inp)
|
156 |
# Retrieve the CAM by passing the class index and the model output
|
157 |
modernity_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
|
158 |
+
|
159 |
+
# Year Category heatmap
|
160 |
heatmap_modernity = overlay_mask(cropped_image_pil, to_pil_image(modernity_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
|
161 |
|
162 |
+
# Mapping dictionary for year categories as defined in Assignment 3
|
163 |
year_category_map = {
|
164 |
0: '2000-2003',
|
165 |
1: '2004-2008',
|
166 |
2: '2009-2011',
|
167 |
3: '2012-2014',
|
168 |
4: '2015-2017'}
|
169 |
+
|
170 |
+
# Get year category from prediction
|
171 |
pred_year_cat = year_category_map[pred_year_cat]
|
172 |
|
173 |
+
# Predict bodytype
|
174 |
with torch.no_grad():
|
175 |
output = resnet_18_trained(inp)
|
176 |
body_type = round(torch.argmax(output, dim = 1).tolist()[0])
|
|
|
182 |
# Retrieve the CAM by passing the class index and the model output
|
183 |
body_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
|
184 |
|
185 |
+
# Body type heatmap
|
186 |
heatmap_body = overlay_mask(cropped_image_pil, to_pil_image(body_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
|
187 |
|
188 |
+
# Mapping dictionary for car body types as defined in Assignment 3
|
189 |
body_type_map = {
|
190 |
0: 'Hatchback',
|
191 |
1: 'SUV',
|
|
|
197 |
7: 'Estate',
|
198 |
8: 'Unknown'}
|
199 |
|
200 |
+
# Get body type from prediction
|
201 |
body_type = body_type_map[body_type]
|
202 |
|
203 |
+
# Extract features for cosine similarity
|
204 |
with torch.no_grad():
|
205 |
features = features_mod(inp).view(1,-1)
|
206 |
|
207 |
+
# Combine predicted year category and predicted body type to get key for morph dictionary
|
208 |
year_body_cat = pred_year_cat + '_' + body_type
|
209 |
|
210 |
+
# Load mean features for specific year_body category
|
211 |
mean_features = loaded_dict[year_body_cat]
|
212 |
|
213 |
+
# Calculate cosine similarity as typicality score
|
214 |
cosin_sim = cosine_similarity(features, mean_features.unsqueeze(0)).item()
|
215 |
cosin = round(float(cosin_sim), 2)
|
216 |
|
|
|
235 |
gr.Image(label="Heatmap for Production Year Category", type="pil"),
|
236 |
gr.Image(label="Heatmap for Car Body Type Category", type="pil")],
|
237 |
title='Car detection and Modernity and Typicality Scores of Cars',
|
238 |
+
description='Select one of the provided example images or upload your own image. The model will then search fo a car in the image. If one or more cars are being detected, the car with the largest number of pixels is being extracted, and its design modernity and typicality scores are being calculated.',
|
239 |
examples=[["Old_car.jpg"], ['Auto-ueberholt-Fahrrad-scaled.jpeg'], ['160923-nabu-autobahn-helge-may7.jpeg']]
|
240 |
)
|
241 |
|