car_class

Sleeping

App Files Files Community

wwnvp01 committed on Jul 23, 2024

Commit

e3d8486

verified ·

1 Parent(s): f4aeb87

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -22

app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import pickle
 import torch
 import torch.nn as nn
 from torchvision import transforms, models
 from PIL import Image
 import gradio as gr
 import numpy as np
 import pandas as pd
@@ -12,16 +14,16 @@ from torchcam.utils import overlay_mask
 from torchvision.transforms.functional import to_pil_image
 from sklearn.metrics.pairwise import cosine_similarity
-# Detectron2 imports
 from detectron2.engine import DefaultPredictor
 from detectron2.config import get_cfg
 from detectron2.utils.visualizer import Visualizer, ColorMode
 from detectron2.data import MetadataCatalog
 from detectron2 import model_zoo
-from PIL import Image, ImageDraw, ImageFont
 cfg = get_cfg()
 cfg.MODEL.DEVICE = "cpu"
 cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
@@ -38,7 +40,7 @@ class Classifier(nn.Module):
     def forward(self, x):
         x = self.Linear1(x)
-        #x = nn.functional.softmax(x, dim=1) # use softmax to get probabilities
         return x
 class FeatureExtractor(nn.Module):
@@ -62,6 +64,7 @@ classifier = Classifier(input_channels, num_classes)
 # Load the trained classifier weights
 classifier.load_state_dict(torch.load('class_model_state.pt'))
 class CombinedModel(nn.Module):
     def __init__(self, feature_extractor, classifier):
         super(CombinedModel, self).__init__()
@@ -85,55 +88,60 @@ imgTransforms = transforms.Compose([
                                     transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
-                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # use mean and standard deviation computed for ImageNet dataset for normalization])
 resnet_18_trained = torch.load("resnet_18_trained.pth")
 resnet_18_trained.eval()
 features_mod = nn.Sequential(*(list(resnet_18_trained.children())[:-1]))
 features_mod.eval()
 with open('saved_dictionary.pkl', 'rb') as f:
     loaded_dict = pickle.load(f)
-# Function
 def detect(input_img):
-    outputs = predictor(input_img)
-    pred_classes = outputs["instances"].pred_classes
-    pred_masks = outputs["instances"].pred_masks
-    automobile_indices = (pred_classes == 2).nonzero(as_tuple=True)[0].tolist()
-    if len(automobile_indices) > 0:
         car_index = max(automobile_indices, key=lambda i: pred_masks[i].sum())
-        # Define image coordinates of the car image
         coord = np.where(pred_masks[car_index].cpu().numpy())
         min_y, max_y = coord[0].min(), coord[0].max()
         min_x, max_x = coord[1].min(), coord[1].max()
-        # Crop the car from the image
         mask = pred_masks[car_index].cpu().numpy()
         cropped_car = input_img[min_y:max_y+1, min_x:max_x+1]
         cropped_mask = mask[min_y:max_y+1, min_x:max_x+1]
-        # Create a white background
         white_back = np.ones_like(cropped_car) * 255
-        # Overlay the cropped car on the white background
         white_back[cropped_mask] = cropped_car[cropped_mask]
-        # Prepare the cropped image for further steps
         cropped_image_pil = Image.fromarray(white_back.astype(np.uint8))
-        # Predict year category
         inp = imgTransforms(cropped_image_pil).unsqueeze(0)
         with torch.no_grad():
             out = combined_model(inp)
             prediction = F.softmax(out, dim=1)
             pred_year_cat = round(torch.argmax(prediction, dim = 1).tolist()[0])
         year_categories = torch.tensor([0,1,2,3,4], dtype=torch.float32).to(torch.device('cpu'))
         # Calculate modernity score out of test_outputs by multiplying the outputs probabilities with the according year categories
@@ -141,25 +149,28 @@ def detect(input_img):
         modernity_sc = modernity_scores.tolist()[0]
         modernity = round(float(modernity_sc), 2)
-        #Create Year Categoryn heatmap
         with SmoothGradCAMpp(combined_model) as cam_extractor:
         # Preprocess data and feed it to the model
             out = combined_model(inp)
         # Retrieve the CAM by passing the class index and the model output
             modernity_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
         heatmap_modernity = overlay_mask(cropped_image_pil, to_pil_image(modernity_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
-        # Mapping dictionary
         year_category_map = {
             0: '2000-2003',
             1: '2004-2008',
             2: '2009-2011',
             3: '2012-2014',
             4: '2015-2017'}
         pred_year_cat = year_category_map[pred_year_cat]
         with torch.no_grad():
             output = resnet_18_trained(inp)
             body_type = round(torch.argmax(output, dim = 1).tolist()[0])
@@ -171,8 +182,10 @@ def detect(input_img):
         # Retrieve the CAM by passing the class index and the model output
             body_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
         heatmap_body = overlay_mask(cropped_image_pil, to_pil_image(body_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
         body_type_map = {
                 0: 'Hatchback',
                 1: 'SUV',
@@ -184,15 +197,20 @@ def detect(input_img):
                 7: 'Estate',
                 8: 'Unknown'}
         body_type = body_type_map[body_type]
         with torch.no_grad():
             features = features_mod(inp).view(1,-1)
         year_body_cat = pred_year_cat + '_' + body_type
         mean_features = loaded_dict[year_body_cat]
         cosin_sim = cosine_similarity(features, mean_features.unsqueeze(0)).item()
         cosin = round(float(cosin_sim), 2)
@@ -217,7 +235,7 @@ demo = gr.Interface(
              gr.Image(label="Heatmap for Production Year Category", type="pil"),
              gr.Image(label="Heatmap for Car Body Type Category", type="pil")],
     title='Car detection and Modernity and Typicality Scores of Cars',
-    description='Select one of the provided example images or upload your own image. The model will then search fo a car in the image. If one or more cars are being detected, the car with the most salient instance is being extracted, and its design modernity and typicality score are being calculated.',
     examples=[["Old_car.jpg"], ['Auto-ueberholt-Fahrrad-scaled.jpeg'], ['160923-nabu-autobahn-helge-may7.jpeg']]
 )

+# Import packages
 import pickle
 import torch
 import torch.nn as nn
 from torchvision import transforms, models
 from PIL import Image
+from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
 import numpy as np
 import pandas as pd
 from torchvision.transforms.functional import to_pil_image
 from sklearn.metrics.pairwise import cosine_similarity
+# Import specific Detectron2 packages
 from detectron2.engine import DefaultPredictor
 from detectron2.config import get_cfg
 from detectron2.utils.visualizer import Visualizer, ColorMode
 from detectron2.data import MetadataCatalog
 from detectron2 import model_zoo
+# Load detectron2 model and set device to CPU
 cfg = get_cfg()
 cfg.MODEL.DEVICE = "cpu"
 cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
     def forward(self, x):
         x = self.Linear1(x)
         return x
 class FeatureExtractor(nn.Module):
 # Load the trained classifier weights
 classifier.load_state_dict(torch.load('class_model_state.pt'))
+# Combine feature extractor and fully connected layer
 class CombinedModel(nn.Module):
     def __init__(self, feature_extractor, classifier):
         super(CombinedModel, self).__init__()
                                     transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
+                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) # use mean and standard deviation computed for ImageNet dataset for normalization
+# Load bodytype model from Assignment 3 (only predicts 9 classes as already mentioned in Assignment 3)
 resnet_18_trained = torch.load("resnet_18_trained.pth")
 resnet_18_trained.eval()
+# Load feature extractor for the cosine similarity
 features_mod = nn.Sequential(*(list(resnet_18_trained.children())[:-1]))
 features_mod.eval()
+# Load morphs dictionary from Assignment 3
 with open('saved_dictionary.pkl', 'rb') as f:
     loaded_dict = pickle.load(f)
+# Detect function for the App Interface
 def detect(input_img):
+    outputs = predictor(input_img) # detect objects in image with detectron2 model
+    pred_classes = outputs["instances"].pred_classes # get predicted classes
+    pred_masks = outputs["instances"].pred_masks # get predicted masks for cropping the image
+    automobile_indices = (pred_classes == 2).nonzero(as_tuple=True)[0].tolist() # Select the indices of car objects in the image (in the COCO label set used by this Detectron2 model, predicted class 2 is 'car')
+    if len(automobile_indices) > 0: # if there is any car
+        # Find car with the largest number of pixels
         car_index = max(automobile_indices, key=lambda i: pred_masks[i].sum())
+        # Get image coordinates of the largest car image
         coord = np.where(pred_masks[car_index].cpu().numpy())
         min_y, max_y = coord[0].min(), coord[0].max()
         min_x, max_x = coord[1].min(), coord[1].max()
+        # Crop the car from the image and define cropped mask
         mask = pred_masks[car_index].cpu().numpy()
         cropped_car = input_img[min_y:max_y+1, min_x:max_x+1]
         cropped_mask = mask[min_y:max_y+1, min_x:max_x+1]
+        # Create white background with dimensions of the cropped car image
         white_back = np.ones_like(cropped_car) * 255
+        # Put cropped car on the white background
         white_back[cropped_mask] = cropped_car[cropped_mask]
+        # Transform cropped image for model predictions
         cropped_image_pil = Image.fromarray(white_back.astype(np.uint8))
         inp = imgTransforms(cropped_image_pil).unsqueeze(0)
+        # Predict year category and get probabilities for modernity score calculation
         with torch.no_grad():
             out = combined_model(inp)
             prediction = F.softmax(out, dim=1)
             pred_year_cat = round(torch.argmax(prediction, dim = 1).tolist()[0])
         year_categories = torch.tensor([0,1,2,3,4], dtype=torch.float32).to(torch.device('cpu'))
         # Calculate modernity score out of test_outputs by multiplying the outputs probabilities with the according year categories
         modernity_sc = modernity_scores.tolist()[0]
         modernity = round(float(modernity_sc), 2)
+        #Create Year Category heatmap
         with SmoothGradCAMpp(combined_model) as cam_extractor:
         # Preprocess data and feed it to the model
             out = combined_model(inp)
         # Retrieve the CAM by passing the class index and the model output
             modernity_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
+        # Year Category heatmap
         heatmap_modernity = overlay_mask(cropped_image_pil, to_pil_image(modernity_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
+        # Mapping dictionary for year categories as defined in Assignment 3
         year_category_map = {
             0: '2000-2003',
             1: '2004-2008',
             2: '2009-2011',
             3: '2012-2014',
             4: '2015-2017'}
+        # Get year category from prediction
         pred_year_cat = year_category_map[pred_year_cat]
+        # Predict bodytype
         with torch.no_grad():
             output = resnet_18_trained(inp)
             body_type = round(torch.argmax(output, dim = 1).tolist()[0])
         # Retrieve the CAM by passing the class index and the model output
             body_activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
+        # Body type heatmap
         heatmap_body = overlay_mask(cropped_image_pil, to_pil_image(body_activation_map[0].squeeze(0), mode='F'), alpha=0.6)
+        # Mapping dictionary for car body types as defined in Assignment 3
         body_type_map = {
                 0: 'Hatchback',
                 1: 'SUV',
                 7: 'Estate',
                 8: 'Unknown'}
+        # Get body type from prediction
         body_type = body_type_map[body_type]
+        # Extract features for cosine similarity
         with torch.no_grad():
             features = features_mod(inp).view(1,-1)
+        # Combine predicted year category and predicted body type to get key for morph dictionary
         year_body_cat = pred_year_cat + '_' + body_type
+        # Load mean features for specific year_body category
         mean_features = loaded_dict[year_body_cat]
+        # Calculate cosine similarity as typicality score
         cosin_sim = cosine_similarity(features, mean_features.unsqueeze(0)).item()
         cosin = round(float(cosin_sim), 2)
              gr.Image(label="Heatmap for Production Year Category", type="pil"),
              gr.Image(label="Heatmap for Car Body Type Category", type="pil")],
     title='Car detection and Modernity and Typicality Scores of Cars',
+    description='Select one of the provided example images or upload your own image. The model will then search fo a car in the image. If one or more cars are being detected, the car with the largest number of pixels is being extracted, and its design modernity and typicality scores are being calculated.',
     examples=[["Old_car.jpg"], ['Auto-ueberholt-Fahrrad-scaled.jpeg'], ['160923-nabu-autobahn-helge-may7.jpeg']]
 )