# Import packages
import pickle
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import numpy as np
import pandas as pd
import torch.nn.functional as F
from torchcam.methods import SmoothGradCAMpp
from torchcam.utils import overlay_mask
from torchvision.transforms.functional import to_pil_image
from sklearn.metrics.pairwise import cosine_similarity

# Import specific Detectron2 packages
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog
from detectron2 import model_zoo


# Load the Detectron2 Mask R-CNN instance-segmentation model and run it on CPU.
# The base YAML is merged first and every override is applied afterwards, so
# none of our settings can be silently replaced by a value from the merged file.
_MASK_RCNN_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(_MASK_RCNN_CONFIG))
cfg.MODEL.DEVICE = "cpu"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # score threshold used at test time
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(_MASK_RCNN_CONFIG)
predictor = DefaultPredictor(cfg)

# Presteps to combine models from Assignment 3
class Classifier(nn.Module):
    """Single linear layer that maps a feature vector to class logits.

    Args:
        input_channels: Size of the incoming feature vector.
        num_classes: Number of output logits.
    """

    def __init__(self, input_channels, num_classes):
        super().__init__()
        # The attribute name determines the state-dict keys ("Linear1.weight",
        # "Linear1.bias"), so it must stay unchanged for saved weights to load.
        self.Linear1 = nn.Linear(in_features=input_channels, out_features=num_classes)

    def forward(self, x):
        """Return the raw output of the linear layer for ``x``."""
        return self.Linear1(x)

class FeatureExtractor(nn.Module):
    """Pretrained ResNet-18 with its last child module removed, used as an encoder."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Keep every child module of the network except the final one.
        trunk = list(backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*trunk)

    def forward(self, x):
        """Run the truncated network and flatten each sample to one feature vector."""
        features = self.feature_extractor(x)
        batch_size = features.size(0)
        return features.view(batch_size, -1)


input_channels = 512  # Output size of the ResNet18 feature extractor
num_classes = 5  # Number of year range categories from Assignment 3

# Initialize the classifier
classifier = Classifier(input_channels, num_classes)

# Load the trained classifier weights. map_location='cpu' remaps tensors that
# were saved from a GPU, so the checkpoint also loads on a CPU-only host (the
# rest of this app is configured to run on CPU).
classifier.load_state_dict(torch.load('class_model_state.pt', map_location='cpu'))

# Combine feature extractor and fully connected layer
class CombinedModel(nn.Module):
    """Chain a feature extractor and a classifier into a single module.

    Args:
        feature_extractor: Module applied to the input first.
        classifier: Module applied to the feature extractor's output.
    """

    def __init__(self, feature_extractor, classifier):
        super().__init__()
        # Registration order (extractor first, classifier second) is kept on
        # purpose: it defines the order in which the sub-modules are listed.
        self.feature_extractor = feature_extractor
        self.classifier = classifier

    def forward(self, x):
        """Return ``classifier(feature_extractor(x))``."""
        return self.classifier(self.feature_extractor(x))

# Build the ResNet-18 based feature extractor
feature_extractor = FeatureExtractor()

# Chain feature extractor and classifier into one model. eval() returns the
# module itself, so the model is switched to evaluation mode in the same step.
combined_model = CombinedModel(feature_extractor, classifier).eval()

# Preprocessing pipeline applied to an image before it is passed to the models.
# Normalisation uses the mean and standard deviation computed for the ImageNet dataset.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

imgTransforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

# Load bodytype model from Assignment 3 (only predicts 9 classes as already mentioned in Assignment 3).
# map_location="cpu" remaps tensors saved from a GPU so the model also loads on a
# CPU-only host.
# NOTE(review): the file is used as a complete pickled module (not a state dict);
# on PyTorch releases where torch.load defaults to weights_only=True this call
# presumably needs weights_only=False - confirm against the deployed version.
resnet_18_trained = torch.load("resnet_18_trained.pth", map_location="cpu")
resnet_18_trained.eval()

# Feature extractor for the cosine similarity: every child module of the
# bodytype model except the last one.
features_mod = nn.Sequential(*(list(resnet_18_trained.children())[:-1]))
features_mod.eval()

# Load morphs dictionary from Assignment 3.
# detect() looks entries up with keys of the form "<year range>_<body type>" and
# uses each value as the mean feature vector for that category.
# NOTE(review): unpickling can execute arbitrary code, so this file must only
# ever come from a trusted source.
with open('saved_dictionary.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

# Detect function for the App Interface, plus its private helpers and lookup tables

# Class index the COCO-trained Detectron2 model assigns to cars.
_COCO_CAR_CLASS_ID = 2

# Mapping dictionary for year categories as defined in Assignment 3
_YEAR_CATEGORY_MAP = {
    0: '2000-2003',
    1: '2004-2008',
    2: '2009-2011',
    3: '2012-2014',
    4: '2015-2017',
}

# Mapping dictionary for car body types as defined in Assignment 3
_BODY_TYPE_MAP = {
    0: 'Hatchback',
    1: 'SUV',
    2: 'MPV',
    3: 'Saloon',
    4: 'Convertible',
    5: 'Coupe',
    6: 'Pickup',
    7: 'Estate',
    8: 'Unknown',
}


def _crop_largest_car(input_img, instances):
    """Cut the largest detected car out of ``input_img`` onto a white background.

    Args:
        input_img: Image array the detection was run on (indexed as rows, columns).
        instances: The ``"instances"`` entry of the predictor output.

    Returns:
        An array with the bounding-box crop of the car whose mask covers the most
        pixels, with every pixel outside the mask set to 255, or ``None`` when no
        car (or only an empty mask) was detected.
    """
    pred_classes = instances.pred_classes
    pred_masks = instances.pred_masks

    car_indices = (pred_classes == _COCO_CAR_CLASS_ID).nonzero(as_tuple=True)[0].tolist()
    if not car_indices:
        return None

    # Car with the largest number of mask pixels
    car_index = max(car_indices, key=lambda i: pred_masks[i].sum().item())
    mask = pred_masks[car_index].cpu().numpy()
    if not mask.any():
        # An empty mask has no bounding box; treat it like "nothing detected".
        return None

    # Bounding box of the mask
    rows, cols = np.where(mask)
    min_y, max_y = rows.min(), rows.max()
    min_x, max_x = cols.min(), cols.max()

    cropped_car = input_img[min_y:max_y + 1, min_x:max_x + 1]
    cropped_mask = mask[min_y:max_y + 1, min_x:max_x + 1]

    # White canvas of the crop's size; copy only the car pixels onto it
    white_back = np.full_like(cropped_car, 255)
    white_back[cropped_mask] = cropped_car[cropped_mask]
    return white_back


def _cam_overlay(model, inp, image):
    """Return ``image`` overlaid with the SmoothGradCAM++ map of ``model``'s top class.

    Args:
        model: Network to explain.
        inp: Preprocessed input batch fed to ``model``.
        image: PIL image the activation map is blended onto.
    """
    with SmoothGradCAMpp(model) as cam_extractor:
        # The forward pass must happen while the extractor is active
        out = model(inp)
        # Retrieve the CAM by passing the class index and the model output
        activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)
    cam_image = to_pil_image(activation_map[0].squeeze(0), mode='F')
    return overlay_mask(image, cam_image, alpha=0.6)


def detect(input_img):
    """Find the largest car in an image and score its modernity and typicality.

    Args:
        input_img: RGB image as a numpy array (as delivered by the Gradio image
            input), or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple ``(status, car_image, prediction_text, modernity, typicality,
        year_heatmap, body_heatmap)``. When no image is given or no car is found,
        ``status`` explains why and every other entry is ``None``. ``typicality``
        is ``None`` when the morph dictionary has no entry for the predicted
        year/body-type combination.
    """
    if input_img is None:
        return "No image provided", None, None, None, None, None, None

    # Gradio hands over RGB, while Detectron2's DefaultPredictor expects BGR
    # (the default cfg.INPUT.FORMAT), so flip the channel axis for detection only.
    bgr_img = np.ascontiguousarray(input_img[:, :, ::-1])
    outputs = predictor(bgr_img)  # detect objects in image with detectron2 model

    # Cropping is done on the original RGB image
    white_back = _crop_largest_car(input_img, outputs["instances"])
    if white_back is None:
        return "No car found", None, None, None, None, None, None

    # Transform cropped image for model predictions
    cropped_image_pil = Image.fromarray(white_back.astype(np.uint8))
    inp = imgTransforms(cropped_image_pil).unsqueeze(0)

    # Predict year category and get probabilities for the modernity score
    with torch.no_grad():
        prediction = F.softmax(combined_model(inp), dim=1)
    year_index = int(prediction.argmax(dim=1).item())

    # Modernity score: probability-weighted mean of the year category indices
    year_categories = torch.arange(prediction.shape[1], dtype=prediction.dtype)
    modernity = round(float((prediction * year_categories).sum(dim=1).item()), 2)

    # Year category heatmap
    heatmap_modernity = _cam_overlay(combined_model, inp, cropped_image_pil)
    pred_year_cat = _YEAR_CATEGORY_MAP[year_index]

    # Predict bodytype
    with torch.no_grad():
        body_index = int(resnet_18_trained(inp).argmax(dim=1).item())

    # Body type heatmap
    heatmap_body = _cam_overlay(resnet_18_trained, inp, cropped_image_pil)
    body_type = _BODY_TYPE_MAP[body_index]

    # Extract features for cosine similarity
    with torch.no_grad():
        features = features_mod(inp).view(1, -1)

    # Combine predicted year category and predicted body type to get key for morph dictionary
    year_body_cat = pred_year_cat + '_' + body_type

    # Cosine similarity to the category's mean features is the typicality score.
    # A combination without a stored morph yields no score instead of an error.
    try:
        mean_features = loaded_dict[year_body_cat]
    except KeyError:
        cosin = None
    else:
        cosin_sim = cosine_similarity(features, mean_features.unsqueeze(0)).item()
        cosin = round(float(cosin_sim), 2)

    year_body = f"Predicted Year Category:  {pred_year_cat},    Predicted body type:  {body_type}"

    return "Car found", white_back, year_body, modernity, cosin, heatmap_modernity, heatmap_body
        

# Gradio UI: one image input, seven outputs in the same order as the tuple
# returned by detect().
demo = gr.Interface(
    fn=detect,
    inputs=gr.Image(type="numpy", label="Upload an image"),
    outputs=[gr.Textbox(label = "Car detection"),
             gr.Image(label="Car Image", type = "pil"),
             gr.Textbox(label = "Predictions"),
             gr.Number(label="Design Modernity Score"),
             gr.Number(label="Typicality Score"),
             gr.Image(label="Heatmap for Production Year Category", type="pil"),
             gr.Image(label="Heatmap for Car Body Type Category", type="pil")],
    title='Car detection and Modernity and Typicality Scores of Cars',
    description='Select one of the provided example images or upload your own image. The model will then search for a car in the image. If one or more cars are being detected, the car with the largest number of pixels is being extracted, and its design modernity and typicality scores are being calculated.',
    examples=[["Old_car.jpg"], ['Auto-ueberholt-Fahrrad-scaled.jpeg'], ['160923-nabu-autobahn-helge-may7.jpeg']]
)

# Start the web server for the app
demo.launch()