# https://tree.rocks/get-heatmap-from-cnn-convolution-neural-network-aka-grad-cam-222e08f57a34
import os
import re

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.ndimage import zoom
from tqdm import tqdm

from model import MakiAlexNet
# from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

TOP_ACCURACY_PERCENTILE = 10
TEST_IMAGE = "dataset/root/train/left1_frame_10.jpg"
MODEL_PARAMS = "alexnet_cognitive.pth"
GIF_STORE = "dataset/gifs/"
TRAIN_STORE = "dataset/root/train/"

model = MakiAlexNet()
# Restore the weights onto the CPU first so that a checkpoint saved on a GPU
# machine can still be loaded on a CPU-only one; the model is moved to CUDA
# just below when a device is available.
model.load_state_dict(torch.load(MODEL_PARAMS, map_location="cpu"))
model.eval()

# Make model run on cuda if available.
if torch.cuda.is_available():
    model = model.cuda()
    print("Running on cuda")

print(dir(model))
for name, module in model.named_modules():
    # Print the layer name
    print(name)


def extract_file_paths(filename):
    """Split a frame file name into its frame number, side and video number.

    The name must contain ``(left|right)<video_no>_frame_<frame_no>``, e.g.
    ``left1_frame_10.jpg``. Pattern built with aid from https://regex101.com/.

    Args:
        filename: File name (or path) of a single frame image.

    Returns:
        Tuple ``(frame_no, frame_name, video_no)``, all as strings, where
        ``frame_name`` is ``"left"`` or ``"right"``.

    Raises:
        ValueError: If ``filename`` does not contain the expected pattern.
    """
    extractor_reg = r"(left|right)([0-9]+)(_frame_)([0-9]+)"
    result = re.search(extractor_reg, filename)
    if result is None:
        raise ValueError(
            f"File name {filename!r} does not match "
            "'(left|right)<video_no>_frame_<frame_no>'"
        )

    frame_no = result.group(4)
    frame_name = result.group(1)
    video_no = result.group(2)
    return frame_no, frame_name, video_no


def _natural_sort_key(path):
    """Return a sort key that compares the digit runs in ``path`` numerically."""
    # re.split with a capturing group alternates text / digits / text / ...,
    # so every odd index is a digit run.
    parts = re.split(r"(\d+)", path)
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]


def _read_frame(frame_path):
    """Read one frame image as a BGR array, failing loudly if it cannot be read."""
    image = cv2.imread(frame_path)
    if image is None:  # cv2.imread signals failure by returning None
        raise OSError(f"Could not read frame image: {frame_path}")
    return image


def create_mp4_from_frames(file_name, frames):
    """Write the given frame images to ``dataset/gifs/<file_name>.mp4``.

    Frames are written in natural (numeric-aware) path order at 20 frames per
    second, so the clip lasts ``len(frames) / 20`` seconds. The video size is
    taken from the first frame in that order.

    Args:
        file_name: Output file name without the ``.mp4`` extension.
        frames: Paths of the frame images to include.

    Raises:
        ValueError: If ``frames`` is empty.
        OSError: If a frame cannot be read or the video file cannot be opened.
    """
    if not frames:
        raise ValueError("create_mp4_from_frames needs at least one frame path")

    # Plain string sorting would put "frame_10" before "frame_2".
    ordered_frames = sorted(frames, key=_natural_sort_key)
    print("Sorted frames: ", ordered_frames)

    first_frame = _read_frame(ordered_frames[0])
    height, width, _ = first_frame.shape

    fps = 20  # Adjust the frames per second (FPS) as needed
    output_dir = os.path.join(os.getcwd(), "dataset", "gifs")
    os.makedirs(output_dir, exist_ok=True)
    video_path = os.path.join(output_dir, f"{file_name}.mp4")

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise OSError(f"Could not open video file for writing: {video_path}")

        # cv2.imread yields BGR and VideoWriter.write expects BGR, so frames
        # are written as read; converting to RGB here would swap red and blue.
        video.write(first_frame)
        for frame_path in ordered_frames[1:]:
            video.write(_read_frame(frame_path))
    finally:
        # Release the VideoWriter even if a frame failed, so the file is closed.
        video.release()


current_video_name = None
selected_frames = []  # paths of the frame images collected for the current video
# Visualise every feature map of the hooked convolution layer on top of each
# input image. The list is currently pinned to one debug frame; the full run is
# tqdm(sorted(os.listdir(TRAIN_STORE)), desc="Running Images").
for image_filename in ["left1_frame_5.jpg"]:
    frame_no, frame_name, video_no = extract_file_paths(image_filename)
    obtained_video_name = video_no + "vid" + frame_name

    if current_video_name != obtained_video_name:
        # We have a new video sequence, so save the current sequence and name.
        if selected_frames:
            filename = f"{current_video_name}"
            # Create the video from the frames collected so far.
            if current_video_name:
                create_mp4_from_frames(filename, selected_frames)
            # Clear frames and hand off to the new sequence.
            selected_frames = []
        current_video_name = obtained_video_name

    # With the number and name of the file paths, we can then determine which
    # frames should be part of a specific video file.
    image_path = os.path.join(TRAIN_STORE, image_filename)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:  # cv2.imread signals failure by returning None
        raise OSError(f"Could not read image: {image_path}")

    # Keep the uint8 RGB array for display: matplotlib clips float RGB data to
    # [0, 1], so showing the float32 0..255 tensor would render nearly white.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    image_height, image_width = rgb_image.shape[:2]

    # float32 tensor with a leading batch dimension, then channels-last
    # (B, H, W, C) -> channels-first (B, C, H, W).
    img = torch.unsqueeze(torch.tensor(rgb_image.astype(np.float32)), 0)
    X = torch.einsum("BHWC->BCHW", img)
    if torch.cuda.is_available():
        X = X.cuda()

    # Only activations are needed, so skip building the autograd graph. The
    # return value is unused; the forward pass is run because
    # model.layer_outputs is read right after it (presumably filled by hooks
    # inside MakiAlexNet - confirm against model.py).
    with torch.no_grad():
        model(X)

    conv = model.layer_outputs['Conv2d']
    # (B, C, h, w) -> (B, h, w, C) so that one feature map is conv[0, :, :, i].
    # The original author noted a torch.Size([1, 256, 12, 12]) activation here.
    conv = torch.einsum("BCHW->BHWC", conv).cpu().detach().numpy()

    # Derive the up-sampling factors and grid size from the actual shapes
    # rather than assuming a 224x224 input and a 12x12x256 activation.
    _, map_height, map_width, map_count = conv.shape
    zoom_factors = (image_height / map_height, image_width / map_width)
    grid_size = int(np.ceil(np.sqrt(map_count)))  # 16 for 256 feature maps

    plt.figure(figsize=(16, 16))
    for i in range(map_count):
        plt.subplot(grid_size, grid_size, i + 1)
        plt.imshow(rgb_image)
        plt.imshow(zoom(conv[0, :, :, i], zoom=zoom_factors), cmap='jet', alpha=0.3)
    plt.show()  # blocks until the figure window is closed

    # --- Disabled experiment: merge all feature maps into one heatmap frame ---
    # total_mat = None
    # for i in range(map_count):
    #     mat = zoom(conv[0, :, :, i], zoom=zoom_factors)
    #     threshold = np.percentile(mat.flatten(), TOP_ACCURACY_PERCENTILE)
    #     # The lower the threshold, the more specific the highlighted region.
    #     mask = mat > threshold
    #     # OR: filter_map = np.where(filter_map <= threshold, 0, filter_map)
    #
    #     # Rescale remaining values (adjust new_range if needed)
    #     new_range = 1  # Adjust based on your desired final range
    #     filter_map = np.where(mask, (mat - threshold) / (mat.max() - threshold) * new_range, 0)
    #
    #     # I just add all the maps together, which is really noisy.
    #     if total_mat is not None:
    #         total_mat += filter_map
    #     else:
    #         total_mat = filter_map
    #
    # # Normalize based on largest value,
    # # Store this image in a collection, in which a GIF will be made, that lasts at least 2 seconds.
    # total_mat = total_mat / abs(np.max(total_mat))
    #
    # # plt.imshow(plt.imread(os.path.join(os.getcwd(), "dataset/root/train", image_filename)))  # full path needed
    # plt.imshow(total_mat, cmap='jet', alpha=0.3)
    #
    # filename = frame_name + frame_no + video_no + ".jpg"
    # file_path = os.path.join(os.getcwd(), "dataset/gifs/raw/", filename)
    # plt.savefig(file_path)
    # selected_frames.append(file_path)

    # Debug stop after the first image. SystemExit is raised directly because
    # exit() is a helper meant for the interactive shell.
    raise SystemExit

# Reference snippet from the article linked at the top of the file:
# plt.figure(figsize=(16, 16))
# for i in range(36):
#     plt.subplot(6, 6, i + 1)
#     plt.imshow(cv2.imread(TEST_IMAGE))
#     plt.imshow(zoom(conv[0, :,:,i], zoom=(scale, scale)), cmap='jet', alpha=0.3)
#
# plt.show()