# CLIP_as_RNN/utils/visualize.py
# Kevin Sun
# init commit
# 6cd90b7
# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Visualization functions."""
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import torch
# pylint: disable=g-importing-member
from utils.utils import normalize
# Side lengths of the blank canvases that `vis_mask` allocates for the
# predicted and ground-truth masks before plotting them.
_VIS_HEIGHT = 512
_VIS_WIDTH = 512
def show_cam_on_image(img, mask):
    """Blend a JET-colored heatmap of `mask` with `img`.

    Args:
        img: Image array with at least two dimensions (height, width, ...).
            Presumably an RGB float image in [0, 1] -- TODO confirm against
            callers.
        mask: Activation array. It is scaled by 255 and cast to uint8, so
            values are expected to lie in [0, 1]. It is resized to the
            spatial size of `img` when the two differ.

    Returns:
        A uint8 array containing the sum of the heatmap and the image,
        rescaled so that its largest value maps to 255.
    """
    img_height, img_width = img.shape[:2]
    # Compare both spatial dimensions: checking the width alone lets a mask
    # with a mismatched height skip the resize and fail at the blend below.
    if tuple(mask.shape[:2]) != (img_height, img_width):
        # cv2.resize takes the target size as (width, height).
        mask = cv2.resize(mask, (img_width, img_height))
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    # The color map comes back in BGR channel order; switch to RGB.
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    # Renormalize the blend so the brightest value becomes 255.
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)
def save_img(array, img_name):
    """Write `array` to `<img_name>.png` as an RGB image.

    Args:
        array: NumPy array of pixel data; it is cast to uint8 before being
            handed to PIL.
        img_name: Destination path without the ".png" extension.
    """
    pixels = array.astype(np.uint8)
    Image.fromarray(pixels, mode="RGB").save(f"{img_name}.png")
def viz_attn(img, attn_map, prefix="vis_results/clipcam_img", img_name="cam"):
    """Overlay attention map(s) on an image and save the result(s) as PNG.

    Args:
        img: Image to draw on; it is passed through `normalize` and then to
            `show_cam_on_image`.
        attn_map: torch.Tensor of attention values. A 3-D tensor is treated
            as a stack of masks indexed along dim 0; any other rank is
            treated as a single mask.
        prefix: Output directory; created if it does not exist.
        img_name: Base name of the output file(s), without extension.

    Returns:
        The overlay produced by `show_cam_on_image` when a single mask is
        drawn (saved as `<prefix>/<img_name>.png`). When several masks are
        drawn, each overlay is saved as `<prefix>/<img_name>_<i>.png` and
        None is returned.
    """
    num_masks = attn_map.shape[0] if len(attn_map.shape) == 3 else 1
    # squeeze(1) only removes dim 1 when that dim has size 1.
    # NOTE(review): a 3-D input holding exactly one mask is forwarded below
    # without being indexed -- confirm callers never rely on that case.
    attn_map = attn_map.float().squeeze(1).detach().cpu().numpy()
    attn_map = normalize(attn_map)
    img = normalize(img)

    # Create the output directory once. exist_ok=True avoids the
    # check-then-create race and the repeated filesystem probes per mask.
    os.makedirs(prefix, exist_ok=True)

    if num_masks == 1:
        vis = show_cam_on_image(img, attn_map)
        save_img(vis, os.path.join(prefix, f"{img_name}"))
        return vis

    for i in range(num_masks):
        vis = show_cam_on_image(img, attn_map[i])
        save_img(vis, os.path.join(prefix, f"{img_name}_{i}"))
    return None
def vis_mask(mask, gt_mask, img, output_dir, fname):
    """Save a three-panel figure: image, predicted mask, ground-truth mask.

    Args:
        mask: Batched index for the predicted mask; `mask[0]` selects the
            canvas entries that are set to 1. Presumably a boolean tensor
            matching the (_VIS_HEIGHT, _VIS_WIDTH) canvas -- TODO confirm.
        gt_mask: Batched index for the ground-truth mask, used the same way
            as `mask`.
        img: torch.Tensor batch of images; `img[0]` is permuted with
            (1, 2, 0) before display, i.e. presumably from (C, H, W) to
            (H, W, C) -- TODO confirm.
        output_dir: Directory the figure is written to; created if missing.
        fname: Output file name without extension; ".jpg" is appended.
    """
    # Image arrays are laid out as (rows, cols), i.e. (height, width).
    canvas_shape = (_VIS_HEIGHT, _VIS_WIDTH)
    mask_img = torch.zeros(canvas_shape)
    mask_img[mask[0]] = 1
    gt_mask_img = torch.zeros(canvas_shape)
    gt_mask_img[gt_mask[0]] = 1
    # detach()/cpu() make the conversion work for GPU or autograd tensors.
    img = img[0].detach().cpu().permute(1, 2, 0).numpy()

    # An empty output_dir means "current directory"; nothing to create then.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig, axs = plt.subplots(1, 3, figsize=(15, 5))
    try:
        axs[0].imshow(img)
        axs[0].axis("off")
        axs[0].set_title("Original Image")

        # Both masks are drawn half-transparent with the jet color map.
        axs[1].imshow(mask_img.numpy(), cmap="jet", alpha=0.5)
        axs[1].axis("off")
        axs[1].set_title("Mask")

        axs[2].imshow(gt_mask_img.numpy(), cmap="jet", alpha=0.5)
        axs[2].axis("off")
        axs[2].set_title("Ground Truth Mask")

        fig.savefig(
            os.path.join(output_dir, f"{fname}.jpg"),
            bbox_inches="tight",
            dpi=300,
            pad_inches=0.0,
        )
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, repeated calls accumulate open figures.
        plt.close(fig)