Spaces:

hohonu-vicml
/

Trailblazer

Runtime error

File size: 6,382 Bytes

85456ff

"""
"""
import torch
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as nnf
import torchvision
import einops
import matplotlib.pyplot as plt
import scipy.stats as st
from PIL import Image, ImageFont, ImageDraw

plt.rcParams["figure.figsize"] = [
    float(v) * 1.5 for v in plt.rcParams["figure.figsize"]
]


class CrossAttnPainter:

    def __init__(self, bundle, pipe, root="/tmp"):
        self.dim = 64
        self.folder =

    def plot_frames(self):
        folder = "/tmp"
        from PIL import Image
        for i, f in enumerate(video_frames):
            img = Image.fromarray(f)
            filepath = os.path.join(folder, "recons.{:04d}.jpg".format(i))
            img.save(filepath)


    def plot_spatial_attn(self):

        arr = (
            pipe.unet.up_blocks[1]
            .attentions[0]
            .transformer_blocks[0]
            .attn2.processor.cross_attention_map
        )
        heads = pipe.unet.up_blocks[1].attentions[0].transformer_blocks[0].attn2.heads
        arr = torch.transpose(arr, 1, 3)
        arr = nnf.interpolate(arr, size=(64, 64), mode='bicubic', align_corners=False)
        arr = torch.transpose(arr, 1, 3)
        arr = arr.cpu().numpy()
        arr = arr.reshape(24, heads, 64, 64, 77)
        arr = arr.mean(axis=1)
        n = arr.shape[0]
        for i in range(n):
            filename = "/tmp/spatialca.{:04d}.jpg".format(i)
            plt.clf()
            plt.imshow(arr[i, :, :, 2], cmap="jet")
            plt.gca().set_axis_off()
            plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0,
                                hspace = 0, wspace = 0)
            plt.margins(0,0)
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.savefig(filename, bbox_inches = 'tight',pad_inches = 0)
            print(filename)

    def plot_temporal_attn(self):

        # arr = pipe.unet.mid_block.temp_attentions[0].transformer_blocks[0].attn2.processor.cross_attention_map
        import matplotlib.pyplot as plt
        import torch.nn.functional as nnf
        arr = (
            pipe.unet.up_blocks[2]
            .temp_attentions[1]
            .transformer_blocks[0]
            .attn2.processor.cross_attention_map
        )
        #arr = pipe.unet.transformer_in.transformer_blocks[0].attn2.processor.cross_attention_map
        arr = torch.transpose(arr, 0, 2).transpose(1, 3)
        arr = nnf.interpolate(arr, size=(64, 64), mode="bicubic", align_corners=False)
        arr = torch.transpose(arr, 0, 2).transpose(1, 3)
        arr = arr.cpu().numpy()
        n = arr.shape[-1]
        for i in range(n-2):
            filename = "/tmp/tempcaiip2.{:04d}.jpg".format(i)
            plt.clf()
            plt.imshow(arr[..., i+2, i], cmap="jet")
            plt.gca().set_axis_off()
            plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
            plt.margins(0, 0)
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.savefig(filename, bbox_inches="tight", pad_inches=0)
            print(filename)










def plot_latent_noise(latents, mode):

    for i in range(latents.shape[0]):
        tensor = latents[i].cpu()
        min_val = torch.min(tensor)
        max_val = torch.max(tensor)
        scale = 255 * (max_val - min_val)
        tensor = scale * (tensor - min_val)
        tensor = tensor.type(torch.int8)
        tensor = einops.rearrange(tensor, "c w h -> w h c")
        if mode == "RGB":
            tensor = tensor[...,:3]
            mode_ = "RGB"
        elif mode == "RGBA":
            mode_ = "RGBA"
            pass
        elif mode == "GRAY":
            tensor = tensor[...,0]
            mode_ = "L"

        x = tensor.numpy()

        img = Image.fromarray(x, mode_)
        img = img.resize((256, 256), resample=Image.NEAREST )
        filepath = f"/tmp/out.{i:04d}.jpg"
        img.save(filepath)

        tensor = latents[i].cpu()
        x = tensor.flatten().numpy()
        x /= x.max()
        plt.hist(x, density=True, bins=20, range=[-1, 1])
        mn, mx = plt.xlim()
        plt.xlim(mn, mx)
        kde_xs = np.linspace(mn, mx, 300)
        kde = st.gaussian_kde(x)
        plt.plot(kde_xs, kde.pdf(kde_xs), label="PDF")
        filepath = f"/tmp/hist.{i:04d}.jpg"
        plt.savefig(filepath)
        plt.clf()

        print(i)


def plot_activation(cross_attn, prompt, filepath="", plot_with_trailings=False, n_trailing=2):
    splitted_prompt = prompt.split(" ")
    n = len(splitted_prompt)
    start = 0
    arrs = []
    if plot_with_trailings:
        for j in range(n_trailing):
            arr = []
            for i in range(start, start + n):
                cross_attn_sliced = cross_attn[..., i + 1]
                arr.append(cross_attn_sliced.T)
            start += n
            arr = np.hstack(arr)
            arrs.append(arr)
        arrs = np.vstack(arrs).T
    else:
        arr = []
        for i in range(start, start + n):
            cross_attn_sliced = cross_attn[..., i + 1]
            arr.append(cross_attn_sliced)
        arrs = np.vstack(arr)
    plt.imshow(arrs, cmap="jet", vmin=0.0, vmax=.5)
    plt.title(prompt)
    if filepath:
        plt.savefig(filepath)
    else:
        plt.show()


def draw_dd_metadata(img, bbox, text="", target_res=1024):
    img = img.resize((target_res, target_res))
    image_editable = ImageDraw.Draw(img)

    for region in [bbox]:
        x0 = region[0] * target_res
        y0 = region[2] * target_res
        x1 = region[1] * target_res
        y1 = region[3] * target_res
        image_editable.rectangle(xy=[x0, y0, x1, y1], outline=(255, 0, 0, 255), width=5)
        if text:
            font = ImageFont.truetype("./assets/JetBrainsMono-Bold.ttf", size=13)
            image_editable.multiline_text(
                (15, 15),
                text,
                (255, 255, 255, 0),
                font=font,
                stroke_width=2,
                stroke_fill=(0, 0, 0, 255),
                spacing=0,
            )
    return img




























if __name__ == "__main__":
    latents = torch.load("assets/experiments/a-cat-sitting-on-a-car_230615-144611/latents.pt")
    plot_latent_noise(latents, "GRAY")