Samuel Stevens committed
Commit: c0b4385
Parent(s): 29d1b06

Updates based on lab feedback

Files changed:
- app.py (+140, -89)
- requirements.txt (+5, -5)
app.py CHANGED

@@ -1,3 +1,4 @@
+import base64
 import functools
 import io
 import json
@@ -6,11 +7,11 @@ import math
 import os
 import pathlib
 import random
+import typing

 import beartype
 import einops.layers.torch
 import gradio as gr
-import matplotlib
 import numpy as np
 import open_clip
 import requests
@@ -36,7 +37,7 @@ DEBUG = False
 n_sae_latents = 5
 """Number of SAE latents to show."""

-…
+n_latent_examples = 4
 """Number of SAE examples per latent to show."""

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -52,14 +53,44 @@ max_frequency = 1e-1
 """Maximum frequency. Any feature that fires more than this is ignored."""

 CWD = pathlib.Path(__file__).parent
+"""Current working directory."""
+

 r2_url = "https://pub-289086e849214430853bc87bd8964988.r2.dev/"

-colormap = matplotlib.colormaps.get_cmap("plasma")

 logger.info("Set global constants.")


+@beartype.beartype
+class Example(typing.TypedDict):
+    """Represents an example image and its associated label.
+
+    Used to store examples of SAE latent activations for visualization.
+    """
+
+    orig_url: str
+    """The URL or path to access the original example image."""
+    highlighted_url: typing.NotRequired[str]
+    """The URL or path to access the SAE-highlighted image."""
+    target: int
+    """Class ID."""
+
+
+@beartype.beartype
+class SaeLatent(typing.TypedDict):
+    """Represents a single SAE latent."""
+
+    latent: int
+    """The index of the SAE latent being measured."""
+
+    highlighted_url: str
+    """The image with the colormaps applied."""
+
+    examples: list[Example]
+    """Top examples for this latent."""
+
+
 ###########
 # Helpers #
 ###########
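For reference, these TypedDicts describe the JSON that the reworked endpoints return; a single SaeLatent entry would look roughly like the sketch below (values are illustrative, not taken from the app):

    {
        "latent": 1234,
        "examples": [
            {
                "orig_url": "data:image/webp;base64,...",
                "highlighted_url": "data:image/webp;base64,...",
                "target": 17,
            },
        ],
    }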
@@ -102,26 +133,31 @@ def get_dataset_img(i: int) -> Image.Image:


 @beartype.beartype
-def …
-…
+def to_sized(img: Image.Image) -> Image.Image:
+    # Copied from contrib/classification/transforms.py:for_webapp()
+    w, h = img.size
+    if w > h:
+        resize_w = int(w * 512 / h)
+        resize_px = (resize_w, 512)
+
+        margin_x = (resize_w - 448) // 2
+        crop_px = (margin_x, 32, 448 + margin_x, 480)
+    else:
+        resize_h = int(h * 512 / w)
+        resize_px = (512, resize_h)
+        margin_y = (resize_h - 448) // 2
+        crop_px = (32, margin_y, 480, 448 + margin_y)
+
+    return img.resize(resize_px, resample=Image.Resampling.BICUBIC).crop(crop_px)
+
+
+@beartype.beartype
+def img_to_base64(img: Image.Image) -> str:
+    buf = io.BytesIO()
+    img.save(buf, format="webp", lossless=True)
+    b64 = base64.b64encode(buf.getvalue())
+    s64 = b64.decode("utf8")
+    return "data:image/webp;base64," + s64


 ##########
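Because img_to_base64 ships images as base64 WebP data URLs inside JSON, a client has to decode them itself. A minimal sketch of the inverse step (not part of this commit, assumes Pillow with WebP support):

    import base64
    import io

    from PIL import Image

    def data_url_to_img(url: str) -> Image.Image:
        # Drop the "data:image/webp;base64," prefix that img_to_base64 prepends.
        b64 = url.split(",", 1)[1]
        return Image.open(io.BytesIO(base64.b64decode(b64)))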
@@ -209,7 +245,7 @@ logger.info("Loaded SAE.")
 ############

 human_transform = transforms.Compose([
-    transforms.Resize(…
+    transforms.Resize(512, interpolation=transforms.InterpolationMode.BICUBIC),
     transforms.CenterCrop((448, 448)),
     transforms.ToTensor(),
     einops.layers.torch.Rearrange("channels width height -> width height channels"),
@@ -226,7 +262,7 @@ with open(CWD / "data" / "image_fpaths.json") as fd:


 with open(CWD / "data" / "image_labels.json") as fd:
-…
+    img_labels = json.load(fd)


 logger.info("Loaded all datasets.")
@@ -256,40 +292,41 @@ mask = mask & (sparsity < max_frequency)


 @beartype.beartype
-def …
-…
-    return …
-        Image.fromarray((…
-…
+def get_img(img_i: int) -> Example:
+    img = get_dataset_img(img_i)
+    img = human_transform(img)
+    return {
+        "orig_url": img_to_base64(Image.fromarray((img * 255).to(torch.uint8).numpy())),
+        "target": img_labels[img_i],
+    }


 @beartype.beartype
-def …
-    indices = [i for i, tgt in enumerate(…
+def get_random_class_img(cls: int) -> Example:
+    indices = [i for i, tgt in enumerate(img_labels) if tgt == cls]
     i = random.choice(indices)

-…
+    img = get_dataset_img(i)
+    img = human_transform(img)
+    return {
+        "orig_url": img_to_base64(Image.fromarray((img * 255).to(torch.uint8).numpy())),
+        "target": cls,
+    }


 @torch.inference_mode
-def get_sae_examples(
-    image_i: int, patches: list[int]
-) -> list[None | Image.Image | int]:
+def get_sae_latents(img_i: int, patches: list[int]) -> list[SaeLatent]:
     """
     Given a particular cell, returns some highlighted images showing what feature fires most on this cell.
     """
     if not patches:
-        return […
+        return []

     logger.info("Getting SAE examples for patches %s.", patches)

-    img = get_dataset_img(…
-…
-    x_BPD = split_vit.forward_start(…
+    img = get_dataset_img(img_i)
+    x_BCWH = vit_transform(img)[None, ...].to(device)
+    x_BPD = split_vit.forward_start(x_BCWH)
     # Need to add 1 to account for [CLS] token.
     vit_acts_MD = x_BPD[0, [p + 1 for p in patches]].to(device)
@@ -299,15 +336,19 @@ def get_sae_examples(
     latents = torch.argsort(f_x_S, descending=True).cpu()
     latents = latents[mask[latents]][:n_sae_latents].tolist()

-…
+    sae_latents = []
     for latent in latents:
-        …
+        intermediates, seen_i_im = [], set()
         for i_im, values_p in zip(top_img_i[latent].tolist(), top_values[latent]):
             if i_im in seen_i_im:
                 continue

             example_img = get_dataset_img(i_im)
-            …
+            intermediates.append({
+                "img": example_img,
+                "patches": values_p,
+                "target": img_labels[i_im],
+            })
             seen_i_im.add(i_im)

         # How to scale values.
@@ -315,17 +356,24 @@ def get_sae_examples(
         if top_values[latent].numel() > 0:
             upper = top_values[latent].max().item()

-…
+        examples = []
+        for intermediate in intermediates[:n_latent_examples]:
+            img_sized = to_sized(intermediate["img"])
+            examples.append({
+                "orig_url": img_to_base64(img_sized),
+                "highlighted_url": img_to_base64(
+                    add_highlights(
+                        img_sized,
+                        intermediate["patches"].to(float).numpy(),
+                        upper=upper,
+                    )
+                ),
+                "target": intermediate["target"],
+            })

-…
+        sae_latents.append({"latent": latent, "examples": examples})

-    return …
+    return sae_latents


 @torch.inference_mode
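Since the reworked function returns plain dicts rather than Gradio image components, it can also be scripted outside the UI. A hedged usage sketch, assuming the module-level model and dataset globals are already loaded:

    latents = get_sae_latents(img_i=1234, patches=[0, 1, 14, 15])
    for lat in latents:
        print(lat["latent"], len(lat["examples"]))
        for ex in lat["examples"]:
            print("  class", ex["target"], ex["orig_url"][:40])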
@@ -434,7 +482,8 @@ def add_highlights(
     overlay = Image.new("RGBA", img.size, (0, 0, 0, 0))
     draw = ImageDraw.Draw(overlay)

-    colors = (…
+    colors = np.zeros((len(patches), 3), dtype=np.uint8)
+    colors[:, 0] = ((patches / (upper + 1e-9)) * 255).astype(np.uint8)

     for p, (val, color) in enumerate(zip(patches, colors)):
         assert upper is not None
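The new colors array replaces the old matplotlib colormap lookup with a plain red ramp: each patch's activation is normalized by upper and written into the red channel. A standalone sketch of the same mapping with hypothetical values:

    import numpy as np

    patches = np.array([0.0, 0.5, 2.0])  # hypothetical SAE activations per patch
    upper = 2.0                          # max activation used for scaling

    colors = np.zeros((len(patches), 3), dtype=np.uint8)
    colors[:, 0] = ((patches / (upper + 1e-9)) * 255).astype(np.uint8)
    # -> red values [0, 63, 254]: stronger activations give a more saturated overlay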
@@ -458,43 +507,45 @@ def add_highlights(


 with gr.Blocks() as demo:
-…
+    ###########
+    # get-img #
+    ###########
+
+    # Inputs
+    number = gr.Number(label="Number", precision=0)
+
+    # Outputs
+    json_out = gr.JSON(label="get_img_out", value={})
+
+    get_img_btn = gr.Button(value="Get Input Image")
+    get_img_btn.click(
+        get_img,
+        inputs=[number],
+        outputs=[json_out],
+        api_name="get-img",
     )
+
+    ########################
+    # get-random-class-img #
+    ########################
+
     get_random_class_image_btn = gr.Button(value="Get Random Class Image")
-…
-        inputs=[…
-        outputs=[…
-        api_name="get-random-class-…
+    get_img_btn.click(
+        get_random_class_img,
+        inputs=[number],
+        outputs=[json_out],
+        api_name="get-random-class-img",
     )

     patch_numbers = gr.CheckboxGroup(
         label="Image Patch", choices=list(range(n_patches_per_img))
     )
-…
-        gr.Image(label=f"Latent #{j}, Example #{i + 1}")
-        for i in range(n_sae_examples)
-        for j in range(n_sae_latents)
-    ]
-    get_sae_examples_btn = gr.Button(value="Get SAE Examples")
-    get_sae_examples_btn.click(
-        get_sae_examples,
-        inputs=[image_number, patch_numbers],
-        outputs=sae_example_images + top_latent_numbers,
-        api_name="get-sae-examples",
+    get_sae_latents_btn = gr.Button(value="Get SAE Examples")
+    get_sae_latents_btn.click(
+        get_sae_latents,
+        inputs=[number, patch_numbers],
+        outputs=json_out,
+        api_name="get-sae-latents",
         concurrency_limit=16,
     )
@@ -502,7 +553,7 @@ with gr.Blocks() as demo:
     get_pred_dist_btn = gr.Button(value="Get Pred. Distribution")
     get_pred_dist_btn.click(
         get_pred_dist,
-        inputs=[…
+        inputs=[number],
         outputs=[pred_dist],
         api_name="get-preds",
     )
@@ -514,7 +565,7 @@ with gr.Blocks() as demo:
     get_modified_dist_btn = gr.Button(value="Get Modified Label")
     get_modified_dist_btn.click(
         get_modified_dist,
-        inputs=[…
+        inputs=[number, patch_numbers] + latent_numbers + value_sliders,
         outputs=[pred_dist],
         api_name="get-modified",
         concurrency_limit=16,
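With the Blocks UI now exposing JSON endpoints (get-img, get-random-class-img, get-sae-latents), they can be called from the gradio_client package. A hedged sketch, with the Space id as a placeholder:

    from gradio_client import Client

    client = Client("user/space-name")  # placeholder Space id
    example = client.predict(1234, api_name="/get-img")
    latents = client.predict(1234, [0, 1, 14], api_name="/get-sae-latents")
    print(example["target"], len(latents))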
requirements.txt CHANGED

@@ -47,7 +47,7 @@ contourpy==1.3.1
     # via matplotlib
 cycler==0.12.1
     # via matplotlib
-datasets==3.3.…
+datasets==3.3.1
     # via saev
 dill==0.3.8
     # via
@@ -131,7 +131,7 @@ jsonschema-specifications==2024.10.1
     # via jsonschema
 kiwisolver==1.4.8
     # via matplotlib
-marimo==0.11.…
+marimo==0.11.6
     # via saev
 markdown==3.7
     # via
@@ -155,7 +155,7 @@ multidict==6.1.0
     #   yarl
 multiprocess==0.70.16
     # via datasets
-narwhals==1.…
+narwhals==1.27.1
     # via
     #   altair
     #   marimo
@@ -297,7 +297,7 @@ ruff==0.9.6
     # via
     #   gradio
     #   marimo
-saev @ git+https://github.com/samuelstevens/saev@…
+saev @ git+https://github.com/samuelstevens/saev@298cabdb6b771c76b402d0fdddab6907d1941d7a
     # via saev-image-classification (pyproject.toml)
 safehttpx==0.1.6
     # via gradio
@@ -352,7 +352,7 @@ tqdm==4.67.1
     #   saev
 triton==3.2.0
     # via torch
-typeguard==4.4.…
+typeguard==4.4.2
     # via tyro
 typer==0.15.1
     # via gradio