Space: keypoints

Changed files:
- app.py +128 -86
- dataset.py +78 -1
- resources/DataList.json +0 -0
- utils.py +90 -0
app.py
CHANGED
@@ -1,74 +1,92 @@
 import gradio as gr
-import spaces
 import torch
 import torch.nn.functional as F
 from torch.utils.data import DataLoader
 import matplotlib.pyplot as plt
 from model_module import AutoencoderModule
-from dataset import MyDataset, load_filenames
 import numpy as np
 from PIL import Image
 import base64
 from io import BytesIO
 
+import dataset
+from dataset import MyDataset, ImageKeypointDataset, load_filenames, load_keypoints
+import utils
+
+try:
+    import spaces
+except ImportError:
+    print("Spaces is not installed.")
+
+image_size = 112
+batch_size = 32
+
+
 # Load the model and data
-def load_model():
-    model_path = "checkpoints/autoencoder-epoch=49-train_loss=1.01.ckpt"
-    feature_dim = 64
+def load_model(model_path="checkpoints/autoencoder-epoch=49-train_loss=1.01.ckpt", feature_dim=64):
     model = AutoencoderModule(feature_dim=feature_dim)
     state_dict = torch.load(model_path)
 
+    if "state_dict" in state_dict:
+        model.load_state_dict(state_dict['state_dict'])
+        model.eval()
+    else:
+        # Fix the state_dict keys
+        new_state_dict = {}
+        for key in state_dict:
+            new_key = "model." + key
+            new_state_dict[new_key] = state_dict[key]
+        model.load_state_dict(new_state_dict)
+        model.eval()
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model.to(device)
     print("Model loaded successfully.")
     return model, device
 
-def load_data(device, img_dir="resources/trainB/", image_size=112, batch_size=
+def load_data(device, img_dir="resources/trainB/", image_size=112, batch_size=256):
     filenames = load_filenames(img_dir)
     train_X = filenames[:1000]
+
     train_ds = MyDataset(train_X, img_dir=img_dir, img_size=image_size)
 
-    train_loader = DataLoader(
-        train_ds,
-        batch_size=batch_size,
-        shuffle=True,
-        num_workers=0,
-    )
+    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
 
     iterator = iter(train_loader)
     x, _, _ = next(iterator)
     x = x.to(device)
     x = x[:,0].to(device)
+
     print("Data loaded successfully.")
     return x
 
+def load_keypoints(device, img_dir="resources/trainB/", image_size=112, batch_size=32):
+    filenames = load_filenames(img_dir)
+    train_X = filenames[:1000]
+    keypoints = dataset.load_keypoints('resources/DataList.json')
+
+    image_points_ds = ImageKeypointDataset(train_X, keypoints, img_dir='resources/trainB/', img_size=image_size)
+
+    image_points_loader = DataLoader(image_points_ds, batch_size=batch_size, shuffle=False)
+
+    iterator = iter(image_points_loader)
+    test_imgs, points = next(iterator)
+    test_imgs = test_imgs.to(device)
+    points = points.to(device)*(image_size)
+
+    print("Keypoints loaded successfully.")
+    return test_imgs, points
 
 # Heatmap generation function
+try:
+    @spaces.GPU
+    def get_heatmaps(source_num, x_coords, y_coords, uploaded_image):
+        return _get_heatmaps(source_num, x_coords, y_coords, uploaded_image)
+except:
+    def get_heatmaps(source_num, x_coords, y_coords, uploaded_image):
+        return _get_heatmaps(source_num, x_coords, y_coords, uploaded_image)
+
+def _get_heatmaps(source_num, x_coords, y_coords, uploaded_image):
     if type(uploaded_image) == str:
         uploaded_image = Image.open(uploaded_image)
     if type(source_num) == str:
@@ -77,60 +95,68 @@ def get_heatmaps(source_num, x_coords, y_coords, uploaded_image):
     x_coords = int(x_coords)
     if type(y_coords) == str:
         y_coords = int(y_coords)
-
-    with torch.no_grad():
-        dec5, _ = model(x)
-        img = x
-        feature_map = dec5
-        batch_size = feature_map.size(0)
-        feature_dim = feature_map.size(1)
-
-        # Preprocess the uploaded image
-        if uploaded_image is not None:
-            uploaded_image = preprocess_uploaded_image(uploaded_image['composite'], image_size)
-            target_feature_map, _ = model(uploaded_image)
-            img = torch.cat((img, uploaded_image))
-            feature_map = torch.cat((feature_map, target_feature_map))
-            batch_size += 1
-        else:
-            uploaded_image = torch.zeros(1, 3, image_size, image_size, device=device)
-
-        target_num = batch_size - 1
-
-        norm_batch_distance_map = 1 / torch.cosh(20 * (batch_distance_map - batch_distance_map.min()) / (batch_distance_map.max() - batch_distance_map.min())) ** 2
-
-        alpha = 0.7
-        blended_source = (1 - alpha) * img[source_num] + alpha * torch.cat(((norm_batch_distance_map[source_num] / norm_batch_distance_map[source_num].max()).unsqueeze(0), torch.zeros(2, image_size, image_size, device=device)))
-        blended_target = (1 - alpha) * img[target_num] + alpha * torch.cat(((norm_batch_distance_map[target_num] / norm_batch_distance_map[target_num].max()).unsqueeze(0), torch.zeros(2, image_size, image_size, device=device)))
-
-        # Plot with Matplotlib and save as an image
-        fig, axs = plt.subplots(2, 2, figsize=(10, 10))
-        axs[0, 0].imshow(source_map.cpu(), cmap='hot')
-        axs[0, 0].set_title("Source Map")
-        axs[0, 1].imshow(target_map.cpu(), cmap='hot')
-        axs[0, 1].set_title("Target Map")
-        axs[1, 0].imshow(blended_source.permute(1, 2, 0).cpu())
-        axs[1, 0].set_title("Blended Source")
-        axs[1, 1].imshow(blended_target.permute(1, 2, 0).cpu())
-        axs[1, 1].set_title("Blended Target")
-        for ax in axs.flat:
-            ax.axis('off')
-
-        plt.tight_layout()
-        plt.close(fig)
-        return fig
+
+    dec5, _ = model(x)
+    feature_map = dec5
+    # Preprocess the uploaded image
+    if uploaded_image is not None:
+        uploaded_image = utils.preprocess_uploaded_image(uploaded_image['composite'], image_size)
+    else:
+        uploaded_image = torch.zeros(1, 3, image_size, image_size, device=device)
+    target_feature_map, _ = model(uploaded_image)
+    img = torch.cat((x, uploaded_image))
+    feature_map = torch.cat((feature_map, target_feature_map))
+
+    source_map, target_map, blended_source, blended_target = utils.get_heatmaps(img, feature_map, source_num, x_coords, y_coords, uploaded_image)
+    keypoint_maps, blended_tensors = utils.get_keypoint_heatmaps(target_feature_map, mean_vector_list, points.size(1), uploaded_image)
 
+    # Plot with Matplotlib and save as an image
+    fig, axs = plt.subplots(2, 3, figsize=(10, 6))
+    axs[0, 0].imshow(source_map, cmap='hot')
+    axs[0, 0].set_title("Source Map")
+    axs[0, 1].imshow(target_map, cmap='hot')
+    axs[0, 1].set_title("Target Map")
+    axs[0, 2].imshow(keypoint_maps[0], cmap='hot')
+    axs[0, 2].set_title("Keypoint Map")
+    axs[1, 0].imshow(blended_source.permute(1, 2, 0))
+    axs[1, 0].set_title("Blended Source")
+    axs[1, 1].imshow(blended_target.permute(1, 2, 0))
+    axs[1, 1].set_title("Blended Target")
+    axs[1, 2].imshow(blended_tensors[0].permute(1, 2, 0))
+    axs[1, 2].set_title("Blended Keypoint")
+    for ax in axs.flat:
+        ax.axis('off')
+
+    plt.tight_layout()
+    plt.close(fig)
+    return fig
 
+def setup(model_dict, input_image=None):
+    global model, device, x, test_imgs, points, mean_vector_list
+    # Convert str -> dict
+    if type(model_dict) == str:
+        model_dict = eval(model_dict)
+    model_name = model_dict["name"]
+    feature_dim = model_dict["feature_dim"]
+    model_path = f"checkpoints/{model_name}"
+    model, device = load_model(model_path, feature_dim)
+    x = load_data(device)
+    test_imgs, points = load_keypoints(device)
+    feature_map, _ = model(test_imgs)
+    mean_vector_list = utils.get_mean_vector(feature_map, points)
+
+    if input_image is not None:
+        fig = get_heatmaps(0, image_size // 2, image_size // 2, input_image)
+        return fig
 
+models = [{"name": "ae_model_tf_2024-03-05_00-35-21.pth", "feature_dim": 32},
+          {"name": "autoencoder-epoch=09-train_loss=1.00.ckpt", "feature_dim": 64},
+          {"name": "autoencoder-epoch=29-train_loss=1.01.ckpt", "feature_dim": 64},
+          {"name": "autoencoder-epoch=49-train_loss=1.01.ckpt", "feature_dim": 64}]
+
+setup(models[0])
 
 with gr.Blocks() as demo:
     # title
@@ -142,9 +168,24 @@ with gr.Blocks() as demo:
                 "The blended source and target images show the source and target images with the source and target maps overlaid, respectively. "
 
                 "For further information, please contact me on X (formerly Twitter): @Yeq6X.")
+
+    gr.Markdown("## Heatmap Visualization")
 
     input_image = gr.ImageEditor(label="Cropped Image", elem_id="input_image", crop_size=(112, 112), show_fullscreen_button=True)
-    gr.
+    output_plot = gr.Plot(value=None, elem_id="output_plot", show_label=False)
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                model_name = gr.Dropdown(
+                    choices=[str(model) for model in models],
+                    container=False
+                )
+                load_button = gr.Button("Load Model")
+                load_button.click(setup, inputs=[model_name, input_image], outputs=[output_plot])
+        with gr.Row():
+            pass
+
+    inference = gr.Interface(
         get_heatmaps,
         inputs=[
             gr.Slider(0, batch_size - 1, step=1, label="Source Image Index"),
@@ -152,8 +193,9 @@ with gr.Blocks() as demo:
             gr.Slider(0, image_size - 1, step=1, value=image_size // 2, label="Y Coordinate"),
             input_image
         ],
-        outputs=
+        outputs=output_plot,
         live=True,
+        flagging_mode="never"
    )
    # examples
    gr.Markdown("# Examples")
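A minimal sketch (not part of this commit, using illustrative stand-in classes) of why the new load_model() remaps checkpoint keys: a wrapper module that holds the raw autoencoder as self.model stores its parameters under "model.<name>", so a checkpoint saved from the bare network only loads after its keys are prefixed accordingly.

import torch
import torch.nn as nn

class Bare(nn.Module):            # stand-in for the raw autoencoder network
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

class Wrapper(nn.Module):         # stand-in for AutoencoderModule, which wraps it as self.model
    def __init__(self):
        super().__init__()
        self.model = Bare()

raw_ckpt = Bare().state_dict()                              # keys like "fc.weight"
remapped = {"model." + k: v for k, v in raw_ckpt.items()}   # keys like "model.fc.weight"
Wrapper().load_state_dict(remapped)                         # now accepted by the wrapper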
dataset.py
CHANGED
@@ -4,15 +4,48 @@ from torchvision import transforms
 import random
 from PIL import Image
 import os
+import pandas as pd
 
 from utils import RandomAffineAndRetMat
 
 def load_filenames(data_dir):
+    # image extensions only
     img_exts = ['.jpg', '.jpeg', '.png', '.bmp', '.ppm', '.pgm', '.tif', '.tiff']
     filenames = [f for f in os.listdir(data_dir) if os.path.splitext(f)[1].lower() in img_exts]
 
     return filenames
 
+def load_keypoints(label_path):
+    label_data = pd.read_json(label_path)
+    label_data = label_data.sort_index()
+    tmp_points = []
+
+    for o in label_data.data[0:1000]:
+        tmps = []
+        for i in range(60):
+            tmps.append(o['points'][str(i)]['x'])
+            tmps.append(o['points'][str(i)]['y'])
+        tmp_points.append(tmps) # datanum
+
+    df_points = pd.DataFrame(tmp_points)
+    df_points = df_points.iloc[:,[
+        *list(range(0,16*2+1,4)), *list(range(1,16*2+2,4)),
+        *list(range(27*2,36*2+1,4)), *list(range(27*2+1,36*2+2,4)),
+        *list(range(37*2,46*2+1,4)), *list(range(37*2+1,46*2+2,4)),
+        # 49*2, 49*2+1,
+        # *list(range(50*2,55*2+1,4)), *list(range(50*2+1,55*2+2,4)),
+        28*2, 28*2+1,
+        30*2, 30*2+1,
+        34*2, 34*2+1,
+        38*2, 38*2+1,
+        40*2, 40*2+1,
+        44*2, 44*2+1,
+    ]]
+    df_points = df_points.sort_index(axis=1)
+    df_points.columns = list(range(len(df_points.columns)))
+    # df_points[0:500].iloc[0]
+
+    return df_points
 
 class MyDataset:
     def __init__(self, X, valid=False, img_dir='resources/trainB/', img_size=256):
@@ -64,4 +97,48 @@ class MyDataset:
 
         X = torch.stack(xlist)
         mat = torch.stack(matlist)
-        return X, mat, f
+        return X, mat, f
+
+class ImageKeypointDataset:
+    def __init__(self, X, y, valid=False, img_dir='resources/trainB/', img_size=256):
+        self.X = X
+        self.y = y
+        self.valid = valid
+        self.img_dir = img_dir
+        self.img_size = img_size
+        # if not valid:
+        trans = [
+            transforms.Resize(self.img_size),
+            transforms.ToTensor(),
+            # transforms.Normalize(mean=means, std=stds),
+            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+            # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
+        ]
+        self.trans = transforms.Compose(trans)
+
+    def __len__(self):
+        return len(self.X)
+
+    def __getitem__(self, index):
+        if type(index) is slice:
+            if index.step is None:
+                return (torch.stack([self.get_one_X(i) for i in range(index.start, index.stop)]),
+                        torch.stack([self.get_one_y(i) for i in range(index.start, index.stop)]))
+            else:
+                return (torch.stack([self.get_one_X(i) for i in range(index.start, index.stop, index.step)]),
+                        torch.stack([self.get_one_y(i) for i in range(index.start, index.stop, index.step)]))
+        if type(index) is int:
+            return self.get_one_X(index), self.get_one_y(index)
+
+    def get_one_X(self, index):
+        f = self.img_dir + self.X[index]
+        X = Image.open(f)
+        X = self.trans(X)
+        return X
+
+    def get_one_y(self, index):
+        y = self.y.iloc[index].copy()
+        y = torch.tensor(y)
+        y = y.float()
+        y = y.reshape(25,2)
+        return y
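A rough usage sketch of the new dataset pieces (not part of the commit; it assumes resources/DataList.json and the resources/trainB/ images are available locally and mirrors load_keypoints() in app.py). load_keypoints() keeps 50 of the 120 annotated coordinate columns, i.e. 25 (x, y) pairs, which get_one_y() reshapes to (25, 2):

from torch.utils.data import DataLoader
from dataset import ImageKeypointDataset, load_filenames, load_keypoints

files = load_filenames("resources/trainB/")[:1000]
points_df = load_keypoints("resources/DataList.json")    # DataFrame with 50 columns = 25 (x, y) pairs
ds = ImageKeypointDataset(files, points_df, img_dir="resources/trainB/", img_size=112)
imgs, pts = next(iter(DataLoader(ds, batch_size=32, shuffle=False)))
print(imgs.shape, pts.shape)    # expected [32, 3, 112, 112] and [32, 25, 2] for square source images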
resources/DataList.json
ADDED
The diff for this file is too large to render.
See raw diff
utils.py
CHANGED
@@ -77,3 +77,93 @@ class RandomAffineAndRetMat(torch.nn.Module):
         affine_matrix = translation_matrix.mm(rotation_matrix).mm(scaling_matrix).mm(shearing_matrix)
 
         return affine_matrix
+
+def norm_img(img):
+    return (img-img.min())/(img.max()-img.min())
+
+def preprocess_uploaded_image(uploaded_image, image_size):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Convert an ndarray to a PIL image
+    if type(uploaded_image) == np.ndarray:
+        uploaded_image = Image.fromarray(uploaded_image)
+    uploaded_image = uploaded_image.convert("RGB")
+    uploaded_image = uploaded_image.resize((image_size, image_size))
+    uploaded_image = np.array(uploaded_image).transpose(2, 0, 1) / 255.0
+    uploaded_image = torch.tensor(uploaded_image, dtype=torch.float32).unsqueeze(0).to(device)
+    return uploaded_image
+
+def get_heatmaps(img, feature_map, source_num, x_coords, y_coords, uploaded_image):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    image_size = img.size(2)
+
+    batch_size = feature_map.size(0)
+    feature_dim = feature_map.size(1)
+
+    target_num = batch_size - 1
+
+    x_coords = [x_coords] * batch_size
+    y_coords = [y_coords] * batch_size
+
+    vectors = feature_map[torch.arange(feature_map.size(0)), :, y_coords, x_coords]
+    vector = vectors[source_num]
+
+    reshaped_feature_map = feature_map.permute(0, 2, 3, 1).view(feature_map.size(0), -1, feature_dim)
+    batch_distance_map = F.pairwise_distance(reshaped_feature_map, vector).view(feature_map.size(0), image_size, image_size)
+
+    norm_batch_distance_map = 1 / torch.cosh(20 * (batch_distance_map - batch_distance_map.min()) / (batch_distance_map.max() - batch_distance_map.min())) ** 2
+
+    source_map = norm_batch_distance_map[source_num].detach().cpu()
+    target_map = norm_batch_distance_map[target_num].detach().cpu()
+
+    alpha = 0.7
+    blended_source = (1 - alpha) * img[source_num] + alpha * torch.cat(((norm_batch_distance_map[source_num] / norm_batch_distance_map[source_num].max()).unsqueeze(0), torch.zeros(2, image_size, image_size, device=device)))
+    blended_target = (1 - alpha) * img[target_num] + alpha * torch.cat(((norm_batch_distance_map[target_num] / norm_batch_distance_map[target_num].max()).unsqueeze(0), torch.zeros(2, image_size, image_size, device=device)))
+
+    blended_source = blended_source.detach().cpu()
+    blended_target = blended_target.detach().cpu()
+
+    return source_map, target_map, blended_source, blended_target
+
+def get_mean_vector(feature_map, points):
+    keypoints_size = points.size(1)
+
+    mean_vector_list = []
+    for i in range(keypoints_size):
+        x_coords, y_coords = torch.round(points[:,i].t()).to(torch.long)
+        vectors = feature_map[torch.arange(feature_map.size(0)), :, y_coords, x_coords] # adjust the size to match the 1-D vectors
+        # mean_vector = vectors[0:10].mean(0) # take the mean vector over 10 feature maps
+        mean_vector = vectors.mean(0)
+        mean_vector_list.append(mean_vector)
+    return mean_vector_list
+
+def get_keypoint_heatmaps(feature_map, mean_vector_list, keypoints_size, imgs):
+    if len(feature_map.size()) == 3:
+        feature_map = feature_map.unsqueeze(0)
+    device = feature_map.device
+    batch_size = feature_map.size(0)
+    feature_dim = feature_map.size(1)
+    size = feature_map.size(2)
+
+    norm_batch_distance_map = torch.zeros(batch_size,size,size,device=device)
+    for i in range(keypoints_size):
+        vector = mean_vector_list[i]
+        reshaped_feature_map = feature_map.permute(0, 2, 3, 1).view(feature_map.size(0), -1, feature_dim)
+
+        batch_distance_map = F.pairwise_distance(reshaped_feature_map, vector).view(feature_map.size(0), size, size)
+        batch_distance_map = 1/torch.cosh( 40*(batch_distance_map-batch_distance_map.min())
+                                          /(batch_distance_map.max()-batch_distance_map.min()) )**2
+        # Normalise
+        m = batch_distance_map/batch_distance_map.max(1).values.max(1).values.unsqueeze(0).unsqueeze(0).repeat(112,112,1).permute(2,0,1)
+        norm_batch_distance_map += m
+    # Clip values above 1
+    norm_batch_distance_map = (-F.relu(-norm_batch_distance_map+1)+1)
+    keypoint_maps = norm_batch_distance_map.detach().cpu()
+
+    alpha = 0.8 # Transparency factor for the heatmap overlay
+    blended_tensors = (1 - alpha) * imgs + alpha * torch.cat(
+        (norm_batch_distance_map.unsqueeze(1), torch.zeros(batch_size,2,size,size,device=device)),
+        dim=1
+    )
+    blended_tensors = norm_img(blended_tensors).detach().cpu()
+
+    return keypoint_maps, blended_tensors
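A small standalone check (not from the commit) of the response curve shared by get_heatmaps() and get_keypoint_heatmaps(): pixel-wise feature distances are min-max normalised and passed through 1/cosh(k*d)**2, a sech^2 bump that equals 1 where the distance is smallest and decays almost to zero elsewhere, so only the best-matching locations light up in the heatmaps.

import torch

d = torch.tensor([0.0, 0.1, 0.5, 1.0])   # min-max normalised distances
print(1 / torch.cosh(20 * d) ** 2)        # roughly [1.0, 0.07, 8e-9, 2e-17]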