swin2mose (#1)

Browse files

- swin2mose: runnable version (22a2f9f37b55bf583c5a2e28910e41d8124be4c8)

Co-authored-by: Leonardo Rossi <hachreak@users.noreply.huggingface.co>

Files changed (7) hide show

.gitignore +1 -0
swin2_mose/libs.py +56 -0
swin2_mose/model.py +9 -12
swin2_mose/moe.py +3 -2
swin2_mose/run.py +36 -20
swin2_mose/utils.py +77 -56
swin2_mose/weights/config-70.yml +46 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.pyc

swin2_mose/libs.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from torch import nn
+def window_reverse(windows, window_size, H, W):  # inverse of window_partition: stitch windows back into one map per image
+    """
+    Args:
+        windows: (num_windows*B, window_size, window_size, C)
+        window_size (int): Window size
+        H (int): Height of image
+        W (int): Width of image
+    Returns:
+        x: (B, H, W, C)
+    """
+    B = int(windows.shape[0] / (H * W / window_size / window_size))  # batch size = total windows / windows per image
+    x = windows.view(B, H // window_size, W // window_size, window_size,
+                     window_size, -1)  # split the window axis into (batch, window row, window col)
+    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)  # pair each window row/col with its in-window axis, then merge to H x W
+    return x
+class Mlp(nn.Module):  # two linear layers: fc1 -> activation -> dropout -> fc2 -> dropout
+    def __init__(self, in_features, hidden_features=None, out_features=None,
+                 act_layer=nn.GELU, drop=0.):
+        super().__init__()
+        out_features = out_features or in_features  # a falsy value (None or 0) falls back to in_features
+        hidden_features = hidden_features or in_features  # same fallback for the hidden width
+        self.fc1 = nn.Linear(in_features, hidden_features)
+        self.act = act_layer()  # act_layer is called with no arguments to build the activation
+        self.fc2 = nn.Linear(hidden_features, out_features)
+        self.drop = nn.Dropout(drop)  # a single Dropout module, applied twice in forward
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)  # dropout after the activation
+        x = self.fc2(x)
+        x = self.drop(x)  # dropout again on the output
+        return x
+def window_partition(x, window_size):  # cut each (H, W) map into window_size x window_size tiles
+    """
+    Args:
+        x: (B, H, W, C)
+        window_size (int): window size
+    Returns:
+        windows: (num_windows*B, window_size, window_size, C)
+    """
+    B, H, W, C = x.shape
+    x = x.view(B, H // window_size, window_size,
+               W // window_size, window_size, C)  # this view only succeeds when H and W are multiples of window_size
+    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(
+        -1, window_size, window_size, C)  # bring the two window-grid axes together, then fold them into the batch axis
+    return windows

swin2_mose/model.py CHANGED Viewed

@@ -1,10 +1,9 @@
 #
-# Source code: https://github.com/mv-lab/swin2sr
 #
-# -----------------------------------------------------------------------------------
-# Swin2SR: Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, https://arxiv.org/abs/2209.11345
-# Written by Conde and Choi et al.
-# -----------------------------------------------------------------------------------
 import math
 import numpy as np
@@ -14,7 +13,7 @@ import torch.nn.functional as F
 import torch.utils.checkpoint as checkpoint
 from timm.models.layers import DropPath, to_2tuple, trunc_normal_
-from utils import window_reverse, Mlp, window_partition
 from moe import MoE
@@ -746,9 +745,8 @@ class UpsampleOneStep(nn.Sequential):
-class Swin2SR(nn.Module):
-    r""" Swin2SR
-        A PyTorch impl of : `Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration`.
     Args:
         img_size (int | tuple(int)): Input image size. Default 64
@@ -784,8 +782,7 @@ class Swin2SR(nn.Module):
                  MoE_config=None,
                  use_rpe_bias=False,
                  **kwargs):
-        super(Swin2SR, self).__init__()
-        print('==== SWIN 2SR')
         num_in_ch = in_chans
         num_out_ch = in_chans
         num_feat = 64
@@ -1154,4 +1151,4 @@ class Swin2SR(nn.Module):
             flops += layer.flops()
         flops += H * W * 3 * self.embed_dim * self.embed_dim
         flops += self.upsample.flops()
-        return flops

 #
+# Source code: https://github.com/IMPLabUniPr/swin2-mose
 #
+# ----------------------------------------------------------------------------
+# https://arxiv.org/abs/2404.18924
+# ----------------------------------------------------------------------------
 import math
 import numpy as np
 import torch.utils.checkpoint as checkpoint
 from timm.models.layers import DropPath, to_2tuple, trunc_normal_
+from libs import window_reverse, Mlp, window_partition
 from moe import MoE
+class Swin2MoSE(nn.Module):
+    r""" Swin2-MoSE
     Args:
         img_size (int | tuple(int)): Input image size. Default 64
                  MoE_config=None,
                  use_rpe_bias=False,
                  **kwargs):
+        super(Swin2MoSE, self).__init__()
         num_in_ch = in_chans
         num_out_ch = in_chans
         num_feat = 64
             flops += layer.flops()
         flops += H * W * 3 * self.embed_dim * self.embed_dim
         flops += self.upsample.flops()
+        return flops

swin2_mose/moe.py CHANGED Viewed

@@ -18,7 +18,8 @@ from torch.distributions.normal import Normal
 from copy import deepcopy
 import numpy as np
-from utils import Mlp as MLP
 class SparseDispatcher(object):
     """Helper for implementing a mixture of experts.
@@ -320,4 +321,4 @@ class MoE(nn.Module):
         expert_outputs = [self.experts[i](expert_inputs[i])
                           for i in range(self.num_experts)]
         y = dispatcher.combine(expert_outputs, cnn_combine=self.cnn_combine)
-        return y, loss

 from copy import deepcopy
 import numpy as np
+from libs import Mlp as MLP
 class SparseDispatcher(object):
     """Helper for implementing a mixture of experts.
         expert_outputs = [self.experts[i](expert_inputs[i])
                           for i in range(self.num_experts)]
         y = dispatcher.combine(expert_outputs, cnn_combine=self.cnn_combine)
+        return y, loss

swin2_mose/run.py CHANGED Viewed

@@ -1,20 +1,36 @@
-import torch
-from model import Swin2SR
-model_weights = "model-70.pt"
-model_params = {
-    "upscale": 2,
-    "in_chans": 4,
-    "img_size": 64,
-    "window_size": 16,
-    "img_range": 1.,
-    "depths": [6, 6, 6, 6],
-    "embed_dim": 90,
-    "num_heads": [6, 6, 6, 6],
-    "mlp_ratio": 2,
-    "upsampler": "pixelshuffledirect",
-    "resi_connection": "1conv"
-}
-sr_model = Swin2SR(**model_params)
-sr_model.load_state_dict(torch.load(model_weights))

+# Demo script: super-resolve one sample with Swin2-MoSE, build an
+# LR / SR / HR comparison figure, then run the benchmark export.
+import benchmark
+import matplotlib.pyplot as plt
+import opensr_test
+from utils import load_swin2_mose, load_config, run_swin2_mose
+path = 'swin2_mose/weights/config-70.yml'
+model_weights = "swin2_mose/weights/model-70.pt"
+index = 2  # which sample of the dataset is visualised
+# load config
+cfg = load_config(path)
+# load model
+model = load_swin2_mose(model_weights, cfg)
+# load the dataset
+dataset = opensr_test.load("venus")
+lr_dataset, hr_dataset = dataset["L2A"], dataset["HRharm"]
+results = run_swin2_mose(model, lr_dataset[index], hr_dataset[index])
+# Display the results: each image goes from channel-first to channel-last
+# and is divided by 3000 before being handed to imshow.
+# NOTE(review): the LR tensor returned by run_swin2_mose keeps four bands;
+# matplotlib reads a 4-channel image as RGBA, so the fourth band presumably
+# ends up as alpha - confirm this is the intended rendering.
+fig, ax = plt.subplots(1, 3, figsize=(10, 5))
+ax[0].imshow(results['lr'].numpy().transpose(1, 2, 0)/3000)
+ax[0].set_title("LR")
+ax[0].axis("off")
+ax[1].imshow(results["sr"].detach().numpy().transpose(1, 2, 0)/3000)
+ax[1].set_title("SR")
+ax[1].axis("off")
+ax[2].imshow(results['hr'].numpy().transpose(1, 2, 0) / 3000)
+ax[2].set_title("HR")
+ax[2].axis("off")  # hide the axes on the HR panel too, matching LR and SR
+# The figure is built but never displayed while the next line stays disabled.
+#  plt.show()
+# Run the experiment
+benchmark.create_geotiff(model, run_swin2_mose, "all", "swin2mose/")

swin2_mose/utils.py CHANGED Viewed

@@ -1,56 +1,77 @@
-from torch import nn
-def window_reverse(windows, window_size, H, W):
-    """
-    Args:
-        windows: (num_windows*B, window_size, window_size, C)
-        window_size (int): Window size
-        H (int): Height of image
-        W (int): Width of image
-    Returns:
-        x: (B, H, W, C)
-    """
-    B = int(windows.shape[0] / (H * W / window_size / window_size))
-    x = windows.view(B, H // window_size, W // window_size, window_size,
-                     window_size, -1)
-    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
-    return x
-class Mlp(nn.Module):
-    def __init__(self, in_features, hidden_features=None, out_features=None,
-                 act_layer=nn.GELU, drop=0.):
-        super().__init__()
-        out_features = out_features or in_features
-        hidden_features = hidden_features or in_features
-        self.fc1 = nn.Linear(in_features, hidden_features)
-        self.act = act_layer()
-        self.fc2 = nn.Linear(hidden_features, out_features)
-        self.drop = nn.Dropout(drop)
-    def forward(self, x):
-        x = self.fc1(x)
-        x = self.act(x)
-        x = self.drop(x)
-        x = self.fc2(x)
-        x = self.drop(x)
-        return x
-def window_partition(x, window_size):
-    """
-    Args:
-        x: (B, H, W, C)
-        window_size (int): window size
-    Returns:
-        windows: (num_windows*B, window_size, window_size, C)
-    """
-    B, H, W, C = x.shape
-    x = x.view(B, H // window_size, window_size,
-               W // window_size, window_size, C)
-    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(
-        -1, window_size, window_size, C)
-    return windows

+import torch
+import yaml
+from model import Swin2MoSE
+def to_shape(t1, t2):  # reshape the values in t1 so they broadcast against t2
+    t1 = t1[None].repeat(t2.shape[0], 1)  # add a leading axis, then tile it t2.shape[0] times
+    t1 = t1.view((t2.shape[:2] + (1, 1)))  # -> t2.shape[:2] + (1, 1); assumes t1 is 1-D with len(t1) == t2.shape[1] - TODO confirm
+    return t1
+def norm(tensor, mean, std):  # standardise: (tensor - mean) / std, with mean/std reshaped by to_shape
+    # get stats
+    mean = torch.tensor(mean).to(tensor.device)  # build the stats on the same device as the data
+    std = torch.tensor(std).to(tensor.device)
+    # norm
+    return (tensor - to_shape(mean, tensor)) / to_shape(std, tensor)
+def denorm(tensor, mean, std):  # undo norm: tensor * std + mean, with mean/std reshaped by to_shape
+    # get stats
+    mean = torch.tensor(mean).to(tensor.device)  # build the stats on the same device as the data
+    std = torch.tensor(std).to(tensor.device)
+    # denorm
+    return (tensor * to_shape(std, tensor)) + to_shape(mean, tensor)
+def load_config(path):  # read the YAML file at `path` and return the parsed object
+    # load config
+    with open(path, 'r') as f:
+        cfg = yaml.safe_load(f)  # safe_load builds plain YAML types only, not arbitrary Python objects
+    return cfg
+def load_swin2_mose(model_weights, cfg):
+    """Build a Swin2MoSE model from ``cfg`` and load trained weights into it.
+
+    Args:
+        model_weights: checkpoint location handed to ``torch.load``; the
+            checkpoint must contain a ``'model_state_dict'`` entry.
+        cfg: configuration mapping; ``cfg['super_res']['model']`` is passed
+            as keyword arguments to ``Swin2MoSE``.
+
+    Returns:
+        The model in evaluation mode, with ``cfg`` attached as ``model.cfg``
+        (``run_swin2_mose`` reads its statistics from there).
+    """
+    # load checkpoint
+    # map_location='cpu' lets a checkpoint written from a GPU be opened on a
+    # machine without CUDA; load_state_dict copies the values into the
+    # model's own parameters afterwards.
+    # NOTE(review): torch.load unpickles the file - only open trusted
+    # checkpoints.
+    checkpoint = torch.load(model_weights, map_location='cpu')
+    # build model
+    sr_model = Swin2MoSE(**cfg['super_res']['model'])
+    sr_model.load_state_dict(
+        checkpoint['model_state_dict'])
+    sr_model.cfg = cfg
+    # A freshly built nn.Module is in training mode; switch to eval so that
+    # dropout-style layers are inactive during inference. Callers that want
+    # to fine-tune can call .train() on the returned model.
+    sr_model.eval()
+    return sr_model
+def run_swin2_mose(model, lr, hr):
+    """Super-resolve one low-resolution sample and return it with its inputs.
+
+    Args:
+        model: model carrying a ``cfg`` attribute (as set by
+            ``load_swin2_mose``) with the dataset statistics.
+        lr: low-resolution sample, channel-first, convertible with
+            ``torch.tensor``; bands 3, 2, 1 and 7 are selected from it.
+        hr: high-resolution reference sample, convertible with
+            ``torch.tensor``; returned unchanged apart from the float cast.
+
+    Returns:
+        dict with ``"lr"`` (the selected, un-normalised input bands),
+        ``"sr"`` (the de-normalised prediction) and ``"hr"`` (the reference),
+        each without the batch axis.
+    """
+    cfg = model.cfg
+    # per-band statistics used to normalise the input / de-normalise the output
+    hr_stats = cfg['dataset']['stats']['tensor_05m_b2b3b4b8']
+    lr_stats = cfg['dataset']['stats']['tensor_10m_b2b3b4b8']
+    # select 10m lr bands and hr bands, adding a batch axis of size 1
+    # NOTE(review): the indices are 3, 2, 1, 7 while the stats key is named
+    # b2b3b4b8 - confirm the selected band order matches the stats order.
+    lr_orig = torch.tensor(lr)[None].float()[:, [3, 2, 1, 7]]
+    hr_orig = torch.tensor(hr)[None].float()
+    # normalize the input (the reference is only returned, so it is left as is)
+    lr = norm(lr_orig, mean=lr_stats['mean'], std=lr_stats['std'])
+    # predict an image; inference only, so skip building the autograd graph
+    with torch.no_grad():
+        sr = model(lr)
+    # the model may return extra values alongside the image; keep the image
+    if not torch.is_tensor(sr):
+        sr, _ = sr
+    # denorm sr
+    sr = denorm(sr, mean=hr_stats['mean'], std=hr_stats['std'])
+    return {
+        "lr": lr_orig[0],
+        "sr": sr[0],
+        "hr": hr_orig[0],
+    }

swin2_mose/weights/config-70.yml ADDED Viewed

	@@ -0,0 +1,46 @@

+dataset:
+  root_path: data/sen2venus
+  stats:
+    use_minmax: true
+    tensor_05m_b2b3b4b8: {
+      mean: [444.21923828125, 715.9031372070312, 813.4345703125, 2604.867919921875],
+      std: [279.85552978515625, 385.3569641113281, 648.458984375, 796.9918212890625],
+      min: [-1025.0, -3112.0, -5122.0, -3851.0],
+      max: [14748.0, 14960.0, 16472.0, 16109.0]
+    }
+    tensor_10m_b2b3b4b8: {
+      mean: [443.78643798828125, 715.4202270507812, 813.0512084960938, 2602.813232421875],
+      std: [283.89276123046875, 389.26361083984375, 651.094970703125, 811.5682373046875],
+      min: [-848.0, -902.0, -946.0, -323.0],
+      max: [19684.0, 17982.0, 17064.0, 15958.0]
+    }
+  hr_name: tensor_05m_b2b3b4b8
+  lr_name: tensor_10m_b2b3b4b8
+  collate_fn: mods.v3.collate_fn
+  denorm: mods.v3.uncollate_fn
+  printable: mods.v3.printable
+super_res: {
+  version: 'v2',
+  model: {
+    upscale: 2,
+    use_lepe: true,
+    use_cpb_bias: false,
+    use_rpe_bias: true,
+    mlp_ratio: 1,
+    MoE_config: {
+      k: 2,
+      num_experts: 8,
+      with_noise: false,
+      with_smart_merger: v1,
+    },
+    depths: [6, 6, 6, 6],
+    embed_dim: 90,
+    img_range: 1.,
+    img_size: 64,
+    in_chans: 4,
+    num_heads: [6, 6, 6, 6],
+    resi_connection: 1conv,
+    upsampler: pixelshuffledirect,
+    window_size: 16,
+  }
+}