amazinghaha committed
Commit b4d6f1e
Parent: 5c4b9bd

Upload 106 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full set.
Files changed (50)
  1. model/CoordAttention.py +110 -0
  2. model/Vision_Transformer_with_mask.py +990 -0
  3. model/__pycache__/CoordAttention.cpython-38.pyc +0 -0
  4. model/__pycache__/Vision_Transformer_with_mask.cpython-38.pyc +0 -0
  5. model/__pycache__/features.cpython-38.pyc +0 -0
  6. model/__pycache__/helpers.cpython-38.pyc +0 -0
  7. model/__pycache__/hub.cpython-38.pyc +0 -0
  8. model/__pycache__/registry.cpython-38.pyc +0 -0
  9. model/features.py +284 -0
  10. model/helpers.py +508 -0
  11. model/hub.py +96 -0
  12. model/layers/__init__.py +40 -0
  13. model/layers/__pycache__/__init__.cpython-38.pyc +0 -0
  14. model/layers/__pycache__/activations.cpython-38.pyc +0 -0
  15. model/layers/__pycache__/activations_jit.cpython-38.pyc +0 -0
  16. model/layers/__pycache__/activations_me.cpython-38.pyc +0 -0
  17. model/layers/__pycache__/adaptive_avgmax_pool.cpython-38.pyc +0 -0
  18. model/layers/__pycache__/blur_pool.cpython-38.pyc +0 -0
  19. model/layers/__pycache__/bottleneck_attn.cpython-38.pyc +0 -0
  20. model/layers/__pycache__/cbam.cpython-38.pyc +0 -0
  21. model/layers/__pycache__/classifier.cpython-38.pyc +0 -0
  22. model/layers/__pycache__/cond_conv2d.cpython-38.pyc +0 -0
  23. model/layers/__pycache__/config.cpython-38.pyc +0 -0
  24. model/layers/__pycache__/conv2d_same.cpython-38.pyc +0 -0
  25. model/layers/__pycache__/conv_bn_act.cpython-38.pyc +0 -0
  26. model/layers/__pycache__/create_act.cpython-38.pyc +0 -0
  27. model/layers/__pycache__/create_attn.cpython-38.pyc +0 -0
  28. model/layers/__pycache__/create_conv2d.cpython-38.pyc +0 -0
  29. model/layers/__pycache__/create_norm_act.cpython-38.pyc +0 -0
  30. model/layers/__pycache__/drop.cpython-38.pyc +0 -0
  31. model/layers/__pycache__/eca.cpython-38.pyc +0 -0
  32. model/layers/__pycache__/evo_norm.cpython-38.pyc +0 -0
  33. model/layers/__pycache__/gather_excite.cpython-38.pyc +0 -0
  34. model/layers/__pycache__/global_context.cpython-38.pyc +0 -0
  35. model/layers/__pycache__/halo_attn.cpython-38.pyc +0 -0
  36. model/layers/__pycache__/helpers.cpython-38.pyc +0 -0
  37. model/layers/__pycache__/inplace_abn.cpython-38.pyc +0 -0
  38. model/layers/__pycache__/involution.cpython-38.pyc +0 -0
  39. model/layers/__pycache__/lambda_layer.cpython-38.pyc +0 -0
  40. model/layers/__pycache__/linear.cpython-38.pyc +0 -0
  41. model/layers/__pycache__/mixed_conv2d.cpython-38.pyc +0 -0
  42. model/layers/__pycache__/mlp.cpython-38.pyc +0 -0
  43. model/layers/__pycache__/non_local_attn.cpython-38.pyc +0 -0
  44. model/layers/__pycache__/norm.cpython-38.pyc +0 -0
  45. model/layers/__pycache__/norm_act.cpython-38.pyc +0 -0
  46. model/layers/__pycache__/padding.cpython-38.pyc +0 -0
  47. model/layers/__pycache__/patch_embed.cpython-38.pyc +0 -0
  48. model/layers/__pycache__/pool2d_same.cpython-38.pyc +0 -0
  49. model/layers/__pycache__/selective_kernel.cpython-38.pyc +0 -0
  50. model/layers/__pycache__/separable_conv.cpython-38.pyc +0 -0
model/CoordAttention.py ADDED
@@ -0,0 +1,110 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ class h_sigmoid(nn.Module):
+     def __init__(self, inplace=True):
+         super(h_sigmoid, self).__init__()
+         self.relu = nn.ReLU6(inplace=inplace)
+
+     def forward(self, x):
+         return self.relu(x + 3) / 6
+
+
+ class h_swish(nn.Module):
+     def __init__(self, inplace=True):
+         super(h_swish, self).__init__()
+         self.sigmoid = h_sigmoid(inplace=inplace)
+
+     def forward(self, x):
+         return x * self.sigmoid(x)
+
+
+ class CoordAtt(nn.Module):
+     def __init__(self, inp, oup, reduction=32):
+         super(CoordAtt, self).__init__()
+         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
+         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
+
+         mip = max(8, inp // reduction)
+
+         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
+         self.bn1 = nn.BatchNorm2d(mip)
+
+         self.bn2 = nn.BatchNorm2d(1)
+         self.bn3 = nn.BatchNorm2d(1)
+         self.act = h_swish()
+
+         self.bn4 = nn.BatchNorm2d(mip)
+         self.bn5 = nn.BatchNorm2d(mip)
+
+         self.bn6 = nn.BatchNorm2d(1)
+         self.bn7 = nn.BatchNorm2d(1)
+
+         self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
+         self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
+
+     def forward(self, x):
+         x = torch.unsqueeze(x, 1)  # e.g. 2 x 1 x 2304 x 196
+         identity = x
+
+         n, c, h, w = x.size()  # 2 x 1 x 2304 x 196
+         x_h = self.bn2(self.pool_h(x))  # 2 x 1 x 2304 x 1
+         x_w = self.bn3(self.pool_w(x).permute(0, 1, 3, 2))  # 2 x 1 x 196 x 1
+         identity_x_w = x_w
+         identity_x_h = x_h
+         y = torch.cat([x_h, x_w], dim=2)
+         y = self.conv1(y)  # 2 x 8 x 2500 x 1
+         y = self.bn1(y)
+         y = self.act(y)
+
+         x_h, x_w = torch.split(y, [h, w], dim=2)  # 2 x 8 x 2304 x 1 | 2 x 8 x 196 x 1
+         x_h = self.bn4(x_h) + identity_x_h
+         x_w = self.bn5(x_w) + identity_x_w
+         x_w = x_w.permute(0, 1, 3, 2)
+
+         a_h = self.bn6(self.conv_h(x_h)).sigmoid()  # 2 x 1 x 2304 x 1
+         a_w = self.bn7(self.conv_w(x_w)).sigmoid()  # 2 x 1 x 1 x 196
+
+         out = identity * a_w * a_h  # element-wise product
+         out = torch.squeeze(out, 1)
+         return out
+
+ class CoordAtt_ori(nn.Module):
+     def __init__(self, inp, oup, reduction=32):
+         super(CoordAtt_ori, self).__init__()
+         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
+         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
+
+         mip = max(8, inp // reduction)
+
+         self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
+         self.bn1 = nn.BatchNorm2d(mip)
+         self.act = h_swish()
+
+         self.conv_h = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
+         self.conv_w = nn.Conv2d(mip, oup, kernel_size=1, stride=1, padding=0)
+
+     def forward(self, x):
+         x = torch.unsqueeze(x, 1)
+         identity = x
+
+         n, c, h, w = x.size()
+         x_h = self.pool_h(x)
+         x_w = self.pool_w(x).permute(0, 1, 3, 2)
+
+         y = torch.cat([x_h, x_w], dim=2)
+         y = self.conv1(y)
+         y = self.bn1(y)
+         y = self.act(y)
+
+         x_h, x_w = torch.split(y, [h, w], dim=2)
+         x_w = x_w.permute(0, 1, 3, 2)
+
+         a_h = self.conv_h(x_h).sigmoid()
+         a_w = self.conv_w(x_w).sigmoid()
+
+         out = identity * a_w * a_h
+         out = torch.squeeze(out, 1)
+         return out
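A minimal sketch of how CoordAtt might be exercised on its own, assuming the inp=oup=1 configuration implied by the BatchNorm2d(1) layers and the shape comments in forward(); the tensor sizes below are illustrative, not taken from the training code, and the import assumes the repository root is on the Python path:

import torch
from model.CoordAttention import CoordAtt

att = CoordAtt(inp=1, oup=1, reduction=32)  # internal width mip = max(8, 1 // 32) = 8
x = torch.randn(2, 2304, 196)               # 3-D input; a singleton channel dim is added inside forward()
out = att(x)                                # attention factors applied along both axes, then squeezed back
print(out.shape)                            # torch.Size([2, 2304, 196])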
model/Vision_Transformer_with_mask.py ADDED
@@ -0,0 +1,990 @@
1
+ """ Vision Transformer (ViT) in PyTorch
2
+
3
+ A PyTorch implementation of Vision Transformers as described in:
4
+
5
+ 'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
6
+ - https://arxiv.org/abs/2010.11929
7
+
8
+ `How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
9
+ - https://arxiv.org/abs/2106.10270
10
+
11
+ The official jax code is released and available at https://github.com/google-research/vision_transformer
12
+
13
+ DeiT model defs and weights from https://github.com/facebookresearch/deit,
14
+ paper `DeiT: Data-efficient Image Transformers` - https://arxiv.org/abs/2012.12877
15
+
16
+ Acknowledgments:
17
+ * The paper authors for releasing code and weights, thanks!
18
+ * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch ... check it out
19
+ for some einops/einsum fun
20
+ * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
21
+ * Bert reference code checks against Huggingface Transformers and Tensorflow Bert
22
+
23
+ Hacked together by / Copyright 2021 Ross Wightman
24
+ """
25
+ import math
26
+ import logging
27
+ from functools import partial
28
+ from collections import OrderedDict
29
+ from copy import deepcopy
30
+
31
+ import torch
32
+ import torch.nn as nn
33
+ import torch.nn.functional as F
34
+ from .layers import PatchEmbed, Mlp, DropPath, to_2tuple, trunc_normal_
35
+
36
+ from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
37
+ from .helpers import build_model_with_cfg, named_apply, adapt_input_conv
38
+ from .layers import PatchEmbed, Mlp, DropPath, trunc_normal_, lecun_normal_
39
+ from .registry import register_model
40
+
41
+ _logger = logging.getLogger(__name__)
42
+
43
+
44
+ def _cfg(url='', **kwargs):
45
+ return {
46
+ 'url': url,
47
+ 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None,
48
+ 'crop_pct': .9, 'interpolation': 'bicubic', 'fixed_input_size': True,
49
+ 'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,
50
+ 'first_conv': 'patch_embed.proj', 'classifier': 'head',
51
+ **kwargs
52
+ }
53
+
54
+
55
+ default_cfgs = {
56
+ # patch models (weights from official Google JAX impl)
57
+ 'vit_tiny_patch16_224': _cfg(
58
+ url='https://storage.googleapis.com/vit_models/augreg/'
59
+ 'Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'),
60
+ 'vit_tiny_patch16_384': _cfg(
61
+ url='https://storage.googleapis.com/vit_models/augreg/'
62
+ 'Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
63
+ input_size=(3, 384, 384), crop_pct=1.0),
64
+ 'vit_small_patch32_224': _cfg(
65
+ url='https://storage.googleapis.com/vit_models/augreg/'
66
+ 'S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'),
67
+ 'vit_small_patch32_384': _cfg(
68
+ url='https://storage.googleapis.com/vit_models/augreg/'
69
+ 'S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
70
+ input_size=(3, 384, 384), crop_pct=1.0),
71
+ 'vit_small_patch16_224': _cfg(
72
+ url='https://storage.googleapis.com/vit_models/augreg/'
73
+ 'S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'),
74
+ 'vit_small_patch16_384': _cfg(
75
+ url='https://storage.googleapis.com/vit_models/augreg/'
76
+ 'S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
77
+ input_size=(3, 384, 384), crop_pct=1.0),
78
+ 'vit_base_patch32_224': _cfg(
79
+ url='https://storage.googleapis.com/vit_models/augreg/'
80
+ 'B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz'),
81
+ 'vit_base_patch32_384': _cfg(
82
+ url='https://storage.googleapis.com/vit_models/augreg/'
83
+ 'B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz',
84
+ input_size=(3, 384, 384), crop_pct=1.0),
85
+ 'vit_base_patch16_224': _cfg(
86
+ url='https://storage.googleapis.com/vit_models/augreg/'
87
+ 'B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npz'),
88
+ 'vit_base_patch16_384': _cfg(
89
+ url='https://storage.googleapis.com/vit_models/augreg/'
90
+ 'B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz',
91
+ input_size=(3, 384, 384), crop_pct=1.0),
92
+ 'vit_large_patch32_224': _cfg(
93
+ url='', # no official model weights for this combo, only for in21k
94
+ ),
95
+ 'vit_large_patch32_384': _cfg(
96
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pth',
97
+ input_size=(3, 384, 384), crop_pct=1.0),
98
+ 'vit_large_patch16_224': _cfg(
99
+ url='https://storage.googleapis.com/vit_models/augreg/'
100
+ 'L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npz'),
101
+ 'vit_large_patch16_384': _cfg(
102
+ url='https://storage.googleapis.com/vit_models/augreg/'
103
+ 'L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz',
104
+ input_size=(3, 384, 384), crop_pct=1.0),
105
+
106
+ # patch models, imagenet21k (weights from official Google JAX impl)
107
+ 'vit_tiny_patch16_224_in21k': _cfg(
108
+ url='https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npz',
109
+ num_classes=21843),
110
+ 'vit_small_patch32_224_in21k': _cfg(
111
+ url='https://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npz',
112
+ num_classes=21843),
113
+ 'vit_small_patch16_224_in21k': _cfg(
114
+ url='https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npz',
115
+ num_classes=21843),
116
+ 'vit_base_patch32_224_in21k': _cfg(
117
+ url='https://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npz',
118
+ num_classes=21843),
119
+ 'vit_base_patch16_224_in21k': _cfg(
120
+ url='https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npz',
121
+ num_classes=21843),
122
+ 'vit_large_patch32_224_in21k': _cfg(
123
+ url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_patch32_224_in21k-9046d2e7.pth',
124
+ num_classes=21843),
125
+ 'vit_large_patch16_224_in21k': _cfg(
126
+ url='https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npz',
127
+ num_classes=21843),
128
+ 'vit_huge_patch14_224_in21k': _cfg(
129
+ url='https://storage.googleapis.com/vit_models/imagenet21k/ViT-H_14.npz',
130
+ hf_hub='timm/vit_huge_patch14_224_in21k',
131
+ num_classes=21843),
132
+
133
+ # deit models (FB weights)
134
+ 'deit_tiny_patch16_224': _cfg(
135
+ url='https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth',
136
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
137
+ 'deit_small_patch16_224': _cfg(
138
+ url='https://dl.fbaipublicfiles.com/deit/deit_small_patch16_224-cd65a155.pth',
139
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
140
+ 'deit_base_patch16_224': _cfg(
141
+ url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth',
142
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
143
+ 'deit_base_patch16_384': _cfg(
144
+ url='https://dl.fbaipublicfiles.com/deit/deit_base_patch16_384-8de9b5d1.pth',
145
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, input_size=(3, 384, 384), crop_pct=1.0),
146
+ 'deit_tiny_distilled_patch16_224': _cfg(
147
+ url='https://dl.fbaipublicfiles.com/deit/deit_tiny_distilled_patch16_224-b40b3cf7.pth',
148
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, classifier=('head', 'head_dist')),
149
+ 'deit_small_distilled_patch16_224': _cfg(
150
+ url='https://dl.fbaipublicfiles.com/deit/deit_small_distilled_patch16_224-649709d9.pth',
151
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, classifier=('head', 'head_dist')),
152
+ 'deit_base_distilled_patch16_224': _cfg(
153
+ url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_224-df68dfff.pth',
154
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, classifier=('head', 'head_dist')),
155
+ 'deit_base_distilled_patch16_384': _cfg(
156
+ url='https://dl.fbaipublicfiles.com/deit/deit_base_distilled_patch16_384-d0272ac0.pth',
157
+ mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD, input_size=(3, 384, 384), crop_pct=1.0,
158
+ classifier=('head', 'head_dist')),
159
+
160
+ # ViT ImageNet-21K-P pretraining by MIIL
161
+ 'vit_base_patch16_224_miil_in21k': _cfg(
162
+ url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/vit_base_patch16_224_in21k_miil.pth',
163
+ mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
164
+ ),
165
+ 'vit_base_patch16_224_miil': _cfg(
166
+ url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm'
167
+ '/vit_base_patch16_224_1k_miil_84_4.pth',
168
+ mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear',
169
+ ),
170
+ }
171
+
172
+
173
+ class CrossAttention(nn.Module):
174
+ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
175
+ super().__init__()
176
+ self.num_heads = num_heads
177
+ head_dim = dim // num_heads
178
+ self.scale = qk_scale or head_dim ** -0.5  # this line adds the extra qk_scale option; 0.125 here
179
+
180
+ self.wq = nn.Linear(dim, dim, bias=qkv_bias)
181
+ self.wk = nn.Linear(dim, dim, bias=qkv_bias)
182
+ self.wv = nn.Linear(dim, dim, bias=qkv_bias)
183
+ self.attn_drop = nn.Dropout(attn_drop)
184
+ self.proj = nn.Linear(dim, dim)
185
+ self.proj_drop = nn.Dropout(proj_drop)
186
+
187
+ def forward(self, x):
188
+
189
+ B, N, C = x.shape #2 512 768
190
+ q = self.wq(x[:, 0:int(N/2), ...]).reshape(B, int(N/2), self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)#2 12 256 64
191
+ k = self.wk(x[:, (int(N/2)):, ...]).reshape(B, int(N/2), self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
192
+ v = self.wv(x[:, (int(N/2)):, ...]).reshape(B, int(N/2), self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
193
+
194
+ attn = (q @ k.transpose(-2, -1)) * self.scale
195
+ attn = attn.softmax(dim=-1)
196
+ attn = self.attn_drop(attn)
197
+
198
+ x = (attn @ v).transpose(1, 2).reshape(B, int(N/2), C)  # becomes (B, N/2, C): 2 256 768
199
+ x = self.proj(x)
200
+ x = self.proj_drop(x)
201
+ return x
202
+
203
+
204
+
205
+ class Attention(nn.Module):
206
+ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None,attn_drop=0., proj_drop=0.):
207
+ super().__init__()
208
+ self.num_heads = num_heads
209
+ head_dim = dim // num_heads
210
+ self.scale = qk_scale or head_dim ** -0.5
211
+
212
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
213
+ self.attn_drop = nn.Dropout(attn_drop)
214
+ self.proj = nn.Linear(dim, dim)
215
+ self.proj_drop = nn.Dropout(proj_drop)
216
+
217
+ def forward(self, data):
218
+ b,c,h = data.shape
219
+ x,atten_mask = data[:,0:int(c/2),...],data[:,int(c/2):,...]
220
+ B, N, C = x.shape
221
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
222
+ q, k, v = qkv[0], qkv[1], qkv[2] #2,12,49,64 # make torchscript happy (cannot use tensor as tuple)
223
+
224
+
225
+ attn = (q @ k.transpose(-2, -1)) * self.scale #2,12,49,49 #mask 2,1,49,49
226
+ if atten_mask.sum() != 0:
227
+ atten_mask = atten_mask.unsqueeze(1) # 2,1,49,49
228
+ attn = attn + atten_mask
229
+ attn = attn.softmax(dim=-1)
230
+ attn = self.attn_drop(attn)
231
+
232
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
233
+ x = self.proj(x)
234
+ x = self.proj_drop(x)
235
+ return x
236
+
237
+ class Attention_ori(nn.Module):
238
+ def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None,attn_drop=0., proj_drop=0.):
239
+ super().__init__()
240
+ self.num_heads = num_heads
241
+ head_dim = dim // num_heads
242
+ self.scale = qk_scale or head_dim ** -0.5
243
+
244
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
245
+ self.attn_drop = nn.Dropout(attn_drop)
246
+ self.proj = nn.Linear(dim, dim)
247
+ self.proj_drop = nn.Dropout(proj_drop)
248
+
249
+ def forward(self, x):
250
+
251
+ B, N, C = x.shape
252
+ qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
253
+ q, k, v = qkv[0], qkv[1], qkv[2] #2,12,49,64 # make torchscript happy (cannot use tensor as tuple)
254
+ attn = (q @ k.transpose(-2, -1)) * self.scale #2,12,49,49 #mask 2,1,49,49
255
+
256
+ attn = attn.softmax(dim=-1)
257
+ attn = self.attn_drop(attn)
258
+
259
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
260
+ x = self.proj(x)
261
+ x = self.proj_drop(x)
262
+ return x
263
+
264
+
265
+
266
+
267
+ class Block(nn.Module):
268
+
269
+ def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0.,
270
+ drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
271
+ super().__init__()
272
+ self.norm1 = norm_layer(dim)
273
+ self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop)
274
+ # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
275
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
276
+ self.norm2 = norm_layer(dim)
277
+ mlp_hidden_dim = int(dim * mlp_ratio)
278
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
279
+
280
+ def forward(self, data):
281
+ b,c,h = data.shape
282
+ x,mask = data[:,0:int(c/2),...],data[:,int(c/2):,...]
283
+ x = x + self.drop_path(self.attn(torch.cat([self.norm1(x),mask],dim=1)))
284
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
285
+ return torch.cat([x,mask],dim=1)
286
+
287
+ class mask_PatchEmbed(nn.Module):
288
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, norm_layer=None, flatten=True):
289
+ super().__init__()
290
+ img_size = to_2tuple(img_size)
291
+ patch_size = to_2tuple(patch_size)
292
+ self.img_size = img_size
293
+ self.patch_size = patch_size
294
+ self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
295
+ self.flatten = flatten
296
+ self.num_patches = self.grid_size[0] * self.grid_size[1]
297
+ self.proj = nn.Conv2d(in_chans, 1, kernel_size=patch_size, stride=patch_size).requires_grad_(False)
298
+ nn.init.ones_(self.proj.weight)
299
+ nn.init.zeros_(self.proj.bias)
300
+ def forward(self, x):
301
+ B, C, H, W = x.shape
302
+ assert H == self.img_size[0] and W == self.img_size[1], \
303
+ f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
304
+ x = self.proj(x)
305
+ if self.flatten:
306
+ x = x.flatten(2).transpose(1, 2) # BCHW -> BNC
307
+ return x
308
+
309
+ class VisionTransformer(nn.Module):
310
+ """ Vision Transformer
311
+
312
+ A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`
313
+ - https://arxiv.org/abs/2010.11929
314
+
315
+ Includes distillation token & head support for `DeiT: Data-efficient Image Transformers`
316
+ - https://arxiv.org/abs/2012.12877
317
+ """
318
+
319
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
320
+ num_heads=12, mlp_ratio=4., qkv_bias=True, representation_size=None, distilled=False,
321
+ drop_rate=0., attn_drop_rate=0., drop_path_rate=0., embed_layer=PatchEmbed, norm_layer=None,
322
+ act_layer=None,as_backbone=True, weight_init=''):
323
+ """
324
+ Args:
325
+ img_size (int, tuple): input image size
326
+ patch_size (int, tuple): patch size
327
+ in_chans (int): number of input channels
328
+ num_classes (int): number of classes for classification head
329
+ embed_dim (int): embedding dimension
330
+ depth (int): depth of transformer
331
+ num_heads (int): number of attention heads
332
+ mlp_ratio (int): ratio of mlp hidden dim to embedding dim
333
+ qkv_bias (bool): enable bias for qkv if True
334
+ representation_size (Optional[int]): enable and set representation layer (pre-logits) to this value if set
335
+ distilled (bool): model includes a distillation token and head as in DeiT models
336
+ drop_rate (float): dropout rate
337
+ attn_drop_rate (float): attention dropout rate
338
+ drop_path_rate (float): stochastic depth rate
339
+ embed_layer (nn.Module): patch embedding layer
340
+ norm_layer: (nn.Module): normalization layer
341
+ weight_init: (str): weight init scheme
342
+ """
343
+ super().__init__()
344
+ self.num_classes = num_classes
345
+ self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
346
+ self.num_tokens = 2 if distilled else 1
347
+ self.num_heads = num_heads
348
+ norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
349
+ act_layer = act_layer or nn.GELU
350
+ self.as_backbone = as_backbone  # whether to act as a backbone; if it is not a classification task, the class token is not added
351
+ self.patch_embed = embed_layer(
352
+ img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
353
+ self.mask_embed = mask_PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans)
354
+ num_patches = self.patch_embed.num_patches
355
+
356
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
357
+ self.dist_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if distilled else None
358
+ if not self.as_backbone:
359
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
360
+ else:
361
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
362
+ self.pos_drop = nn.Dropout(p=drop_rate)
363
+
364
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
365
+ self.blocks = nn.Sequential(*[
366
+ Block(
367
+ dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop=drop_rate,
368
+ attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, act_layer=act_layer)
369
+ for i in range(depth)])
370
+ self.norm = norm_layer(embed_dim)
371
+
372
+ # Representation layer
373
+ if representation_size and not distilled:
374
+ self.num_features = representation_size
375
+ self.pre_logits = nn.Sequential(OrderedDict([
376
+ ('fc', nn.Linear(embed_dim, representation_size)),
377
+ ('act', nn.Tanh())
378
+ ]))
379
+ else:
380
+ self.pre_logits = nn.Identity()
381
+ if not self.as_backbone:
382
+ self.avgpool = nn.AdaptiveAvgPool1d(1)
383
+ # Classifier head(s)
384
+ self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity()
385
+ self.head_dist = None
386
+ if distilled:
387
+ self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
388
+
389
+ self.init_weights(weight_init)
390
+
391
+ def init_weights(self, mode=''):
392
+ assert mode in ('jax', 'jax_nlhb', 'nlhb', '')
393
+ head_bias = -math.log(self.num_classes) if 'nlhb' in mode else 0.
394
+ trunc_normal_(self.pos_embed, std=.02)
395
+ if self.dist_token is not None:
396
+ trunc_normal_(self.dist_token, std=.02)
397
+ if mode.startswith('jax'):
398
+ # leave cls token as zeros to match jax impl
399
+ named_apply(partial(_init_vit_weights, head_bias=head_bias, jax_impl=True), self)
400
+ else:
401
+ trunc_normal_(self.cls_token, std=.02)
402
+ self.apply(_init_vit_weights)
403
+
404
+ def _init_weights(self, m):
405
+ # this fn left here for compat with downstream users
406
+ _init_vit_weights(m)
407
+
408
+ @torch.jit.ignore()
409
+ def load_pretrained(self, checkpoint_path, prefix=''):
410
+ _load_weights(self, checkpoint_path, prefix)
411
+
412
+ @torch.jit.ignore
413
+ def no_weight_decay(self):
414
+ return {'pos_embed', 'cls_token', 'dist_token'}
415
+
416
+ def get_classifier(self):
417
+ if self.dist_token is None:
418
+ return self.head
419
+ else:
420
+ return self.head, self.head_dist
421
+
422
+ def reset_classifier(self, num_classes, global_pool=''):
423
+ self.num_classes = num_classes
424
+ self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
425
+ if self.num_tokens == 2:
426
+ self.head_dist = nn.Linear(self.embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
427
+
428
+ def forward_features(self, data):
429
+ x,mask = data[:,0,:,:].unsqueeze(1),data[:,1,:,:].unsqueeze(1)
430
+ x = self.patch_embed(x)#B N C
431
+ atten_mask = torch.zeros_like(x) # 2 49 768
432
+ if mask.sum() != 0:
433
+ mask = self.mask_embed(mask) ###
434
+ mask.squeeze_(dim=2)
435
+ mask[mask != 0] = 1  # H*W is the number of tokens, C is the encoding length
436
+ k1 = mask[:, None, :]
437
+ k2 = torch.ones_like(mask)[:, :, None]
438
+ k3 = k1 * k2
439
+ atten_mask = (1.0 - k3) * (-1e6)
440
+ atten_mask.requires_grad_(True)
441
+ self.atten_mask = atten_mask
442
+ cls_token = self.cls_token.expand(x.shape[0], -1, -1) # stole cls_tokens impl from Phil Wang, thanks
443
+ if not self.as_backbone:
444
+ if self.dist_token is None:
445
+ x = torch.cat((cls_token, x), dim=1)
446
+ else:
447
+ x = torch.cat((cls_token, self.dist_token.expand(x.shape[0], -1, -1), x), dim=1)
448
+ x = self.pos_drop(x + self.pos_embed) #2 49 768
449
+ x = self.blocks(torch.cat([x,atten_mask],dim=1))
450
+ b,c,h = x.shape
451
+ x = x[:,0:int(c/2),...]
452
+ x = self.norm(x)
453
+ if self.as_backbone:
454
+ # x = self.avgpool(x.transpose(1, 2)) # B C 1
455
+ # x = torch.flatten(x, 1)
456
+ return x
457
+ if self.dist_token is None:
458
+ return self.pre_logits(x[:, 0])
459
+ else:
460
+ return x[:, 0], x[:, 1]
461
+
462
+ def forward(self, data):
463
+ x = self.forward_features(data) #2 49 768
464
+ if self.as_backbone:
465
+ return x
466
+ else:
467
+ if self.head_dist is not None:
468
+ x, x_dist = self.head(x[0]), self.head_dist(x[1]) # x must be a tuple
469
+ if self.training and not torch.jit.is_scripting():
470
+ # return both classifier predictions during training; at inference their average is returned below
471
+ return x, x_dist
472
+ else:
473
+ return (x + x_dist) / 2
474
+ else:
475
+ x = self.head(x)
476
+ return x
477
+
478
+
479
+ def _init_vit_weights(module: nn.Module, name: str = '', head_bias: float = 0., jax_impl: bool = False):
480
+ """ ViT weight initialization
481
+ * When called without n, head_bias, jax_impl args it will behave exactly the same
482
+ as my original init for compatibility with prev hparam / downstream use cases (ie DeiT).
483
+ * When called w/ valid n (module name) and jax_impl=True, will (hopefully) match JAX impl
484
+ """
485
+ if isinstance(module, nn.Linear):
486
+ if name.startswith('head'):
487
+ nn.init.zeros_(module.weight)
488
+ nn.init.constant_(module.bias, head_bias)
489
+ elif name.startswith('pre_logits'):
490
+ lecun_normal_(module.weight)
491
+ nn.init.zeros_(module.bias)
492
+ else:
493
+ if jax_impl:
494
+ nn.init.xavier_uniform_(module.weight)
495
+ if module.bias is not None:
496
+ if 'mlp' in name:
497
+ nn.init.normal_(module.bias, std=1e-6)
498
+ else:
499
+ nn.init.zeros_(module.bias)
500
+ else:
501
+ trunc_normal_(module.weight, std=.02)
502
+ if module.bias is not None:
503
+ nn.init.zeros_(module.bias)
504
+ elif jax_impl and isinstance(module, nn.Conv2d):
505
+ # NOTE conv was left to pytorch default in my original init
506
+ lecun_normal_(module.weight)
507
+ if module.bias is not None:
508
+ nn.init.zeros_(module.bias)
509
+ elif isinstance(module, (nn.LayerNorm, nn.GroupNorm, nn.BatchNorm2d)):
510
+ nn.init.zeros_(module.bias)
511
+ nn.init.ones_(module.weight)
512
+
513
+
514
+ @torch.no_grad()
515
+ def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix: str = ''):
516
+ """ Load weights from .npz checkpoints for official Google Brain Flax implementation
517
+ """
518
+ import numpy as np
519
+
520
+ def _n2p(w, t=True):
521
+ if w.ndim == 4 and w.shape[0] == w.shape[1] == w.shape[2] == 1:
522
+ w = w.flatten()
523
+ if t:
524
+ if w.ndim == 4:
525
+ w = w.transpose([3, 2, 0, 1])
526
+ elif w.ndim == 3:
527
+ w = w.transpose([2, 0, 1])
528
+ elif w.ndim == 2:
529
+ w = w.transpose([1, 0])
530
+ return torch.from_numpy(w)
531
+
532
+ w = np.load(checkpoint_path)
533
+ if not prefix and 'opt/target/embedding/kernel' in w:
534
+ prefix = 'opt/target/'
535
+
536
+ if hasattr(model.patch_embed, 'backbone'):
537
+ # hybrid
538
+ backbone = model.patch_embed.backbone
539
+ stem_only = not hasattr(backbone, 'stem')
540
+ stem = backbone if stem_only else backbone.stem
541
+ stem.conv.weight.copy_(adapt_input_conv(stem.conv.weight.shape[1], _n2p(w[f'{prefix}conv_root/kernel'])))
542
+ stem.norm.weight.copy_(_n2p(w[f'{prefix}gn_root/scale']))
543
+ stem.norm.bias.copy_(_n2p(w[f'{prefix}gn_root/bias']))
544
+ if not stem_only:
545
+ for i, stage in enumerate(backbone.stages):
546
+ for j, block in enumerate(stage.blocks):
547
+ bp = f'{prefix}block{i + 1}/unit{j + 1}/'
548
+ for r in range(3):
549
+ getattr(block, f'conv{r + 1}').weight.copy_(_n2p(w[f'{bp}conv{r + 1}/kernel']))
550
+ getattr(block, f'norm{r + 1}').weight.copy_(_n2p(w[f'{bp}gn{r + 1}/scale']))
551
+ getattr(block, f'norm{r + 1}').bias.copy_(_n2p(w[f'{bp}gn{r + 1}/bias']))
552
+ if block.downsample is not None:
553
+ block.downsample.conv.weight.copy_(_n2p(w[f'{bp}conv_proj/kernel']))
554
+ block.downsample.norm.weight.copy_(_n2p(w[f'{bp}gn_proj/scale']))
555
+ block.downsample.norm.bias.copy_(_n2p(w[f'{bp}gn_proj/bias']))
556
+ embed_conv_w = _n2p(w[f'{prefix}embedding/kernel'])
557
+ else:
558
+ embed_conv_w = adapt_input_conv(
559
+ model.patch_embed.proj.weight.shape[1], _n2p(w[f'{prefix}embedding/kernel']))
560
+ model.patch_embed.proj.weight.copy_(embed_conv_w)
561
+ model.patch_embed.proj.bias.copy_(_n2p(w[f'{prefix}embedding/bias']))
562
+ model.cls_token.copy_(_n2p(w[f'{prefix}cls'], t=False))
563
+ pos_embed_w = _n2p(w[f'{prefix}Transformer/posembed_input/pos_embedding'], t=False)
564
+ if pos_embed_w.shape != model.pos_embed.shape:
565
+ pos_embed_w = resize_pos_embed( # resize pos embedding when different size from pretrained weights
566
+ pos_embed_w, model.pos_embed, getattr(model, 'num_tokens', 1), model.patch_embed.grid_size)
567
+ model.pos_embed.copy_(pos_embed_w)
568
+ model.norm.weight.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/scale']))
569
+ model.norm.bias.copy_(_n2p(w[f'{prefix}Transformer/encoder_norm/bias']))
570
+ if isinstance(model.head, nn.Linear) and model.head.bias.shape[0] == w[f'{prefix}head/bias'].shape[-1]:
571
+ model.head.weight.copy_(_n2p(w[f'{prefix}head/kernel']))
572
+ model.head.bias.copy_(_n2p(w[f'{prefix}head/bias']))
573
+ if isinstance(getattr(model.pre_logits, 'fc', None), nn.Linear) and f'{prefix}pre_logits/bias' in w:
574
+ model.pre_logits.fc.weight.copy_(_n2p(w[f'{prefix}pre_logits/kernel']))
575
+ model.pre_logits.fc.bias.copy_(_n2p(w[f'{prefix}pre_logits/bias']))
576
+ for i, block in enumerate(model.blocks.children()):
577
+ block_prefix = f'{prefix}Transformer/encoderblock_{i}/'
578
+ mha_prefix = block_prefix + 'MultiHeadDotProductAttention_1/'
579
+ block.norm1.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/scale']))
580
+ block.norm1.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_0/bias']))
581
+ block.attn.qkv.weight.copy_(torch.cat([
582
+ _n2p(w[f'{mha_prefix}{n}/kernel'], t=False).flatten(1).T for n in ('query', 'key', 'value')]))
583
+ block.attn.qkv.bias.copy_(torch.cat([
584
+ _n2p(w[f'{mha_prefix}{n}/bias'], t=False).reshape(-1) for n in ('query', 'key', 'value')]))
585
+ block.attn.proj.weight.copy_(_n2p(w[f'{mha_prefix}out/kernel']).flatten(1))
586
+ block.attn.proj.bias.copy_(_n2p(w[f'{mha_prefix}out/bias']))
587
+ for r in range(2):
588
+ getattr(block.mlp, f'fc{r + 1}').weight.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_{r}/kernel']))
589
+ getattr(block.mlp, f'fc{r + 1}').bias.copy_(_n2p(w[f'{block_prefix}MlpBlock_3/Dense_{r}/bias']))
590
+ block.norm2.weight.copy_(_n2p(w[f'{block_prefix}LayerNorm_2/scale']))
591
+ block.norm2.bias.copy_(_n2p(w[f'{block_prefix}LayerNorm_2/bias']))
592
+
593
+
594
+ def resize_pos_embed(posemb, posemb_new, num_tokens=1, gs_new=()):
595
+ # Rescale the grid of position embeddings when loading from state_dict. Adapted from
596
+ # https://github.com/google-research/vision_transformer/blob/00883dd691c63a6830751563748663526e811cee/vit_jax/checkpoint.py#L224
597
+ _logger.info('Resized position embedding: %s to %s', posemb.shape, posemb_new.shape)
598
+ ntok_new = posemb_new.shape[1]
599
+ if num_tokens:
600
+ posemb_tok, posemb_grid = posemb[:, :num_tokens], posemb[0, num_tokens:]
601
+ ntok_new -= num_tokens
602
+ else:
603
+ posemb_tok, posemb_grid = posemb[:, :0], posemb[0]
604
+ gs_old = int(math.sqrt(len(posemb_grid)))
605
+ if not len(gs_new): # backwards compatibility
606
+ gs_new = [int(math.sqrt(ntok_new))] * 2
607
+ assert len(gs_new) >= 2
608
+ _logger.info('Position embedding grid-size from %s to %s', [gs_old, gs_old], gs_new)
609
+ posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
610
+ posemb_grid = F.interpolate(posemb_grid, size=gs_new, mode='bilinear')
611
+ posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_new[0] * gs_new[1], -1)
612
+ posemb = torch.cat([posemb_tok, posemb_grid], dim=1)
613
+ return posemb
614
+
615
+
616
+ def checkpoint_filter_fn(state_dict, model):
617
+ """ convert patch embedding weight from manual patchify + linear proj to conv"""
618
+ out_dict = {}
619
+ if 'model' in state_dict:
620
+ # For deit models
621
+ state_dict = state_dict['model']
622
+ for k, v in state_dict.items():
623
+ if 'patch_embed.proj.weight' in k and len(v.shape) < 4:
624
+ # For old models that I trained prior to conv based patchification
625
+ O, I, H, W = model.patch_embed.proj.weight.shape
626
+ v = v.reshape(O, -1, H, W)
627
+ elif k == 'pos_embed' and v.shape != model.pos_embed.shape:
628
+ # To resize pos embedding when using model at different size from pretrained weights
629
+ v = resize_pos_embed(
630
+ v, model.pos_embed, getattr(model, 'num_tokens', 1), model.patch_embed.grid_size)
631
+ out_dict[k] = v
632
+ return out_dict
633
+
634
+
635
+ def _create_vision_transformer(variant, pretrained=False, default_cfg=None, **kwargs):
636
+ default_cfg = default_cfg or default_cfgs[variant]
637
+ if kwargs.get('features_only', None):
638
+ raise RuntimeError('features_only not implemented for Vision Transformer models.')
639
+
640
+ # NOTE this extra code to support handling of repr size for in21k pretrained models
641
+ default_num_classes = default_cfg['num_classes']
642
+ num_classes = kwargs.get('num_classes', default_num_classes)
643
+ repr_size = kwargs.pop('representation_size', None)
644
+ if repr_size is not None and num_classes != default_num_classes:
645
+ # Remove representation layer if fine-tuning. This may not always be the desired action,
646
+ # but I feel better than doing nothing by default for fine-tuning. Perhaps a better interface?
647
+ _logger.warning("Removing representation layer for fine-tuning.")
648
+ repr_size = None
649
+
650
+ model = build_model_with_cfg(
651
+ VisionTransformer, variant, pretrained,
652
+ default_cfg=default_cfg,
653
+ representation_size=repr_size,
654
+ pretrained_filter_fn=checkpoint_filter_fn,
655
+ pretrained_custom_load='npz' in default_cfg['url'],
656
+ **kwargs)
657
+ return model
658
+
659
+
660
+ @register_model
661
+ def vit_tiny_patch16_224(pretrained=False, **kwargs):
662
+ """ ViT-Tiny (Vit-Ti/16)
663
+ """
664
+ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
665
+ model = _create_vision_transformer('vit_tiny_patch16_224', pretrained=pretrained, **model_kwargs)
666
+ return model
667
+
668
+
669
+ @register_model
670
+ def vit_tiny_patch16_384(pretrained=False, **kwargs):
671
+ """ ViT-Tiny (Vit-Ti/16) @ 384x384.
672
+ """
673
+ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
674
+ model = _create_vision_transformer('vit_tiny_patch16_384', pretrained=pretrained, **model_kwargs)
675
+ return model
676
+
677
+
678
+ @register_model
679
+ def vit_small_patch32_224(pretrained=False, **kwargs):
680
+ """ ViT-Small (ViT-S/32)
681
+ """
682
+ model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs)
683
+ model = _create_vision_transformer('vit_small_patch32_224', pretrained=pretrained, **model_kwargs)
684
+ return model
685
+
686
+
687
+ @register_model
688
+ def vit_small_patch32_384(pretrained=False, **kwargs):
689
+ """ ViT-Small (ViT-S/32) at 384x384.
690
+ """
691
+ model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs)
692
+ model = _create_vision_transformer('vit_small_patch32_384', pretrained=pretrained, **model_kwargs)
693
+ return model
694
+
695
+
696
+ @register_model
697
+ def vit_small_patch16_224(pretrained=False, **kwargs):
698
+ """ ViT-Small (ViT-S/16)
699
+ NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper
700
+ """
701
+ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
702
+ model = _create_vision_transformer('vit_small_patch16_224', pretrained=pretrained, **model_kwargs)
703
+ return model
704
+
705
+
706
+ @register_model
707
+ def vit_small_patch16_384(pretrained=False, **kwargs):
708
+ """ ViT-Small (ViT-S/16)
709
+ NOTE I've replaced my previous 'small' model definition and weights with the small variant from the DeiT paper
710
+ """
711
+ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
712
+ model = _create_vision_transformer('vit_small_patch16_384', pretrained=pretrained, **model_kwargs)
713
+ return model
714
+
715
+
716
+ @register_model
717
+ def vit_base_patch32_224(pretrained=False, **kwargs):
718
+ """ ViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
719
+ """
720
+ model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs)
721
+ model = _create_vision_transformer('vit_base_patch32_224', pretrained=pretrained, **model_kwargs)
722
+ return model
723
+
724
+
725
+ @register_model
726
+ def vit_base_patch32_384(pretrained=False, **kwargs):
727
+ """ ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
728
+ ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
729
+ """
730
+ model_kwargs = dict(patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs)
731
+ model = _create_vision_transformer('vit_base_patch32_384', pretrained=pretrained, **model_kwargs)
732
+ return model
733
+
734
+
735
+ @register_model
736
+ def vit_base_patch16_224(pretrained=False, **kwargs):
737
+ """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
738
+ ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
739
+ """
740
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
741
+ model = _create_vision_transformer('vit_base_patch16_224', pretrained=pretrained, **model_kwargs)
742
+ return model
743
+
744
+
745
+ @register_model
746
+ def vit_base_patch16_384(pretrained=False, **kwargs):
747
+ """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
748
+ ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
749
+ """
750
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
751
+ model = _create_vision_transformer('vit_base_patch16_384', pretrained=pretrained, **model_kwargs)
752
+ return model
753
+
754
+
755
+ @register_model
756
+ def vit_large_patch32_224(pretrained=False, **kwargs):
757
+ """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
758
+ """
759
+ model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs)
760
+ model = _create_vision_transformer('vit_large_patch32_224', pretrained=pretrained, **model_kwargs)
761
+ return model
762
+
763
+
764
+ @register_model
765
+ def vit_large_patch32_384(pretrained=False, **kwargs):
766
+ """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
767
+ ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
768
+ """
769
+ model_kwargs = dict(patch_size=32, embed_dim=1024, depth=24, num_heads=16, **kwargs)
770
+ model = _create_vision_transformer('vit_large_patch32_384', pretrained=pretrained, **model_kwargs)
771
+ return model
772
+
773
+
774
+ @register_model
775
+ def vit_large_patch16_224(pretrained=False, **kwargs):
776
+ """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
777
+ ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
778
+ """
779
+ model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs)
780
+ model = _create_vision_transformer('vit_large_patch16_224', pretrained=pretrained, **model_kwargs)
781
+ return model
782
+
783
+
784
+ @register_model
785
+ def vit_large_patch16_384(pretrained=False, **kwargs):
786
+ """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
787
+ ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
788
+ """
789
+ model_kwargs = dict(patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs)
790
+ model = _create_vision_transformer('vit_large_patch16_384', pretrained=pretrained, **model_kwargs)
791
+ return model
792
+
793
+
794
+ @register_model
795
+ def vit_tiny_patch16_224_in21k(pretrained=False, **kwargs):
796
+ """ ViT-Tiny (Vit-Ti/16).
797
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
798
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
799
+ """
800
+ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
801
+ model = _create_vision_transformer('vit_tiny_patch16_224_in21k', pretrained=pretrained, **model_kwargs)
802
+ return model
803
+
804
+
805
+ @register_model
806
+ def vit_small_patch32_224_in21k(pretrained=False, **kwargs):
807
+ """ ViT-Small (ViT-S/16)
808
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
809
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
810
+ """
811
+ model_kwargs = dict(patch_size=32, embed_dim=384, depth=12, num_heads=6, **kwargs)
812
+ model = _create_vision_transformer('vit_small_patch32_224_in21k', pretrained=pretrained, **model_kwargs)
813
+ return model
814
+
815
+
816
+ @register_model
817
+ def vit_small_patch16_224_in21k(pretrained=False, **kwargs):
818
+ """ ViT-Small (ViT-S/16)
819
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
820
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
821
+ """
822
+ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
823
+ model = _create_vision_transformer('vit_small_patch16_224_in21k', pretrained=pretrained, **model_kwargs)
824
+ return model
825
+
826
+
827
+ @register_model
828
+ def vit_base_patch32_224_in21k(pretrained=False, **kwargs):
829
+ """ ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
830
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
831
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
832
+ """
833
+ model_kwargs = dict(
834
+ patch_size=32, embed_dim=768, depth=12, num_heads=12, **kwargs)
835
+ model = _create_vision_transformer('vit_base_patch32_224_in21k', pretrained=pretrained, **model_kwargs)
836
+ return model
837
+
838
+
839
+ @register_model
840
+ def vit_base_patch16_224_in21k(pretrained=False, **kwargs):
841
+ """ ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
842
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
843
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
844
+ """
845
+ model_kwargs = dict(
846
+ patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
847
+ model = _create_vision_transformer('vit_base_patch16_224_in21k', pretrained=pretrained, **model_kwargs)
848
+ return model
849
+
850
+
851
+ @register_model
852
+ def vit_large_patch32_224_in21k(pretrained=False, **kwargs):
853
+ """ ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
854
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
855
+ NOTE: this model has a representation layer but the 21k classifier head is zero'd out in original weights
856
+ """
857
+ model_kwargs = dict(
858
+ patch_size=32, embed_dim=1024, depth=24, num_heads=16, representation_size=1024, **kwargs)
859
+ model = _create_vision_transformer('vit_large_patch32_224_in21k', pretrained=pretrained, **model_kwargs)
860
+ return model
861
+
862
+
863
+ @register_model
864
+ def vit_large_patch16_224_in21k(pretrained=False, **kwargs):
865
+ """ ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
866
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
867
+ NOTE: this model has valid 21k classifier head and no representation (pre-logits) layer
868
+ """
869
+ model_kwargs = dict(
870
+ patch_size=16, embed_dim=1024, depth=24, num_heads=16, **kwargs)
871
+ model = _create_vision_transformer('vit_large_patch16_224_in21k', pretrained=pretrained, **model_kwargs)
872
+ return model
873
+
874
+
875
+ @register_model
876
+ def vit_huge_patch14_224_in21k(pretrained=False, **kwargs):
877
+ """ ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
878
+ ImageNet-21k weights @ 224x224, source https://github.com/google-research/vision_transformer.
879
+ NOTE: this model has a representation layer but the 21k classifier head is zero'd out in original weights
880
+ """
881
+ model_kwargs = dict(
882
+ patch_size=14, embed_dim=1280, depth=32, num_heads=16, representation_size=1280, **kwargs)
883
+ model = _create_vision_transformer('vit_huge_patch14_224_in21k', pretrained=pretrained, **model_kwargs)
884
+ return model
885
+
886
+
887
+ @register_model
888
+ def deit_tiny_patch16_224(pretrained=False, **kwargs):
889
+ """ DeiT-tiny model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
890
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
891
+ """
892
+ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
893
+ model = _create_vision_transformer('deit_tiny_patch16_224', pretrained=pretrained, **model_kwargs)
894
+ return model
895
+
896
+
897
+ @register_model
898
+ def deit_small_patch16_224(pretrained=False, **kwargs):
899
+ """ DeiT-small model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
900
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
901
+ """
902
+ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
903
+ model = _create_vision_transformer('deit_small_patch16_224', pretrained=pretrained, **model_kwargs)
904
+ return model
905
+
906
+
907
+ @register_model
908
+ def deit_base_patch16_224(pretrained=False, **kwargs):
909
+ """ DeiT base model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
910
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
911
+ """
912
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
913
+ model = _create_vision_transformer('deit_base_patch16_224', pretrained=pretrained, **model_kwargs)
914
+ return model
915
+
916
+
917
+ @register_model
918
+ def deit_base_patch16_384(pretrained=False, **kwargs):
919
+ """ DeiT base model @ 384x384 from paper (https://arxiv.org/abs/2012.12877).
920
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
921
+ """
922
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
923
+ model = _create_vision_transformer('deit_base_patch16_384', pretrained=pretrained, **model_kwargs)
924
+ return model
925
+
926
+
927
+ @register_model
928
+ def deit_tiny_distilled_patch16_224(pretrained=False, **kwargs):
929
+ """ DeiT-tiny distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
930
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
931
+ """
932
+ model_kwargs = dict(patch_size=16, embed_dim=192, depth=12, num_heads=3, **kwargs)
933
+ model = _create_vision_transformer(
934
+ 'deit_tiny_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs)
935
+ return model
936
+
937
+
938
+ @register_model
939
+ def deit_small_distilled_patch16_224(pretrained=False, **kwargs):
940
+ """ DeiT-small distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
941
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
942
+ """
943
+ model_kwargs = dict(patch_size=16, embed_dim=384, depth=12, num_heads=6, **kwargs)
944
+ model = _create_vision_transformer(
945
+ 'deit_small_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs)
946
+ return model
947
+
948
+
949
+ @register_model
950
+ def deit_base_distilled_patch16_224(pretrained=False, **kwargs):
951
+ """ DeiT-base distilled model @ 224x224 from paper (https://arxiv.org/abs/2012.12877).
952
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
953
+ """
954
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
955
+ model = _create_vision_transformer(
956
+ 'deit_base_distilled_patch16_224', pretrained=pretrained, distilled=True, **model_kwargs)
957
+ return model
958
+
959
+
960
+ @register_model
961
+ def deit_base_distilled_patch16_384(pretrained=False, **kwargs):
962
+ """ DeiT-base distilled model @ 384x384 from paper (https://arxiv.org/abs/2012.12877).
963
+ ImageNet-1k weights from https://github.com/facebookresearch/deit.
964
+ """
965
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, **kwargs)
966
+ model = _create_vision_transformer(
967
+ 'deit_base_distilled_patch16_384', pretrained=pretrained, distilled=True, **model_kwargs)
968
+ return model
969
+
970
+
971
+ @register_model
972
+ def vit_base_patch16_224_miil_in21k(pretrained=False, **kwargs):
973
+ """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
974
+ Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
975
+ """
976
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, qkv_bias=False, **kwargs)
977
+ model = _create_vision_transformer('vit_base_patch16_224_miil_in21k', pretrained=pretrained, **model_kwargs)
978
+ return model
979
+
980
+
981
+ @register_model
982
+ def vit_base_patch16_224_miil(pretrained=False, **kwargs):
983
+ """ ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
984
+ Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
985
+ """
986
+ model_kwargs = dict(patch_size=16, embed_dim=768, depth=12, num_heads=12, qkv_bias=False, **kwargs)
987
+ model = _create_vision_transformer('vit_base_patch16_224_miil', pretrained=pretrained, **model_kwargs)
988
+ return model
989
+
990
+
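A minimal sketch of driving the masked VisionTransformer above. forward_features() splits its input along dim 1 into data[:, 0] (the image) and data[:, 1] (the mask), so the sketch assumes the model is built with in_chans=1 and fed a 2-channel packed tensor; an all-zero mask leaves attention unmasked, while a non-zero mask is patch-embedded and turned into an additive -1e6 attention bias. The sizes and the zero mask are illustrative only, and the import assumes timm and the repository are available:

import torch
from model.Vision_Transformer_with_mask import VisionTransformer

model = VisionTransformer(img_size=224, patch_size=16, in_chans=1,
                          embed_dim=768, depth=12, num_heads=12,
                          as_backbone=True)      # backbone mode: returns patch tokens, no cls token or head
image = torch.randn(2, 1, 224, 224)              # channel 0 of the packed input: the image
mask = torch.zeros(2, 1, 224, 224)               # channel 1: all zeros -> no attention masking
data = torch.cat([image, mask], dim=1)           # (B, 2, H, W)
tokens = model(data)                             # patch tokens, shape (B, 196, 768)
print(tokens.shape)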
model/__pycache__/CoordAttention.cpython-38.pyc ADDED
Binary file (3.58 kB)

model/__pycache__/Vision_Transformer_with_mask.cpython-38.pyc ADDED
Binary file (37.2 kB)

model/__pycache__/features.cpython-38.pyc ADDED
Binary file (12.4 kB)

model/__pycache__/helpers.cpython-38.pyc ADDED
Binary file (14.8 kB)

model/__pycache__/hub.cpython-38.pyc ADDED
Binary file (3.45 kB)

model/__pycache__/registry.cpython-38.pyc ADDED
Binary file (4.77 kB)

model/features.py ADDED
@@ -0,0 +1,284 @@
+ """ PyTorch Feature Extraction Helpers
+
+ A collection of classes, functions, modules to help extract features from models
+ and provide a common interface for describing them.
+
+ The return_layers, module re-writing idea inspired by torchvision IntermediateLayerGetter
+ https://github.com/pytorch/vision/blob/d88d8961ae51507d0cb680329d985b1488b1b76b/torchvision/models/_utils.py
+
+ Hacked together by / Copyright 2020 Ross Wightman
+ """
+ from collections import OrderedDict, defaultdict
+ from copy import deepcopy
+ from functools import partial
+ from typing import Dict, List, Tuple
+
+ import torch
+ import torch.nn as nn
+
+
+ class FeatureInfo:
+
+     def __init__(self, feature_info: List[Dict], out_indices: Tuple[int]):
+         prev_reduction = 1
+         for fi in feature_info:
+             # sanity check the mandatory fields, there may be additional fields depending on the model
+             assert 'num_chs' in fi and fi['num_chs'] > 0
+             assert 'reduction' in fi and fi['reduction'] >= prev_reduction
+             prev_reduction = fi['reduction']
+             assert 'module' in fi
+         self.out_indices = out_indices
+         self.info = feature_info
+
+     def from_other(self, out_indices: Tuple[int]):
+         return FeatureInfo(deepcopy(self.info), out_indices)
+
+     def get(self, key, idx=None):
+         """ Get value by key at specified index (indices)
+         if idx == None, returns value for key at each output index
+         if idx is an integer, return value for that feature module index (ignoring output indices)
+         if idx is a list/tuple, return value for each module index (ignoring output indices)
+         """
+         if idx is None:
+             return [self.info[i][key] for i in self.out_indices]
+         if isinstance(idx, (tuple, list)):
+             return [self.info[i][key] for i in idx]
+         else:
+             return self.info[idx][key]
+
+     def get_dicts(self, keys=None, idx=None):
+         """ return info dicts for specified keys (or all if None) at specified indices (or out_indices if None)
+         """
+         if idx is None:
+             if keys is None:
+                 return [self.info[i] for i in self.out_indices]
+             else:
+                 return [{k: self.info[i][k] for k in keys} for i in self.out_indices]
+         if isinstance(idx, (tuple, list)):
+             return [self.info[i] if keys is None else {k: self.info[i][k] for k in keys} for i in idx]
+         else:
+             return self.info[idx] if keys is None else {k: self.info[idx][k] for k in keys}
+
+     def channels(self, idx=None):
+         """ feature channels accessor
+         """
+         return self.get('num_chs', idx)
+
+     def reduction(self, idx=None):
+         """ feature reduction (output stride) accessor
+         """
+         return self.get('reduction', idx)
+
+     def module_name(self, idx=None):
+         """ feature module name accessor
+         """
+         return self.get('module', idx)
+
+     def __getitem__(self, item):
+         return self.info[item]
+
+     def __len__(self):
+         return len(self.info)
+
+
+ class FeatureHooks:
+     """ Feature Hook Helper
+
+     This module helps with the setup and extraction of hooks for extracting features from
+     internal nodes in a model by node name. This works quite well in eager Python but needs
+     redesign for torchscript.
+     """
+
+     def __init__(self, hooks, named_modules, out_map=None, default_hook_type='forward'):
+         # setup feature hooks
+         modules = {k: v for k, v in named_modules}
+         for i, h in enumerate(hooks):
+             hook_name = h['module']
+             m = modules[hook_name]
+             hook_id = out_map[i] if out_map else hook_name
+             hook_fn = partial(self._collect_output_hook, hook_id)
+             hook_type = h['hook_type'] if 'hook_type' in h else default_hook_type
+             if hook_type == 'forward_pre':
+                 m.register_forward_pre_hook(hook_fn)
+             elif hook_type == 'forward':
+                 m.register_forward_hook(hook_fn)
+             else:
+                 assert False, "Unsupported hook type"
+         self._feature_outputs = defaultdict(OrderedDict)
+
+     def _collect_output_hook(self, hook_id, *args):
+         x = args[-1]  # tensor we want is last argument, output for fwd, input for fwd_pre
+         if isinstance(x, tuple):
+             x = x[0]  # unwrap input tuple
+         self._feature_outputs[x.device][hook_id] = x
+
+     def get_output(self, device) -> Dict[str, torch.tensor]:
+         output = self._feature_outputs[device]
+         self._feature_outputs[device] = OrderedDict()  # clear after reading
+         return output
+
+
+ def _module_list(module, flatten_sequential=False):
+     # a yield/iter would be better for this but wouldn't be compatible with torchscript
+     ml = []
+     for name, module in module.named_children():
+         if flatten_sequential and isinstance(module, nn.Sequential):
+             # first level of Sequential containers is flattened into containing model
+             for child_name, child_module in module.named_children():
+                 combined = [name, child_name]
+                 ml.append(('_'.join(combined), '.'.join(combined), child_module))
+         else:
+             ml.append((name, name, module))
+     return ml
+
+
+ def _get_feature_info(net, out_indices):
+     feature_info = getattr(net, 'feature_info')
+     if isinstance(feature_info, FeatureInfo):
+         return feature_info.from_other(out_indices)
+     elif isinstance(feature_info, (list, tuple)):
+         return FeatureInfo(net.feature_info, out_indices)
+     else:
+         assert False, "Provided feature_info is not valid"
+
+
+ def _get_return_layers(feature_info, out_map):
+     module_names = feature_info.module_name()
+     return_layers = {}
+     for i, name in enumerate(module_names):
+         return_layers[name] = out_map[i] if out_map is not None else feature_info.out_indices[i]
+     return return_layers
+
+
+ class FeatureDictNet(nn.ModuleDict):
+     """ Feature extractor with OrderedDict return
+
+     Wrap a model and extract features as specified by the out indices, the network is
+     partially re-built from contained modules.
+
+     There is a strong assumption that the modules have been registered into the model in the same
+     order as they are used. There should be no reuse of the same nn.Module more than once, including
+     trivial modules like `self.relu = nn.ReLU`.
+
+     Only submodules that are directly assigned to the model class (`model.feature1`) or at most
+     one Sequential container deep (`model.features.1`, with flatten_sequential=True) can be captured.
+     All Sequential containers that are directly assigned to the original model will have their
+     modules assigned to this module with the name `model.features.1` being changed to `model.features_1`
+
+     Arguments:
+         model (nn.Module): model from which we will extract the features
+         out_indices (tuple[int]): model output indices to extract features for
+         out_map (sequence): list or tuple specifying desired return id for each out index,
+             otherwise str(index) is used
+         feature_concat (bool): whether to concatenate intermediate features that are lists or tuples
+             vs select element [0]
+         flatten_sequential (bool): whether to flatten sequential modules assigned to model
+     """
+     def __init__(
+             self, model,
+             out_indices=(0, 1, 2, 3, 4), out_map=None, feature_concat=False, flatten_sequential=False):
+         super(FeatureDictNet, self).__init__()
+         self.feature_info = _get_feature_info(model, out_indices)
+         self.concat = feature_concat
+         self.return_layers = {}
+         return_layers = _get_return_layers(self.feature_info, out_map)
+         modules = _module_list(model, flatten_sequential=flatten_sequential)
+         remaining = set(return_layers.keys())
+         layers = OrderedDict()
+         for new_name, old_name, module in modules:
+             layers[new_name] = module
+             if old_name in remaining:
+                 # return id has to be consistently str type for torchscript
+                 self.return_layers[new_name] = str(return_layers[old_name])
+                 remaining.remove(old_name)
+             if not remaining:
+                 break
+         assert not remaining and len(self.return_layers) == len(return_layers), \
+             f'Return layers ({remaining}) are not present in model'
+         self.update(layers)
+
+     def _collect(self, x) -> (Dict[str, torch.Tensor]):
+         out = OrderedDict()
+         for name, module in self.items():
+             x = module(x)
+             if name in self.return_layers:
+                 out_id = self.return_layers[name]
+                 if isinstance(x, (tuple, list)):
+                     # If model tap is a tuple or list, concat or select first element
+                     # FIXME this may need to be more generic / flexible for some nets
+                     out[out_id] = torch.cat(x, 1) if self.concat else x[0]
+                 else:
+                     out[out_id] = x
+         return out
+
+     def forward(self, x) -> Dict[str, torch.Tensor]:
+         return self._collect(x)
+
+
+ class FeatureListNet(FeatureDictNet):
+     """ Feature extractor with list return
+
+     See docstring for FeatureDictNet above, this class exists only to appease Torchscript typing constraints.
+     In eager Python we could have returned List[Tensor] vs Dict[id, Tensor] based on a member bool.
+     """
+     def __init__(
+             self, model,
+             out_indices=(0, 1, 2, 3, 4), out_map=None, feature_concat=False, flatten_sequential=False):
+         super(FeatureListNet, self).__init__(
+             model, out_indices=out_indices, out_map=out_map, feature_concat=feature_concat,
+             flatten_sequential=flatten_sequential)
+
+     def forward(self, x) -> (List[torch.Tensor]):
+         return list(self._collect(x).values())
+
+
+ class FeatureHookNet(nn.ModuleDict):
+     """ FeatureHookNet
+
+     Wrap a model and extract features specified by the out indices using forward/forward-pre hooks.
+
+     If `no_rewrite` is True, features are extracted via hooks without modifying the underlying
+     network in any way.
+
+     If `no_rewrite` is False, the model will be re-written as in the
+     FeatureList/FeatureDict case by folding first to second (Sequential only) level modules into this one.
+
+     FIXME this does not currently work with Torchscript, see FeatureHooks class
+     """
+     def __init__(
+             self, model,
+             out_indices=(0, 1, 2, 3, 4), out_map=None, out_as_dict=False, no_rewrite=False,
+             feature_concat=False, flatten_sequential=False, default_hook_type='forward'):
+         super(FeatureHookNet, self).__init__()
+         assert not torch.jit.is_scripting()
+         self.feature_info = _get_feature_info(model, out_indices)
+         self.out_as_dict = out_as_dict
+         layers = OrderedDict()
+         hooks = []
+         if no_rewrite:
+             assert not flatten_sequential
+             if hasattr(model, 'reset_classifier'):  # make sure classifier is removed?
+                 model.reset_classifier(0)
+             layers['body'] = model
+             hooks.extend(self.feature_info.get_dicts())
+         else:
+             modules = _module_list(model, flatten_sequential=flatten_sequential)
+             remaining = {f['module']: f['hook_type'] if 'hook_type' in f else default_hook_type
+                          for f in self.feature_info.get_dicts()}
+             for new_name, old_name, module in modules:
+                 layers[new_name] = module
+                 for fn, fm in module.named_modules(prefix=old_name):
+                     if fn in remaining:
+                         hooks.append(dict(module=fn, hook_type=remaining[fn]))
+                         del remaining[fn]
+                 if not remaining:
+                     break
+             assert not remaining, f'Return layers ({remaining}) are not present in model'
+         self.update(layers)
+         self.hooks = FeatureHooks(hooks, model.named_modules(), out_map=out_map)
+
+     def forward(self, x):
+         for name, module in self.items():
+             x = module(x)
+         out = self.hooks.get_output(x.device)
+         return out if self.out_as_dict else list(out.values())
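
A short sketch of how these wrappers are normally reached (via `features_only=True`, which routes through `build_model_with_cfg` in helpers.py below and wraps the backbone in FeatureListNet); the backbone name is illustrative and assumes upstream timm is installed:

import torch
import timm  # assumption: timm's resnet18 exposes a compatible feature_info

backbone = timm.create_model('resnet18', features_only=True, out_indices=(1, 2, 3, 4))

feats = backbone(torch.randn(1, 3, 224, 224))  # list of feature maps, shallow to deep
for chs, red, f in zip(backbone.feature_info.channels(),
                       backbone.feature_info.reduction(),
                       feats):
    print(chs, red, tuple(f.shape))  # e.g. 64 4 (1, 64, 56, 56) for the first tap
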
model/helpers.py ADDED
@@ -0,0 +1,508 @@
+ """ Model creation / weight loading / state_dict helpers
+
+ Hacked together by / Copyright 2020 Ross Wightman
+ """
+ import logging
+ import os
+ import math
+ from collections import OrderedDict
+ from copy import deepcopy
+ from typing import Any, Callable, Optional, Tuple
+
+ import torch
+ import torch.nn as nn
+
+
+ from .features import FeatureListNet, FeatureDictNet, FeatureHookNet
+ from .hub import has_hf_hub, download_cached_file, load_state_dict_from_hf, load_state_dict_from_url
+ from .layers import Conv2dSame, Linear
+
+
+ _logger = logging.getLogger(__name__)
+
+
+ def load_state_dict(checkpoint_path, use_ema=False):
+     if checkpoint_path and os.path.isfile(checkpoint_path):
+         checkpoint = torch.load(checkpoint_path, map_location='cpu')
+         state_dict_key = 'state_dict'
+         if isinstance(checkpoint, dict):
+             if use_ema and 'state_dict_ema' in checkpoint:
+                 state_dict_key = 'state_dict_ema'
+         if state_dict_key and state_dict_key in checkpoint:
+             new_state_dict = OrderedDict()
+             for k, v in checkpoint[state_dict_key].items():
+                 # strip `module.` prefix
+                 name = k[7:] if k.startswith('module') else k
+                 new_state_dict[name] = v
+             state_dict = new_state_dict
+         else:
+             state_dict = checkpoint
+         _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
+         return state_dict
+     else:
+         _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
+         raise FileNotFoundError()
+
+
+ def load_checkpoint(model, checkpoint_path, use_ema=False, strict=True):
+     if os.path.splitext(checkpoint_path)[-1].lower() in ('.npz', '.npy'):
+         # numpy checkpoint, try to load via model specific load_pretrained fn
+         if hasattr(model, 'load_pretrained'):
+             model.load_pretrained(checkpoint_path)
+         else:
+             raise NotImplementedError('Model cannot load numpy checkpoint')
+         return
+     state_dict = load_state_dict(checkpoint_path, use_ema)
+     model.load_state_dict(state_dict, strict=strict)
+
+
+ def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
+     resume_epoch = None
+     if os.path.isfile(checkpoint_path):
+         checkpoint = torch.load(checkpoint_path, map_location='cpu')
+         if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
+             if log_info:
+                 _logger.info('Restoring model state from checkpoint...')
+             new_state_dict = OrderedDict()
+             for k, v in checkpoint['state_dict'].items():
+                 name = k[7:] if k.startswith('module') else k
+                 new_state_dict[name] = v
+             model.load_state_dict(new_state_dict)
+
+             if optimizer is not None and 'optimizer' in checkpoint:
+                 if log_info:
+                     _logger.info('Restoring optimizer state from checkpoint...')
+                 optimizer.load_state_dict(checkpoint['optimizer'])
+
+             if loss_scaler is not None and loss_scaler.state_dict_key in checkpoint:
+                 if log_info:
+                     _logger.info('Restoring AMP loss scaler state from checkpoint...')
+                 loss_scaler.load_state_dict(checkpoint[loss_scaler.state_dict_key])
+
+             if 'epoch' in checkpoint:
+                 resume_epoch = checkpoint['epoch']
+                 if 'version' in checkpoint and checkpoint['version'] > 1:
+                     resume_epoch += 1  # start at the next epoch, old checkpoints incremented before save
+
+             if log_info:
+                 _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
+         else:
+             model.load_state_dict(checkpoint)
+             if log_info:
+                 _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
+         return resume_epoch
+     else:
+         _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
+         raise FileNotFoundError()
+
+
+ def load_custom_pretrained(model, default_cfg=None, load_fn=None, progress=False, check_hash=False):
+     r"""Loads a custom (read non .pth) weight file
+
+     Downloads checkpoint file into cache-dir like torch.hub based loaders, but calls
+     a passed in custom load fn, or the `load_pretrained` model member fn.
+
+     If the object is already present in `model_dir`, it's deserialized and returned.
+     The default value of `model_dir` is ``<hub_dir>/checkpoints`` where
+     `hub_dir` is the directory returned by :func:`~torch.hub.get_dir`.
+
+     Args:
+         model: The instantiated model to load weights into
+         default_cfg (dict): Default pretrained model cfg
+         load_fn: An external stand alone fn that loads weights into provided model, otherwise a fn named
+             'load_pretrained' on the model will be called if it exists
+         progress (bool, optional): whether or not to display a progress bar to stderr. Default: False
+         check_hash(bool, optional): If True, the filename part of the URL should follow the naming convention
+             ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
+             digits of the SHA256 hash of the contents of the file. The hash is used to
+             ensure unique names and to verify the contents of the file. Default: False
+     """
+     default_cfg = default_cfg or getattr(model, 'default_cfg', None) or {}
+     pretrained_url = default_cfg.get('url', None)
+     if not pretrained_url:
+         _logger.warning("No pretrained weights exist for this model. Using random initialization.")
+         return
+     cached_file = download_cached_file(default_cfg['url'], check_hash=check_hash, progress=progress)
+
+     if load_fn is not None:
+         load_fn(model, cached_file)
+     elif hasattr(model, 'load_pretrained'):
+         model.load_pretrained(cached_file)
+     else:
+         _logger.warning("Valid function to load pretrained weights is not available, using random initialization.")
+
+
+ def adapt_input_conv(in_chans, conv_weight):
+     conv_type = conv_weight.dtype
+     conv_weight = conv_weight.float()  # Some weights are in torch.half, ensure it's float for sum on CPU
+     O, I, J, K = conv_weight.shape
+     if in_chans == 1:
+         if I > 3:
+             assert conv_weight.shape[1] % 3 == 0
+             # For models with space2depth stems
+             conv_weight = conv_weight.reshape(O, I // 3, 3, J, K)
+             conv_weight = conv_weight.sum(dim=2, keepdim=False)
+         else:
+             conv_weight = conv_weight.sum(dim=1, keepdim=True)
+     elif in_chans != 3:
+         if I != 3:
+             raise NotImplementedError('Weight format not supported by conversion.')
+         else:
+             # NOTE this strategy should be better than random init, but there could be other combinations of
+             # the original RGB input layer weights that'd work better for specific cases.
+             repeat = int(math.ceil(in_chans / 3))
+             conv_weight = conv_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
+             conv_weight *= (3 / float(in_chans))
+     conv_weight = conv_weight.to(conv_type)
+     return conv_weight
+
+
+ def load_pretrained(model, default_cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True, progress=False):
+     """ Load pretrained checkpoint
+
+     Args:
+         model (nn.Module) : PyTorch model module
+         default_cfg (Optional[Dict]): default configuration for pretrained weights / target dataset
+         num_classes (int): num_classes for model
+         in_chans (int): in_chans for model
+         filter_fn (Optional[Callable]): state_dict filter fn for load (takes state_dict, model as args)
+         strict (bool): strict load of checkpoint
+         progress (bool): enable progress bar for weight download
+
+     """
+     default_cfg = default_cfg or getattr(model, 'default_cfg', None) or {}
+     pretrained_url = default_cfg.get('url', None)
+     hf_hub_id = default_cfg.get('hf_hub', None)
+     if not pretrained_url and not hf_hub_id:
+         _logger.warning("No pretrained weights exist for this model. Using random initialization.")
+         return
+     if hf_hub_id and has_hf_hub(necessary=not pretrained_url):
+         _logger.info(f'Loading pretrained weights from Hugging Face hub ({hf_hub_id})')
+         state_dict = load_state_dict_from_hf(hf_hub_id)
+     else:
+         _logger.info(f'Loading pretrained weights from url ({pretrained_url})')
+         state_dict = load_state_dict_from_url(pretrained_url, progress=progress, map_location='cpu')
+     if filter_fn is not None:
+         # for backwards compat with filter fns that take one arg, try one first, then two
+         try:
+             state_dict = filter_fn(state_dict)
+         except TypeError:
+             state_dict = filter_fn(state_dict, model)
+
+     input_convs = default_cfg.get('first_conv', None)
+     if input_convs is not None and in_chans != 3:
+         if isinstance(input_convs, str):
+             input_convs = (input_convs,)
+         for input_conv_name in input_convs:
+             weight_name = input_conv_name + '.weight'
+             try:
+                 state_dict[weight_name] = adapt_input_conv(in_chans, state_dict[weight_name])
+                 _logger.info(
+                     f'Converted input conv {input_conv_name} pretrained weights from 3 to {in_chans} channel(s)')
+             except NotImplementedError as e:
+                 del state_dict[weight_name]
+                 strict = False
+                 _logger.warning(
+                     f'Unable to convert pretrained {input_conv_name} weights, using random init for this layer.')
+
+     classifiers = default_cfg.get('classifier', None)
+     label_offset = default_cfg.get('label_offset', 0)
+     if classifiers is not None:
+         if isinstance(classifiers, str):
+             classifiers = (classifiers,)
+         if num_classes != default_cfg['num_classes']:
+             for classifier_name in classifiers:
+                 # completely discard fully connected if model num_classes doesn't match pretrained weights
+                 del state_dict[classifier_name + '.weight']
+                 del state_dict[classifier_name + '.bias']
+             strict = False
+         elif label_offset > 0:
+             for classifier_name in classifiers:
+                 # special case for pretrained weights with an extra background class in pretrained weights
+                 classifier_weight = state_dict[classifier_name + '.weight']
+                 state_dict[classifier_name + '.weight'] = classifier_weight[label_offset:]
+                 classifier_bias = state_dict[classifier_name + '.bias']
+                 state_dict[classifier_name + '.bias'] = classifier_bias[label_offset:]
+
+     model.load_state_dict(state_dict, strict=strict)
+
+
+ def extract_layer(model, layer):
+     layer = layer.split('.')
+     module = model
+     if hasattr(model, 'module') and layer[0] != 'module':
+         module = model.module
+     if not hasattr(model, 'module') and layer[0] == 'module':
+         layer = layer[1:]
+     for l in layer:
+         if hasattr(module, l):
+             if not l.isdigit():
+                 module = getattr(module, l)
+             else:
+                 module = module[int(l)]
+         else:
+             return module
+     return module
+
+
+ def set_layer(model, layer, val):
+     layer = layer.split('.')
+     module = model
+     if hasattr(model, 'module') and layer[0] != 'module':
+         module = model.module
+     lst_index = 0
+     module2 = module
+     for l in layer:
+         if hasattr(module2, l):
+             if not l.isdigit():
+                 module2 = getattr(module2, l)
+             else:
+                 module2 = module2[int(l)]
+             lst_index += 1
+     lst_index -= 1
+     for l in layer[:lst_index]:
+         if not l.isdigit():
+             module = getattr(module, l)
+         else:
+             module = module[int(l)]
+     l = layer[lst_index]
+     setattr(module, l, val)
+
+
+ def adapt_model_from_string(parent_module, model_string):
+     separator = '***'
+     state_dict = {}
+     lst_shape = model_string.split(separator)
+     for k in lst_shape:
+         k = k.split(':')
+         key = k[0]
+         shape = k[1][1:-1].split(',')
+         if shape[0] != '':
+             state_dict[key] = [int(i) for i in shape]
+
+     new_module = deepcopy(parent_module)
+     for n, m in parent_module.named_modules():
+         old_module = extract_layer(parent_module, n)
+         if isinstance(old_module, nn.Conv2d) or isinstance(old_module, Conv2dSame):
+             if isinstance(old_module, Conv2dSame):
+                 conv = Conv2dSame
+             else:
+                 conv = nn.Conv2d
+             s = state_dict[n + '.weight']
+             in_channels = s[1]
+             out_channels = s[0]
+             g = 1
+             if old_module.groups > 1:
+                 in_channels = out_channels
+                 g = in_channels
+             new_conv = conv(
+                 in_channels=in_channels, out_channels=out_channels, kernel_size=old_module.kernel_size,
+                 bias=old_module.bias is not None, padding=old_module.padding, dilation=old_module.dilation,
+                 groups=g, stride=old_module.stride)
+             set_layer(new_module, n, new_conv)
+         if isinstance(old_module, nn.BatchNorm2d):
+             new_bn = nn.BatchNorm2d(
+                 num_features=state_dict[n + '.weight'][0], eps=old_module.eps, momentum=old_module.momentum,
+                 affine=old_module.affine, track_running_stats=True)
+             set_layer(new_module, n, new_bn)
+         if isinstance(old_module, nn.Linear):
+             # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?
+             num_features = state_dict[n + '.weight'][1]
+             new_fc = Linear(
+                 in_features=num_features, out_features=old_module.out_features, bias=old_module.bias is not None)
+             set_layer(new_module, n, new_fc)
+             if hasattr(new_module, 'num_features'):
+                 new_module.num_features = num_features
+     new_module.eval()
+     parent_module.eval()
+
+     return new_module
+
+
+ def adapt_model_from_file(parent_module, model_variant):
+     adapt_file = os.path.join(os.path.dirname(__file__), 'pruned', model_variant + '.txt')
+     with open(adapt_file, 'r') as f:
+         return adapt_model_from_string(parent_module, f.read().strip())
+
+
+ def default_cfg_for_features(default_cfg):
+     default_cfg = deepcopy(default_cfg)
+     # remove default pretrained cfg fields that don't have much relevance for feature backbone
+     to_remove = ('num_classes', 'crop_pct', 'classifier', 'global_pool')  # add default final pool size?
+     for tr in to_remove:
+         default_cfg.pop(tr, None)
+     return default_cfg
+
+
+ def overlay_external_default_cfg(default_cfg, kwargs):
+     """ Overlay 'external_default_cfg' in kwargs on top of default_cfg arg.
+     """
+     external_default_cfg = kwargs.pop('external_default_cfg', None)
+     if external_default_cfg:
+         default_cfg.pop('url', None)  # url should come from external cfg
+         default_cfg.pop('hf_hub', None)  # hf hub id should come from external cfg
+         default_cfg.update(external_default_cfg)
+
+
+ def set_default_kwargs(kwargs, names, default_cfg):
+     for n in names:
+         # for legacy reasons, model __init__ args uses img_size + in_chans as separate args while
+         # default_cfg has one input_size=(C, H, W) entry
+         if n == 'img_size':
+             input_size = default_cfg.get('input_size', None)
+             if input_size is not None:
+                 assert len(input_size) == 3
+                 kwargs.setdefault(n, input_size[-2:])
+         elif n == 'in_chans':
+             input_size = default_cfg.get('input_size', None)
+             if input_size is not None:
+                 assert len(input_size) == 3
+                 kwargs.setdefault(n, input_size[0])
+         else:
+             default_val = default_cfg.get(n, None)
+             if default_val is not None:
+                 kwargs.setdefault(n, default_cfg[n])
+
+
+ def filter_kwargs(kwargs, names):
+     if not kwargs or not names:
+         return
+     for n in names:
+         kwargs.pop(n, None)
+
+
+ def update_default_cfg_and_kwargs(default_cfg, kwargs, kwargs_filter):
+     """ Update the default_cfg and kwargs before passing to model
+
+     FIXME this sequence of overlay default_cfg, set default kwargs, filter kwargs
+     could/should be replaced by an improved configuration mechanism
+
+     Args:
+         default_cfg: input default_cfg (updated in-place)
+         kwargs: keyword args passed to model build fn (updated in-place)
+         kwargs_filter: keyword arg keys that must be removed before model __init__
+     """
+     # Overlay default cfg values from `external_default_cfg` if it exists in kwargs
+     overlay_external_default_cfg(default_cfg, kwargs)
+     # Set model __init__ args that can be determined by default_cfg (if not already passed as kwargs)
+     default_kwarg_names = ('num_classes', 'global_pool', 'in_chans')
+     if default_cfg.get('fixed_input_size', False):
+         # if fixed_input_size exists and is True, model takes an img_size arg that fixes its input size
+         default_kwarg_names += ('img_size',)
+     set_default_kwargs(kwargs, names=default_kwarg_names, default_cfg=default_cfg)
+     # Filter keyword args for task specific model variants (some 'features only' models, etc.)
+     filter_kwargs(kwargs, names=kwargs_filter)
+
+
+ def build_model_with_cfg(
+         model_cls: Callable,
+         variant: str,
+         pretrained: bool,
+         default_cfg: dict,
+         model_cfg: Optional[Any] = None,
+         feature_cfg: Optional[dict] = None,
+         pretrained_strict: bool = True,
+         pretrained_filter_fn: Optional[Callable] = None,
+         pretrained_custom_load: bool = False,
+         kwargs_filter: Optional[Tuple[str]] = None,
+         **kwargs):
+     """ Build model with specified default_cfg and optional model_cfg
+
+     This helper fn aids in the construction of a model including:
+       * handling default_cfg and associated pretrained weight loading
+       * passing through optional model_cfg for models with config based arch spec
+       * features_only model adaptation
+       * pruning config / model adaptation
+
+     Args:
+         model_cls (nn.Module): model class
+         variant (str): model variant name
+         pretrained (bool): load pretrained weights
+         default_cfg (dict): model's default pretrained/task config
+         model_cfg (Optional[Dict]): model's architecture config
+         feature_cfg (Optional[Dict]): feature extraction adapter config
+         pretrained_strict (bool): load pretrained weights strictly
+         pretrained_filter_fn (Optional[Callable]): filter callable for pretrained weights
+         pretrained_custom_load (bool): use custom load fn, to load numpy or other non PyTorch weights
+         kwargs_filter (Optional[Tuple]): kwargs to filter before passing to model
+         **kwargs: model args passed through to model __init__
+     """
+     pruned = kwargs.pop('pruned', False)
+     features = False
+     feature_cfg = feature_cfg or {}
+     default_cfg = deepcopy(default_cfg) if default_cfg else {}
+     update_default_cfg_and_kwargs(default_cfg, kwargs, kwargs_filter)
+     default_cfg.setdefault('architecture', variant)
+
+     # Setup for feature extraction wrapper done at end of this fn
+     if kwargs.pop('features_only', False):
+         features = True
+         feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
+         if 'out_indices' in kwargs:
+             feature_cfg['out_indices'] = kwargs.pop('out_indices')
+
+     # Build the model
+     model = model_cls(**kwargs) if model_cfg is None else model_cls(cfg=model_cfg, **kwargs)
+     model.default_cfg = default_cfg
+
+     if pruned:
+         model = adapt_model_from_file(model, variant)
+
+     # For classification models, check class attr, then kwargs, then default to 1k, otherwise 0 for feats
+     num_classes_pretrained = 0 if features else getattr(model, 'num_classes', kwargs.get('num_classes', 1000))
+     if pretrained:
+         if pretrained_custom_load:
+             load_custom_pretrained(model)
+         else:
+             load_pretrained(
+                 model,
+                 num_classes=num_classes_pretrained,
+                 in_chans=kwargs.get('in_chans', 3),
+                 filter_fn=pretrained_filter_fn,
+                 strict=pretrained_strict)
+
+     # Wrap the model in a feature extraction module if enabled
+     if features:
+         feature_cls = FeatureListNet
+         if 'feature_cls' in feature_cfg:
+             feature_cls = feature_cfg.pop('feature_cls')
+             if isinstance(feature_cls, str):
+                 feature_cls = feature_cls.lower()
+                 if 'hook' in feature_cls:
+                     feature_cls = FeatureHookNet
+                 else:
+                     assert False, f'Unknown feature class {feature_cls}'
+         model = feature_cls(model, **feature_cfg)
+         model.default_cfg = default_cfg_for_features(default_cfg)  # add back default_cfg
+
+     return model
+
+
+ def model_parameters(model, exclude_head=False):
+     if exclude_head:
+         # FIXME this a bit of a quick and dirty hack to skip classifier head params based on ordering
+         return [p for p in model.parameters()][:-2]
+     else:
+         return model.parameters()
+
+
+ def named_apply(fn: Callable, module: nn.Module, name='', depth_first=True, include_root=False) -> nn.Module:
+     if not depth_first and include_root:
+         fn(module=module, name=name)
+     for child_name, child_module in module.named_children():
+         child_name = '.'.join((name, child_name)) if name else child_name
+         named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
+     if depth_first and include_root:
+         fn(module=module, name=name)
+     return module
+
+
+ def named_modules(module: nn.Module, name='', depth_first=True, include_root=False):
+     if not depth_first and include_root:
+         yield name, module
+     for child_name, child_module in module.named_children():
+         child_name = '.'.join((name, child_name)) if name else child_name
+         yield from named_modules(
+             module=child_module, name=child_name, depth_first=depth_first, include_root=True)
+     if depth_first and include_root:
+         yield name, module
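
As a concrete illustration of `adapt_input_conv` above: a pretrained 3-channel conv weight is summed down to 1 channel or tiled and rescaled up to more channels. A minimal sketch, assuming the repository root is on the import path (the `model.helpers` module path follows this repo's layout):

import torch
from model.helpers import adapt_input_conv  # path assumption, see note above

w = torch.randn(64, 3, 7, 7)     # (out_ch, in_ch, kH, kW) pretrained RGB stem weight

w1 = adapt_input_conv(1, w)      # grayscale input: channels summed -> (64, 1, 7, 7)
w6 = adapt_input_conv(6, w)      # 6-band input: RGB weights tiled, scaled by 3/6 -> (64, 6, 7, 7)
print(w1.shape, w6.shape)
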
model/hub.py ADDED
@@ -0,0 +1,96 @@
+ import json
+ import logging
+ import os
+ from functools import partial
+ from typing import Union, Optional
+
+ import torch
+ from torch.hub import load_state_dict_from_url, download_url_to_file, urlparse, HASH_REGEX
+ try:
+     from torch.hub import get_dir
+ except ImportError:
+     from torch.hub import _get_torch_home as get_dir
+
+ from timm import __version__
+ try:
+     from huggingface_hub import hf_hub_url
+     from huggingface_hub import cached_download
+     cached_download = partial(cached_download, library_name="timm", library_version=__version__)
+ except ImportError:
+     hf_hub_url = None
+     cached_download = None
+
+ _logger = logging.getLogger(__name__)
+
+
+ def get_cache_dir(child_dir=''):
+     """
+     Returns the location of the directory where models are cached (and creates it if necessary).
+     """
+     # Issue warning to move data if old env is set
+     if os.getenv('TORCH_MODEL_ZOO'):
+         _logger.warning('TORCH_MODEL_ZOO is deprecated, please use env TORCH_HOME instead')
+
+     hub_dir = get_dir()
+     child_dir = () if not child_dir else (child_dir,)
+     model_dir = os.path.join(hub_dir, 'checkpoints', *child_dir)
+     os.makedirs(model_dir, exist_ok=True)
+     return model_dir
+
+
+ def download_cached_file(url, check_hash=True, progress=False):
+     parts = urlparse(url)
+     filename = os.path.basename(parts.path)
+     cached_file = os.path.join(get_cache_dir(), filename)
+     if not os.path.exists(cached_file):
+         _logger.info('Downloading: "{}" to {}\n'.format(url, cached_file))
+         hash_prefix = None
+         if check_hash:
+             r = HASH_REGEX.search(filename)  # r is Optional[Match[str]]
+             hash_prefix = r.group(1) if r else None
+         download_url_to_file(url, cached_file, hash_prefix, progress=progress)
+     return cached_file
+
+
+ def has_hf_hub(necessary=False):
+     if hf_hub_url is None and necessary:
+         # if no HF Hub module installed and it is necessary to continue, raise error
+         raise RuntimeError(
+             'Hugging Face hub model specified but package not installed. Run `pip install huggingface_hub`.')
+     return hf_hub_url is not None
+
+
+ def hf_split(hf_id):
+     rev_split = hf_id.split('@')
+     assert 0 < len(rev_split) <= 2, 'hf_hub id should only contain one @ character to identify revision.'
+     hf_model_id = rev_split[0]
+     hf_revision = rev_split[-1] if len(rev_split) > 1 else None
+     return hf_model_id, hf_revision
+
+
+ def load_cfg_from_json(json_file: Union[str, os.PathLike]):
+     with open(json_file, "r", encoding="utf-8") as reader:
+         text = reader.read()
+     return json.loads(text)
+
+
+ def _download_from_hf(model_id: str, filename: str):
+     hf_model_id, hf_revision = hf_split(model_id)
+     url = hf_hub_url(hf_model_id, filename, revision=hf_revision)
+     return cached_download(url, cache_dir=get_cache_dir('hf'))
+
+
+ def load_model_config_from_hf(model_id: str):
+     assert has_hf_hub(True)
+     cached_file = _download_from_hf(model_id, 'config.json')
+     default_cfg = load_cfg_from_json(cached_file)
+     default_cfg['hf_hub'] = model_id  # insert hf_hub id for pretrained weight load during model creation
+     model_name = default_cfg.get('architecture')
+     return default_cfg, model_name
+
+
+ def load_state_dict_from_hf(model_id: str):
+     assert has_hf_hub(True)
+     cached_file = _download_from_hf(model_id, 'pytorch_model.bin')
+     state_dict = torch.load(cached_file, map_location='cpu')
+     return state_dict
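
A small sketch of the id handling and cache helpers above; the model id is a made-up placeholder, and importing `model.hub` assumes `timm` is installed (it pulls in `timm.__version__`):

from model.hub import hf_split, get_cache_dir  # path assumption, see note above

model_id, revision = hf_split('some-org/some-model@main')  # placeholder id
print(model_id, revision)                # 'some-org/some-model' 'main'
print(hf_split('some-org/some-model'))   # revision defaults to None

print(get_cache_dir('hf'))               # <torch hub dir>/checkpoints/hf, created if missing
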
model/layers/__init__.py ADDED
@@ -0,0 +1,40 @@
+ from .activations import *
+ from .adaptive_avgmax_pool import \
+     adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, SelectAdaptivePool2d
+ from .blur_pool import BlurPool2d
+ from .classifier import ClassifierHead, create_classifier
+ from .cond_conv2d import CondConv2d, get_condconv_initializer
+ from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\
+     set_layer_config
+ from .conv2d_same import Conv2dSame, conv2d_same
+ from .conv_bn_act import ConvBnAct
+ from .create_act import create_act_layer, get_act_layer, get_act_fn
+ from .create_attn import get_attn, create_attn
+ from .create_conv2d import create_conv2d
+ from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act
+ from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path
+ from .eca import EcaModule, CecaModule, EfficientChannelAttn, CircularEfficientChannelAttn
+ from .evo_norm import EvoNormBatch2d, EvoNormSample2d
+ from .gather_excite import GatherExcite
+ from .global_context import GlobalContext
+ from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible
+ from .inplace_abn import InplaceAbn
+ from .involution import Involution
+ from .linear import Linear
+ from .mixed_conv2d import MixedConv2d
+ from .mlp import Mlp, GluMlp, GatedMlp
+ from .non_local_attn import NonLocalAttn, BatNonLocalAttn
+ from .norm import GroupNorm, LayerNorm2d
+ from .norm_act import BatchNormAct2d, GroupNormAct
+ from .padding import get_padding, get_same_padding, pad_same
+ from .patch_embed import PatchEmbed
+ from .pool2d_same import AvgPool2dSame, create_pool2d
+ from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite
+ from .selective_kernel import SelectiveKernel
+ from .separable_conv import SeparableConv2d, SeparableConvBnAct
+ from .space_to_depth import SpaceToDepthModule
+ from .split_attn import SplitAttn
+ from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model
+ from .std_conv import StdConv2d, StdConv2dSame, ScaledStdConv2d, ScaledStdConv2dSame
+ from .test_time_pool import TestTimePoolHead, apply_test_time_pool
+ from .weight_init import trunc_normal_, variance_scaling_, lecun_normal_
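
These re-exports make the individual layer modules importable from one place; a quick consumption sketch, assuming the referenced submodules from this commit are present and the `model` package is importable:

from model.layers import PatchEmbed, to_2tuple, trunc_normal_  # path assumption

print(to_2tuple(224))  # (224, 224)

patch_embed = PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768)
trunc_normal_(patch_embed.proj.weight, std=0.02)  # typical ViT-style init
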
model/layers/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (3.1 kB). View file
 
model/layers/__pycache__/activations.cpython-38.pyc ADDED
Binary file (6.66 kB). View file
 
model/layers/__pycache__/activations_jit.cpython-38.pyc ADDED
Binary file (4.14 kB). View file
 
model/layers/__pycache__/activations_me.cpython-38.pyc ADDED
Binary file (8.92 kB). View file
 
model/layers/__pycache__/adaptive_avgmax_pool.cpython-38.pyc ADDED
Binary file (4.78 kB). View file
 
model/layers/__pycache__/blur_pool.cpython-38.pyc ADDED
Binary file (2.1 kB). View file
 
model/layers/__pycache__/bottleneck_attn.cpython-38.pyc ADDED
Binary file (4.76 kB). View file
 
model/layers/__pycache__/cbam.cpython-38.pyc ADDED
Binary file (5.17 kB). View file
 
model/layers/__pycache__/classifier.cpython-38.pyc ADDED
Binary file (2.24 kB). View file
 
model/layers/__pycache__/cond_conv2d.cpython-38.pyc ADDED
Binary file (3.84 kB). View file
 
model/layers/__pycache__/config.cpython-38.pyc ADDED
Binary file (3.44 kB). View file
 
model/layers/__pycache__/conv2d_same.cpython-38.pyc ADDED
Binary file (1.96 kB). View file
 
model/layers/__pycache__/conv_bn_act.cpython-38.pyc ADDED
Binary file (1.66 kB). View file
 
model/layers/__pycache__/create_act.cpython-38.pyc ADDED
Binary file (3.65 kB). View file
 
model/layers/__pycache__/create_attn.cpython-38.pyc ADDED
Binary file (2.09 kB). View file
 
model/layers/__pycache__/create_conv2d.cpython-38.pyc ADDED
Binary file (1.09 kB). View file
 
model/layers/__pycache__/create_norm_act.cpython-38.pyc ADDED
Binary file (2.33 kB). View file
 
model/layers/__pycache__/drop.cpython-38.pyc ADDED
Binary file (5.74 kB). View file
 
model/layers/__pycache__/eca.cpython-38.pyc ADDED
Binary file (6.15 kB). View file
 
model/layers/__pycache__/evo_norm.cpython-38.pyc ADDED
Binary file (3.39 kB). View file
 
model/layers/__pycache__/gather_excite.cpython-38.pyc ADDED
Binary file (3.11 kB). View file
 
model/layers/__pycache__/global_context.cpython-38.pyc ADDED
Binary file (2.43 kB). View file
 
model/layers/__pycache__/halo_attn.cpython-38.pyc ADDED
Binary file (5.59 kB). View file
 
model/layers/__pycache__/helpers.cpython-38.pyc ADDED
Binary file (1.03 kB). View file
 
model/layers/__pycache__/inplace_abn.cpython-38.pyc ADDED
Binary file (3.18 kB). View file
 
model/layers/__pycache__/involution.cpython-38.pyc ADDED
Binary file (1.83 kB). View file
 
model/layers/__pycache__/lambda_layer.cpython-38.pyc ADDED
Binary file (3.01 kB). View file
 
model/layers/__pycache__/linear.cpython-38.pyc ADDED
Binary file (1.08 kB). View file
 
model/layers/__pycache__/mixed_conv2d.cpython-38.pyc ADDED
Binary file (2.29 kB). View file
 
model/layers/__pycache__/mlp.cpython-38.pyc ADDED
Binary file (3.81 kB). View file
 
model/layers/__pycache__/non_local_attn.cpython-38.pyc ADDED
Binary file (5.64 kB). View file
 
model/layers/__pycache__/norm.cpython-38.pyc ADDED
Binary file (1.52 kB). View file
 
model/layers/__pycache__/norm_act.cpython-38.pyc ADDED
Binary file (3.07 kB). View file
 
model/layers/__pycache__/padding.cpython-38.pyc ADDED
Binary file (1.8 kB). View file
 
model/layers/__pycache__/patch_embed.cpython-38.pyc ADDED
Binary file (1.68 kB). View file
 
model/layers/__pycache__/pool2d_same.cpython-38.pyc ADDED
Binary file (3.12 kB). View file
 
model/layers/__pycache__/selective_kernel.cpython-38.pyc ADDED
Binary file (5.51 kB). View file
 
model/layers/__pycache__/separable_conv.cpython-38.pyc ADDED
Binary file (2.98 kB). View file