Commit 4ffd704 committed by rwightman
1 Parent(s): 3634f6f

Update model config and README

Files changed (3):
  1. README.md +25 -21
  2. config.json +1 -1
  3. model.safetensors +3 -0
README.md CHANGED
@@ -2,17 +2,17 @@
 tags:
 - image-classification
 - timm
-library_tag: timm
+library_name: timm
 license: apache-2.0
 datasets:
 - imagenet-12k
 ---
-# Model card for coatnet_3_rw_224.in12k
+# Model card for coatnet_3_rw_224.sw_in12k
 
 A timm specific CoAtNet image classification model. Trained in `timm` on ImageNet-12k (a 11821 class subset of full ImageNet-22k) by Ross Wightman.
 
 
-### Model Variants in [maxxvit.py](https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/maxxvit.py)
+### Model Variants in [maxxvit.py](https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/maxxvit.py)
 
 MaxxViT covers a number of related model architectures that share a common structure including:
 - CoAtNet - Combining MBConv (depthwise-separable) convolutional blocks in early stages with self-attention transformer blocks in later stages.
@@ -42,10 +42,11 @@ from urllib.request import urlopen
 from PIL import Image
 import timm
 
-img = Image.open(
-    urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
+img = Image.open(urlopen(
+    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
+))
 
-model = timm.create_model('coatnet_3_rw_224.in12k', pretrained=True)
+model = timm.create_model('coatnet_3_rw_224.sw_in12k', pretrained=True)
 model = model.eval()
 
 # get model specific transforms (normalization, resize)
@@ -63,11 +64,12 @@ from urllib.request import urlopen
 from PIL import Image
 import timm
 
-img = Image.open(
-    urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
+img = Image.open(urlopen(
+    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
+))
 
 model = timm.create_model(
-    'coatnet_3_rw_224.in12k',
+    'coatnet_3_rw_224.sw_in12k',
     pretrained=True,
     features_only=True,
 )
@@ -81,12 +83,13 @@ output = model(transforms(img).unsqueeze(0)) # unsqueeze single image into batc
 
 for o in output:
     # print shape of each feature map in output
-    # e.g.:
-    #  torch.Size([1, 128, 192, 192])
-    #  torch.Size([1, 128, 96, 96])
-    #  torch.Size([1, 256, 48, 48])
-    #  torch.Size([1, 512, 24, 24])
-    #  torch.Size([1, 1024, 12, 12])
+    # e.g.:
+    #  torch.Size([1, 192, 112, 112])
+    #  torch.Size([1, 192, 56, 56])
+    #  torch.Size([1, 384, 28, 28])
+    #  torch.Size([1, 768, 14, 14])
+    #  torch.Size([1, 1536, 7, 7])
+
     print(o.shape)
 ```
 
@@ -96,11 +99,12 @@ from urllib.request import urlopen
 from PIL import Image
 import timm
 
-img = Image.open(
-    urlopen('https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'))
+img = Image.open(urlopen(
+    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
+))
 
 model = timm.create_model(
-    'coatnet_3_rw_224.in12k',
+    'coatnet_3_rw_224.sw_in12k',
     pretrained=True,
     num_classes=0,  # remove classifier nn.Linear
 )
@@ -115,10 +119,10 @@ output = model(transforms(img).unsqueeze(0)) # output is (batch_size, num_featu
 # or equivalently (without needing to set num_classes=0)
 
 output = model.forward_features(transforms(img).unsqueeze(0))
-# output is unpooled (ie.e a (batch_size, num_features, H, W) tensor
+# output is unpooled, a (1, 1536, 7, 7) shaped tensor
 
 output = model.forward_head(output, pre_logits=True)
-# output is (batch_size, num_features) tensor
+# output is a (1, num_features) shaped tensor
 ```
 
 ## Model Comparison
@@ -226,7 +230,7 @@ output = model.forward_head(output, pre_logits=True)
   publisher = {GitHub},
   journal = {GitHub repository},
   doi = {10.5281/zenodo.4414861},
-  howpublished = {\url{https://github.com/rwightman/pytorch-image-models}}
+  howpublished = {\url{https://github.com/huggingface/pytorch-image-models}}
 }
 ```
 ```bibtex
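
For context, a minimal end-to-end sketch of running the renamed `coatnet_3_rw_224.sw_in12k` weights for classification (the README hunks above stop at the transform setup). This is a hedged sketch rather than the exact README text, and it assumes a recent `timm` release that provides `timm.data.resolve_model_data_config`:

```python
from urllib.request import urlopen
from PIL import Image
import timm
import torch

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model('coatnet_3_rw_224.sw_in12k', pretrained=True)
model = model.eval()

# resolve model-specific preprocessing (resize, crop, normalization) from the pretrained config
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

with torch.no_grad():
    output = model(transforms(img).unsqueeze(0))  # logits over the 11821 ImageNet-12k classes

top5_prob, top5_idx = torch.topk(output.softmax(dim=1), k=5)
print(top5_prob, top5_idx)
```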
config.json CHANGED
@@ -4,7 +4,7 @@
   "num_features": 1536,
   "global_pool": "avg",
   "pretrained_cfg": {
-    "tag": "in12k",
+    "tag": "sw_in12k",
     "custom_load": false,
     "input_size": [
       3,
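
The config change above only renames the pretrained tag. A quick sanity-check sketch, assuming a `timm` release recent enough to register the `sw_in12k` tag and to expose the resolved config on the model (older releases only had a `default_cfg` dict):

```python
import timm

# build without downloading weights; the resolved pretrained config should carry the renamed tag
model = timm.create_model('coatnet_3_rw_224.sw_in12k', pretrained=False)

cfg = getattr(model, 'pretrained_cfg', None) or model.default_cfg
tag = cfg.tag if hasattr(cfg, 'tag') else cfg.get('tag')
print(tag)  # expected: 'sw_in12k'
```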
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a014b72581b872ec0e55dc463fbfc25d02cc10f014565fd58d87bb13eaa6718
+size 727446832
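
`model.safetensors` is stored via Git LFS, so only the pointer file appears in the diff. For illustration, a hedged sketch of fetching and loading the weight file directly with `huggingface_hub` and `safetensors`; the repo id is assumed from the model card name in this commit, and `timm.create_model(..., pretrained=True)` normally handles all of this automatically:

```python
import timm
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# repo id is an assumption inferred from the model card title
ckpt_path = hf_hub_download(repo_id='timm/coatnet_3_rw_224.sw_in12k', filename='model.safetensors')
state_dict = load_file(ckpt_path)  # plain dict of tensors (~727 MB per the LFS pointer above)

model = timm.create_model('coatnet_3_rw_224.sw_in12k', pretrained=False)
model.load_state_dict(state_dict)
model = model.eval()
```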