laszlokiss27 committed on
Commit
813a325
1 Parent(s): 987dfc2

doodle-zero

Browse files
README.md CHANGED
@@ -1,6 +1,4 @@
1
  ---
2
- license: other
3
- base_model: apple/mobilevitv2-1.0-imagenet1k-256
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,10 +13,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # results
17
 
18
- This model is a fine-tuned version of [apple/mobilevitv2-1.0-imagenet1k-256](https://huggingface.co/apple/mobilevitv2-1.0-imagenet1k-256) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.7879
21
- - Accuracy: 0.7951
22
 
23
  ## Model description
24
 
@@ -49,83 +47,25 @@ The following hyperparameters were used during training:
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
- |:-------------:|:------:|:------:|:---------------:|:--------:|
54
- | 1.254 | 0.0267 | 5000 | 1.2409 | 0.6897 |
55
- | 1.1472 | 0.0534 | 10000 | 1.1382 | 0.7127 |
56
- | 1.093 | 0.0802 | 15000 | 1.0852 | 0.7244 |
57
- | 1.0517 | 0.1069 | 20000 | 1.0415 | 0.7345 |
58
- | 1.023 | 0.1336 | 25000 | 1.0170 | 0.7413 |
59
- | 1.0047 | 0.1603 | 30000 | 0.9939 | 0.7452 |
60
- | 0.9878 | 0.1870 | 35000 | 0.9921 | 0.7466 |
61
- | 0.9758 | 0.2138 | 40000 | 0.9680 | 0.7522 |
62
- | 0.9661 | 0.2405 | 45000 | 0.9567 | 0.7545 |
63
- | 0.9582 | 0.2672 | 50000 | 0.9473 | 0.7569 |
64
- | 0.9425 | 0.2939 | 55000 | 0.9480 | 0.7569 |
65
- | 0.9391 | 0.3206 | 60000 | 0.9270 | 0.7616 |
66
- | 0.9301 | 0.3474 | 65000 | 0.9483 | 0.7570 |
67
- | 0.9196 | 0.3741 | 70000 | 0.9175 | 0.7635 |
68
- | 0.9168 | 0.4008 | 75000 | 0.9047 | 0.7674 |
69
- | 0.9139 | 0.4275 | 80000 | 0.9010 | 0.7680 |
70
- | 0.9022 | 0.4542 | 85000 | 0.8986 | 0.7683 |
71
- | 0.8986 | 0.4810 | 90000 | 0.8959 | 0.7689 |
72
- | 0.8965 | 0.5077 | 95000 | 0.8851 | 0.7720 |
73
- | 0.8906 | 0.5344 | 100000 | 0.8870 | 0.7715 |
74
- | 0.8888 | 0.5611 | 105000 | 0.8867 | 0.7716 |
75
- | 0.8852 | 0.5878 | 110000 | 0.8793 | 0.7732 |
76
- | 0.8822 | 0.6145 | 115000 | 0.8773 | 0.7735 |
77
- | 0.8796 | 0.6413 | 120000 | 0.8713 | 0.7747 |
78
- | 0.8707 | 0.6680 | 125000 | 0.8662 | 0.7763 |
79
- | 0.8735 | 0.6947 | 130000 | 0.8776 | 0.7741 |
80
- | 0.8659 | 0.7214 | 135000 | 0.8614 | 0.7771 |
81
- | 0.8635 | 0.7481 | 140000 | 0.8618 | 0.7772 |
82
- | 0.865 | 0.7749 | 145000 | 0.8561 | 0.7783 |
83
- | 0.8563 | 0.8016 | 150000 | 0.8585 | 0.7781 |
84
- | 0.8566 | 0.8283 | 155000 | 0.8493 | 0.7797 |
85
- | 0.8541 | 0.8550 | 160000 | 0.8493 | 0.7805 |
86
- | 0.8568 | 0.8817 | 165000 | 0.8431 | 0.7818 |
87
- | 0.846 | 0.9085 | 170000 | 0.8424 | 0.7819 |
88
- | 0.8479 | 0.9352 | 175000 | 0.8433 | 0.7812 |
89
- | 0.8486 | 0.9619 | 180000 | 0.8412 | 0.7823 |
90
- | 0.8398 | 0.9886 | 185000 | 0.8421 | 0.7818 |
91
- | 0.825 | 1.0153 | 190000 | 0.8355 | 0.7837 |
92
- | 0.8316 | 1.0421 | 195000 | 0.8354 | 0.7831 |
93
- | 0.8293 | 1.0688 | 200000 | 0.8571 | 0.7790 |
94
- | 0.8243 | 1.0955 | 205000 | 0.8288 | 0.7852 |
95
- | 0.824 | 1.1222 | 210000 | 0.8293 | 0.7851 |
96
- | 0.8277 | 1.1489 | 215000 | 0.8256 | 0.7859 |
97
- | 0.823 | 1.1757 | 220000 | 0.8223 | 0.7869 |
98
- | 0.8189 | 1.2024 | 225000 | 0.8226 | 0.7865 |
99
- | 0.8138 | 1.2291 | 230000 | 0.8217 | 0.7871 |
100
- | 0.8149 | 1.2558 | 235000 | 0.8215 | 0.7872 |
101
- | 0.8135 | 1.2825 | 240000 | 0.8163 | 0.7880 |
102
- | 0.8208 | 1.3093 | 245000 | 0.8141 | 0.7889 |
103
- | 0.8168 | 1.3360 | 250000 | 0.8150 | 0.7882 |
104
- | 0.8124 | 1.3627 | 255000 | 0.8124 | 0.7888 |
105
- | 0.8082 | 1.3894 | 260000 | 0.8113 | 0.7894 |
106
- | 0.8162 | 1.4161 | 265000 | 0.8140 | 0.7884 |
107
- | 0.8061 | 1.4429 | 270000 | 0.8129 | 0.7890 |
108
- | 0.8076 | 1.4696 | 275000 | 0.8072 | 0.7899 |
109
- | 0.8112 | 1.4963 | 280000 | 0.8064 | 0.7907 |
110
- | 0.8047 | 1.5230 | 285000 | 0.8061 | 0.7903 |
111
- | 0.8101 | 1.5497 | 290000 | 0.8086 | 0.7901 |
112
- | 0.8065 | 1.5765 | 295000 | 0.8032 | 0.7912 |
113
- | 0.7998 | 1.6032 | 300000 | 0.8048 | 0.7909 |
114
- | 0.8053 | 1.6299 | 305000 | 0.7993 | 0.7920 |
115
- | 0.8007 | 1.6566 | 310000 | 0.8007 | 0.7921 |
116
- | 0.7985 | 1.6833 | 315000 | 0.7988 | 0.7923 |
117
- | 0.8006 | 1.7101 | 320000 | 0.8230 | 0.7873 |
118
- | 0.8046 | 1.7368 | 325000 | 0.7959 | 0.7930 |
119
- | 0.794 | 1.7635 | 330000 | 0.7956 | 0.7928 |
120
- | 0.8047 | 1.7902 | 335000 | 0.8202 | 0.7881 |
121
- | 0.802 | 1.8169 | 340000 | 0.7953 | 0.7930 |
122
- | 0.7926 | 1.8436 | 345000 | 0.8007 | 0.7919 |
123
- | 0.7939 | 1.8704 | 350000 | 0.7918 | 0.7937 |
124
- | 0.7894 | 1.8971 | 355000 | 0.7911 | 0.7939 |
125
- | 0.7953 | 1.9238 | 360000 | 0.7879 | 0.7951 |
126
- | 0.7904 | 1.9505 | 365000 | 0.7922 | 0.7936 |
127
- | 0.7887 | 1.9772 | 370000 | 0.7899 | 0.7941 |
128
- | 0.7788 | 2.0040 | 375000 | 0.7888 | 0.7950 |
129
 
130
 
131
  ### Framework versions
 
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  metrics:
 
13
 
14
  # results
15
 
16
+ This model is a fine-tuned version of an unspecified base model on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.1000
19
+ - Accuracy: 0.7236
20
 
21
  ## Model description
22
 
 
47
 
48
  ### Training results
49
 
50
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
51
+ |:-------------:|:------:|:-----:|:---------------:|:--------:|
52
+ | 1.7698 | 0.2844 | 5000 | 1.7124 | 0.5802 |
53
+ | 1.5445 | 0.5689 | 10000 | 1.5021 | 0.6270 |
54
+ | 1.439 | 0.8533 | 15000 | 1.3989 | 0.6520 |
55
+ | 1.3625 | 1.1377 | 20000 | 1.3447 | 0.6647 |
56
+ | 1.3192 | 1.4222 | 25000 | 1.2965 | 0.6756 |
57
+ | 1.3 | 1.7066 | 30000 | 1.2788 | 0.6795 |
58
+ | 1.2695 | 1.9910 | 35000 | 1.2347 | 0.6900 |
59
+ | 1.2297 | 2.2754 | 40000 | 1.2160 | 0.6955 |
60
+ | 1.2144 | 2.5599 | 45000 | 1.1894 | 0.7021 |
61
+ | 1.1945 | 2.8443 | 50000 | 1.1734 | 0.7058 |
62
+ | 1.1551 | 3.1287 | 55000 | 1.1611 | 0.7084 |
63
+ | 1.1471 | 3.4132 | 60000 | 1.1523 | 0.7104 |
64
+ | 1.1301 | 3.6976 | 65000 | 1.1314 | 0.7156 |
65
+ | 1.1286 | 3.9820 | 70000 | 1.1220 | 0.7186 |
66
+ | 1.0898 | 4.2665 | 75000 | 1.1140 | 0.7203 |
67
+ | 1.093 | 4.5509 | 80000 | 1.1040 | 0.7232 |
68
+ | 1.0893 | 4.8353 | 85000 | 1.0986 | 0.7246 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 2.0039651791010478,
3
- "eval_accuracy": 0.7951413429664056,
4
- "eval_loss": 0.787926971912384,
5
- "eval_runtime": 394.5318,
6
- "eval_samples_per_second": 6390.648,
7
- "eval_steps_per_second": 24.964,
8
- "total_flos": 8.246362688213484e+18,
9
- "train_loss": 0.8725958955078125,
10
- "train_runtime": 61229.4812,
11
- "train_samples_per_second": 3911.919,
12
- "train_steps_per_second": 15.281
13
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.723616,
4
+ "eval_loss": 1.100016713142395,
5
+ "eval_runtime": 118.4292,
6
+ "eval_samples_per_second": 2110.967,
7
+ "eval_steps_per_second": 8.25,
8
+ "total_flos": 5.4597445596112486e+17,
9
+ "train_loss": 1.296092871571504,
10
+ "train_runtime": 24664.1985,
11
+ "train_samples_per_second": 912.253,
12
+ "train_steps_per_second": 3.564
13
  }
config.json CHANGED
@@ -1,31 +1,127 @@
1
  {
2
- "_name_or_path": "apple/mobilevitv2-1.0-imagenet1k-256",
3
  "architectures": [
4
- "MobileViTV2ForImageClassification"
5
  ],
6
  "aspp_dropout_prob": 0.1,
7
- "aspp_out_channels": 512,
8
  "atrous_rates": [
9
  6,
10
  12,
11
  18
12
  ],
13
- "attn_dropout": 0.0,
14
- "base_attn_unit_dims": [
15
- 128,
16
- 192,
17
- 256
18
- ],
19
  "classifier_dropout_prob": 0.1,
20
  "conv_kernel_size": 3,
21
- "expand_ratio": 2.0,
22
- "ffn_dropout": 0.0,
23
- "ffn_multiplier": 2,
24
- "hidden_act": "swish",
 
 
 
 
 
25
  "id2label": {
26
  "0": "aircraft carrier",
27
  "1": "airplane",
 
 
 
 
 
 
 
 
28
  "10": "asparagus",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "100": "dumbbell",
30
  "101": "ear",
31
  "102": "elbow",
@@ -36,7 +132,6 @@
36
  "107": "eyeglasses",
37
  "108": "face",
38
  "109": "fan",
39
- "11": "axe",
40
  "110": "feather",
41
  "111": "fence",
42
  "112": "finger",
@@ -47,7 +142,6 @@
47
  "117": "flamingo",
48
  "118": "flashlight",
49
  "119": "flip flops",
50
- "12": "backpack",
51
  "120": "floor lamp",
52
  "121": "flower",
53
  "122": "flying saucer",
@@ -58,7 +152,6 @@
58
  "127": "garden hose",
59
  "128": "garden",
60
  "129": "giraffe",
61
- "13": "banana",
62
  "130": "goatee",
63
  "131": "golf club",
64
  "132": "grapes",
@@ -69,7 +162,6 @@
69
  "137": "hand",
70
  "138": "harp",
71
  "139": "hat",
72
- "14": "bandage",
73
  "140": "headphones",
74
  "141": "hedgehog",
75
  "142": "helicopter",
@@ -80,7 +172,6 @@
80
  "147": "horse",
81
  "148": "hospital",
82
  "149": "hot air balloon",
83
- "15": "barn",
84
  "150": "hot dog",
85
  "151": "hot tub",
86
  "152": "hourglass",
@@ -91,7 +182,6 @@
91
  "157": "jacket",
92
  "158": "jail",
93
  "159": "kangaroo",
94
- "16": "baseball bat",
95
  "160": "key",
96
  "161": "keyboard",
97
  "162": "knee",
@@ -102,7 +192,6 @@
102
  "167": "leaf",
103
  "168": "leg",
104
  "169": "light bulb",
105
- "17": "baseball",
106
  "170": "lighter",
107
  "171": "lighthouse",
108
  "172": "lightning",
@@ -113,7 +202,6 @@
113
  "177": "lollipop",
114
  "178": "mailbox",
115
  "179": "map",
116
- "18": "basket",
117
  "180": "marker",
118
  "181": "matches",
119
  "182": "megaphone",
@@ -124,7 +212,6 @@
124
  "187": "moon",
125
  "188": "mosquito",
126
  "189": "motorbike",
127
- "19": "basketball",
128
  "190": "mountain",
129
  "191": "mouse",
130
  "192": "moustache",
@@ -135,8 +222,6 @@
135
  "197": "necklace",
136
  "198": "nose",
137
  "199": "ocean",
138
- "2": "alarm clock",
139
- "20": "bat",
140
  "200": "octagon",
141
  "201": "octopus",
142
  "202": "onion",
@@ -147,7 +232,6 @@
147
  "207": "palm tree",
148
  "208": "panda",
149
  "209": "pants",
150
- "21": "bathtub",
151
  "210": "paper clip",
152
  "211": "parachute",
153
  "212": "parrot",
@@ -158,7 +242,6 @@
158
  "217": "pencil",
159
  "218": "penguin",
160
  "219": "piano",
161
- "22": "beach",
162
  "220": "pickup truck",
163
  "221": "picture frame",
164
  "222": "pig",
@@ -169,7 +252,6 @@
169
  "227": "police car",
170
  "228": "pond",
171
  "229": "pool",
172
- "23": "bear",
173
  "230": "popsicle",
174
  "231": "postcard",
175
  "232": "potato",
@@ -180,7 +262,6 @@
180
  "237": "radio",
181
  "238": "rain",
182
  "239": "rainbow",
183
- "24": "beard",
184
  "240": "rake",
185
  "241": "remote control",
186
  "242": "rhinoceros",
@@ -191,7 +272,6 @@
191
  "247": "sailboat",
192
  "248": "sandwich",
193
  "249": "saw",
194
- "25": "bed",
195
  "250": "saxophone",
196
  "251": "school bus",
197
  "252": "scissors",
@@ -202,7 +282,6 @@
202
  "257": "shark",
203
  "258": "sheep",
204
  "259": "shoe",
205
- "26": "bee",
206
  "260": "shorts",
207
  "261": "shovel",
208
  "262": "sink",
@@ -213,7 +292,6 @@
213
  "267": "smiley face",
214
  "268": "snail",
215
  "269": "snake",
216
- "27": "belt",
217
  "270": "snorkel",
218
  "271": "snowflake",
219
  "272": "snowman",
@@ -224,7 +302,6 @@
224
  "277": "spoon",
225
  "278": "spreadsheet",
226
  "279": "square",
227
- "28": "bench",
228
  "280": "squiggle",
229
  "281": "squirrel",
230
  "282": "stairs",
@@ -235,7 +312,6 @@
235
  "287": "stitches",
236
  "288": "stop sign",
237
  "289": "stove",
238
- "29": "bicycle",
239
  "290": "strawberry",
240
  "291": "streetlight",
241
  "292": "string bean",
@@ -246,8 +322,6 @@
246
  "297": "sweater",
247
  "298": "swing set",
248
  "299": "sword",
249
- "3": "ambulance",
250
- "30": "binoculars",
251
  "300": "syringe",
252
  "301": "t-shirt",
253
  "302": "table",
@@ -258,7 +332,6 @@
258
  "307": "tennis racquet",
259
  "308": "tent",
260
  "309": "The Eiffel Tower",
261
- "31": "bird",
262
  "310": "The Great Wall of China",
263
  "311": "The Mona Lisa",
264
  "312": "tiger",
@@ -269,7 +342,6 @@
269
  "317": "toothbrush",
270
  "318": "toothpaste",
271
  "319": "tornado",
272
- "32": "birthday cake",
273
  "320": "tractor",
274
  "321": "traffic light",
275
  "322": "train",
@@ -280,7 +352,6 @@
280
  "327": "trumpet",
281
  "328": "umbrella",
282
  "329": "underwear",
283
- "33": "blackberry",
284
  "330": "van",
285
  "331": "vase",
286
  "332": "violin",
@@ -291,86 +362,16 @@
291
  "337": "wheel",
292
  "338": "windmill",
293
  "339": "wine bottle",
294
- "34": "blueberry",
295
  "340": "wine glass",
296
  "341": "wristwatch",
297
  "342": "yoga",
298
  "343": "zebra",
299
- "344": "zigzag",
300
- "35": "book",
301
- "36": "boomerang",
302
- "37": "bottlecap",
303
- "38": "bowtie",
304
- "39": "bracelet",
305
- "4": "angel",
306
- "40": "brain",
307
- "41": "bread",
308
- "42": "bridge",
309
- "43": "broccoli",
310
- "44": "broom",
311
- "45": "bucket",
312
- "46": "bulldozer",
313
- "47": "bus",
314
- "48": "bush",
315
- "49": "butterfly",
316
- "5": "animal migration",
317
- "50": "cactus",
318
- "51": "cake",
319
- "52": "calculator",
320
- "53": "calendar",
321
- "54": "camel",
322
- "55": "camera",
323
- "56": "camouflage",
324
- "57": "campfire",
325
- "58": "candle",
326
- "59": "cannon",
327
- "6": "ant",
328
- "60": "canoe",
329
- "61": "car",
330
- "62": "carrot",
331
- "63": "castle",
332
- "64": "cat",
333
- "65": "ceiling fan",
334
- "66": "cell phone",
335
- "67": "cello",
336
- "68": "chair",
337
- "69": "chandelier",
338
- "7": "anvil",
339
- "70": "church",
340
- "71": "circle",
341
- "72": "clarinet",
342
- "73": "clock",
343
- "74": "cloud",
344
- "75": "coffee cup",
345
- "76": "compass",
346
- "77": "computer",
347
- "78": "cookie",
348
- "79": "cooler",
349
- "8": "apple",
350
- "80": "couch",
351
- "81": "cow",
352
- "82": "crab",
353
- "83": "crayon",
354
- "84": "crocodile",
355
- "85": "crown",
356
- "86": "cruise ship",
357
- "87": "cup",
358
- "88": "diamond",
359
- "89": "dishwasher",
360
- "9": "arm",
361
- "90": "diving board",
362
- "91": "dog",
363
- "92": "dolphin",
364
- "93": "donut",
365
- "94": "door",
366
- "95": "dragon",
367
- "96": "dresser",
368
- "97": "drill",
369
- "98": "drums",
370
- "99": "duck"
371
  },
 
372
  "image_size": 64,
373
  "initializer_range": 0.02,
 
374
  "label2id": {
375
  "The Eiffel Tower": "309",
376
  "The Great Wall of China": "310",
@@ -720,18 +721,24 @@
720
  },
721
  "layer_norm_eps": 1e-05,
722
  "mlp_ratio": 2.0,
723
- "model_type": "mobilevitv2",
724
- "n_attn_blocks": [
725
- 2,
726
- 4,
727
- 3
 
 
 
 
728
  ],
 
729
  "num_channels": 1,
 
730
  "output_stride": 32,
731
- "patch_size": 2,
732
  "problem_type": "single_label_classification",
 
733
  "semantic_loss_ignore_index": 255,
734
  "torch_dtype": "float32",
735
- "transformers_version": "4.40.0",
736
- "width_multiplier": 1.0
737
  }
 
1
  {
 
2
  "architectures": [
3
+ "MobileViTForImageClassification"
4
  ],
5
  "aspp_dropout_prob": 0.1,
6
+ "aspp_out_channels": 256,
7
  "atrous_rates": [
8
  6,
9
  12,
10
  18
11
  ],
12
+ "attention_probs_dropout_prob": 0.1,
 
 
 
 
 
13
  "classifier_dropout_prob": 0.1,
14
  "conv_kernel_size": 3,
15
+ "expand_ratio": 4.0,
16
+ "hidden_act": "gelu",
17
+ "hidden_dropout_prob": 0.1,
18
+ "hidden_size": 768,
19
+ "hidden_sizes": [
20
+ 144,
21
+ 192,
22
+ 240
23
+ ],
24
  "id2label": {
25
  "0": "aircraft carrier",
26
  "1": "airplane",
27
+ "2": "alarm clock",
28
+ "3": "ambulance",
29
+ "4": "angel",
30
+ "5": "animal migration",
31
+ "6": "ant",
32
+ "7": "anvil",
33
+ "8": "apple",
34
+ "9": "arm",
35
  "10": "asparagus",
36
+ "11": "axe",
37
+ "12": "backpack",
38
+ "13": "banana",
39
+ "14": "bandage",
40
+ "15": "barn",
41
+ "16": "baseball bat",
42
+ "17": "baseball",
43
+ "18": "basket",
44
+ "19": "basketball",
45
+ "20": "bat",
46
+ "21": "bathtub",
47
+ "22": "beach",
48
+ "23": "bear",
49
+ "24": "beard",
50
+ "25": "bed",
51
+ "26": "bee",
52
+ "27": "belt",
53
+ "28": "bench",
54
+ "29": "bicycle",
55
+ "30": "binoculars",
56
+ "31": "bird",
57
+ "32": "birthday cake",
58
+ "33": "blackberry",
59
+ "34": "blueberry",
60
+ "35": "book",
61
+ "36": "boomerang",
62
+ "37": "bottlecap",
63
+ "38": "bowtie",
64
+ "39": "bracelet",
65
+ "40": "brain",
66
+ "41": "bread",
67
+ "42": "bridge",
68
+ "43": "broccoli",
69
+ "44": "broom",
70
+ "45": "bucket",
71
+ "46": "bulldozer",
72
+ "47": "bus",
73
+ "48": "bush",
74
+ "49": "butterfly",
75
+ "50": "cactus",
76
+ "51": "cake",
77
+ "52": "calculator",
78
+ "53": "calendar",
79
+ "54": "camel",
80
+ "55": "camera",
81
+ "56": "camouflage",
82
+ "57": "campfire",
83
+ "58": "candle",
84
+ "59": "cannon",
85
+ "60": "canoe",
86
+ "61": "car",
87
+ "62": "carrot",
88
+ "63": "castle",
89
+ "64": "cat",
90
+ "65": "ceiling fan",
91
+ "66": "cell phone",
92
+ "67": "cello",
93
+ "68": "chair",
94
+ "69": "chandelier",
95
+ "70": "church",
96
+ "71": "circle",
97
+ "72": "clarinet",
98
+ "73": "clock",
99
+ "74": "cloud",
100
+ "75": "coffee cup",
101
+ "76": "compass",
102
+ "77": "computer",
103
+ "78": "cookie",
104
+ "79": "cooler",
105
+ "80": "couch",
106
+ "81": "cow",
107
+ "82": "crab",
108
+ "83": "crayon",
109
+ "84": "crocodile",
110
+ "85": "crown",
111
+ "86": "cruise ship",
112
+ "87": "cup",
113
+ "88": "diamond",
114
+ "89": "dishwasher",
115
+ "90": "diving board",
116
+ "91": "dog",
117
+ "92": "dolphin",
118
+ "93": "donut",
119
+ "94": "door",
120
+ "95": "dragon",
121
+ "96": "dresser",
122
+ "97": "drill",
123
+ "98": "drums",
124
+ "99": "duck",
125
  "100": "dumbbell",
126
  "101": "ear",
127
  "102": "elbow",
 
132
  "107": "eyeglasses",
133
  "108": "face",
134
  "109": "fan",
 
135
  "110": "feather",
136
  "111": "fence",
137
  "112": "finger",
 
142
  "117": "flamingo",
143
  "118": "flashlight",
144
  "119": "flip flops",
 
145
  "120": "floor lamp",
146
  "121": "flower",
147
  "122": "flying saucer",
 
152
  "127": "garden hose",
153
  "128": "garden",
154
  "129": "giraffe",
 
155
  "130": "goatee",
156
  "131": "golf club",
157
  "132": "grapes",
 
162
  "137": "hand",
163
  "138": "harp",
164
  "139": "hat",
 
165
  "140": "headphones",
166
  "141": "hedgehog",
167
  "142": "helicopter",
 
172
  "147": "horse",
173
  "148": "hospital",
174
  "149": "hot air balloon",
 
175
  "150": "hot dog",
176
  "151": "hot tub",
177
  "152": "hourglass",
 
182
  "157": "jacket",
183
  "158": "jail",
184
  "159": "kangaroo",
 
185
  "160": "key",
186
  "161": "keyboard",
187
  "162": "knee",
 
192
  "167": "leaf",
193
  "168": "leg",
194
  "169": "light bulb",
 
195
  "170": "lighter",
196
  "171": "lighthouse",
197
  "172": "lightning",
 
202
  "177": "lollipop",
203
  "178": "mailbox",
204
  "179": "map",
 
205
  "180": "marker",
206
  "181": "matches",
207
  "182": "megaphone",
 
212
  "187": "moon",
213
  "188": "mosquito",
214
  "189": "motorbike",
 
215
  "190": "mountain",
216
  "191": "mouse",
217
  "192": "moustache",
 
222
  "197": "necklace",
223
  "198": "nose",
224
  "199": "ocean",
 
 
225
  "200": "octagon",
226
  "201": "octopus",
227
  "202": "onion",
 
232
  "207": "palm tree",
233
  "208": "panda",
234
  "209": "pants",
 
235
  "210": "paper clip",
236
  "211": "parachute",
237
  "212": "parrot",
 
242
  "217": "pencil",
243
  "218": "penguin",
244
  "219": "piano",
 
245
  "220": "pickup truck",
246
  "221": "picture frame",
247
  "222": "pig",
 
252
  "227": "police car",
253
  "228": "pond",
254
  "229": "pool",
 
255
  "230": "popsicle",
256
  "231": "postcard",
257
  "232": "potato",
 
262
  "237": "radio",
263
  "238": "rain",
264
  "239": "rainbow",
 
265
  "240": "rake",
266
  "241": "remote control",
267
  "242": "rhinoceros",
 
272
  "247": "sailboat",
273
  "248": "sandwich",
274
  "249": "saw",
 
275
  "250": "saxophone",
276
  "251": "school bus",
277
  "252": "scissors",
 
282
  "257": "shark",
283
  "258": "sheep",
284
  "259": "shoe",
 
285
  "260": "shorts",
286
  "261": "shovel",
287
  "262": "sink",
 
292
  "267": "smiley face",
293
  "268": "snail",
294
  "269": "snake",
 
295
  "270": "snorkel",
296
  "271": "snowflake",
297
  "272": "snowman",
 
302
  "277": "spoon",
303
  "278": "spreadsheet",
304
  "279": "square",
 
305
  "280": "squiggle",
306
  "281": "squirrel",
307
  "282": "stairs",
 
312
  "287": "stitches",
313
  "288": "stop sign",
314
  "289": "stove",
 
315
  "290": "strawberry",
316
  "291": "streetlight",
317
  "292": "string bean",
 
322
  "297": "sweater",
323
  "298": "swing set",
324
  "299": "sword",
 
 
325
  "300": "syringe",
326
  "301": "t-shirt",
327
  "302": "table",
 
332
  "307": "tennis racquet",
333
  "308": "tent",
334
  "309": "The Eiffel Tower",
 
335
  "310": "The Great Wall of China",
336
  "311": "The Mona Lisa",
337
  "312": "tiger",
 
342
  "317": "toothbrush",
343
  "318": "toothpaste",
344
  "319": "tornado",
 
345
  "320": "tractor",
346
  "321": "traffic light",
347
  "322": "train",
 
352
  "327": "trumpet",
353
  "328": "umbrella",
354
  "329": "underwear",
 
355
  "330": "van",
356
  "331": "vase",
357
  "332": "violin",
 
362
  "337": "wheel",
363
  "338": "windmill",
364
  "339": "wine bottle",
 
365
  "340": "wine glass",
366
  "341": "wristwatch",
367
  "342": "yoga",
368
  "343": "zebra",
369
+ "344": "zigzag"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  },
371
+ "ignore_mismatched_sizes": true,
372
  "image_size": 64,
373
  "initializer_range": 0.02,
374
+ "intermediate_size": 3072,
375
  "label2id": {
376
  "The Eiffel Tower": "309",
377
  "The Great Wall of China": "310",
 
721
  },
722
  "layer_norm_eps": 1e-05,
723
  "mlp_ratio": 2.0,
724
+ "model_type": "mobilevit",
725
+ "neck_hidden_sizes": [
726
+ 16,
727
+ 32,
728
+ 64,
729
+ 96,
730
+ 128,
731
+ 160,
732
+ 640
733
  ],
734
+ "num_attention_heads": 12,
735
  "num_channels": 1,
736
+ "num_hidden_layers": 12,
737
  "output_stride": 32,
738
+ "patch_size": 16,
739
  "problem_type": "single_label_classification",
740
+ "qkv_bias": true,
741
  "semantic_loss_ignore_index": 255,
742
  "torch_dtype": "float32",
743
+ "transformers_version": "4.40.0"
 
744
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c56ce027098d56d11ac636e6c45af9c186175031adfaaf0e614eeb9ac1d2dba
3
- size 18360744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35f61619e3ffc371ead68851aa0232c0fe6cc18ab6a5362d0e589c58eb59a19
3
+ size 20730036
preprocessor_config.json CHANGED
@@ -15,8 +15,8 @@
15
  "input_data_format"
16
  ],
17
  "crop_size": {
18
- "height": 56,
19
- "width": 56
20
  },
21
  "do_center_crop": true,
22
  "do_convert_rgb": false,
@@ -27,6 +27,6 @@
27
  "resample": 2,
28
  "rescale_factor": 0.00392156862745098,
29
  "size": {
30
- "shortest_edge": 56
31
  }
32
  }
 
15
  "input_data_format"
16
  ],
17
  "crop_size": {
18
+ "height": 28,
19
+ "width": 28
20
  },
21
  "do_center_crop": true,
22
  "do_convert_rgb": false,
 
27
  "resample": 2,
28
  "rescale_factor": 0.00392156862745098,
29
  "size": {
30
+ "shortest_edge": 28
31
  }
32
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0039651791010478,
3
- "eval_accuracy": 0.7951413429664056,
4
- "eval_loss": 0.787926971912384,
5
- "eval_runtime": 394.5318,
6
- "eval_samples_per_second": 6390.648,
7
- "eval_steps_per_second": 24.964
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.723616,
4
+ "eval_loss": 1.100016713142395,
5
+ "eval_runtime": 118.4292,
6
+ "eval_samples_per_second": 2110.967,
7
+ "eval_steps_per_second": 8.25
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0039651791010478,
3
- "total_flos": 8.246362688213484e+18,
4
- "train_loss": 0.8725958955078125,
5
- "train_runtime": 61229.4812,
6
- "train_samples_per_second": 3911.919,
7
- "train_steps_per_second": 15.281
8
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "total_flos": 5.4597445596112486e+17,
4
+ "train_loss": 1.296092871571504,
5
+ "train_runtime": 24664.1985,
6
+ "train_samples_per_second": 912.253,
7
+ "train_steps_per_second": 3.564
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b89a07bb3cdaf58b38f7161c3c9c6c2f098869dbaa81424749efa50f11303fc3
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb6b0f81e80c1f33b16aaf9c2fc69495be719e29fb2dfcec7d7a4debabe294f0
3
+ size 4920