dima806 commited on
Commit
83d42da
·
verified ·
1 Parent(s): 1ce6079

Upload folder using huggingface_hub

Browse files
checkpoint-29100/config.json ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Afghan",
13
+ "1": "African Wild Dog",
14
+ "2": "Airedale",
15
+ "3": "American Spaniel",
16
+ "4": "American Hairless",
17
+ "5": "American Spaniel",
18
+ "6": "Basenji",
19
+ "7": "Basset",
20
+ "8": "Beagle",
21
+ "9": "Bearded Collie",
22
+ "10": "Bermaise",
23
+ "11": "Bichon Frise",
24
+ "12": "Blenheim",
25
+ "13": "Bloodhound",
26
+ "14": "Bluetick",
27
+ "15": "Border Collie",
28
+ "16": "Borzoi",
29
+ "17": "Boston Terrier",
30
+ "18": "Boxer",
31
+ "19": "Bull Mastiff",
32
+ "20": "Bull Terrier",
33
+ "21": "Bulldog",
34
+ "22": "Cairn",
35
+ "23": "Chihuahua",
36
+ "24": "Chinese Crested",
37
+ "25": "Chow",
38
+ "26": "Clumber",
39
+ "27": "Cockapoo",
40
+ "28": "Cocker",
41
+ "29": "Collie",
42
+ "30": "Corgi",
43
+ "31": "Coyote",
44
+ "32": "Dalmation",
45
+ "33": "Dhole",
46
+ "34": "Dingo",
47
+ "35": "Doberman",
48
+ "36": "Elk Hound",
49
+ "37": "French Bulldog",
50
+ "38": "German Sheperd",
51
+ "39": "Golden Retriever",
52
+ "40": "Great Dane",
53
+ "41": "Great Perenees",
54
+ "42": "Greyhound",
55
+ "43": "Groenendael",
56
+ "44": "Irish Spaniel",
57
+ "45": "Irish Wolfhound",
58
+ "46": "Japanese Spaniel",
59
+ "47": "Komondor",
60
+ "48": "Labradoodle",
61
+ "49": "Labrador",
62
+ "50": "Lhasa",
63
+ "51": "Malinois",
64
+ "52": "Maltese",
65
+ "53": "Mex Hairless",
66
+ "54": "Newfoundland",
67
+ "55": "Pekinese",
68
+ "56": "Pit Bull",
69
+ "57": "Pomeranian",
70
+ "58": "Poodle",
71
+ "59": "Pug",
72
+ "60": "Rhodesian",
73
+ "61": "Rottweiler",
74
+ "62": "Saint Bernard",
75
+ "63": "Schnauzer",
76
+ "64": "Scotch Terrier",
77
+ "65": "Shar_Pei",
78
+ "66": "Shiba Inu",
79
+ "67": "Shih-Tzu",
80
+ "68": "Siberian Husky",
81
+ "69": "Vizsla",
82
+ "70": "Yorkie"
83
+ },
84
+ "image_size": 224,
85
+ "initializer_range": 0.02,
86
+ "intermediate_size": 3072,
87
+ "label2id": {
88
+ "Afghan": 0,
89
+ "African Wild Dog": 1,
90
+ "Airedale": 2,
91
+ "American Spaniel": 3,
92
+ "American Hairless": 4,
93
+ "American Spaniel": 5,
94
+ "Basenji": 6,
95
+ "Basset": 7,
96
+ "Beagle": 8,
97
+ "Bearded Collie": 9,
98
+ "Bermaise": 10,
99
+ "Bichon Frise": 11,
100
+ "Blenheim": 12,
101
+ "Bloodhound": 13,
102
+ "Bluetick": 14,
103
+ "Border Collie": 15,
104
+ "Borzoi": 16,
105
+ "Boston Terrier": 17,
106
+ "Boxer": 18,
107
+ "Bull Mastiff": 19,
108
+ "Bull Terrier": 20,
109
+ "Bulldog": 21,
110
+ "Cairn": 22,
111
+ "Chihuahua": 23,
112
+ "Chinese Crested": 24,
113
+ "Chow": 25,
114
+ "Clumber": 26,
115
+ "Cockapoo": 27,
116
+ "Cocker": 28,
117
+ "Collie": 29,
118
+ "Corgi": 30,
119
+ "Coyote": 31,
120
+ "Dalmation": 32,
121
+ "Dhole": 33,
122
+ "Dingo": 34,
123
+ "Doberman": 35,
124
+ "Elk Hound": 36,
125
+ "French Bulldog": 37,
126
+ "German Sheperd": 38,
127
+ "Golden Retriever": 39,
128
+ "Great Dane": 40,
129
+ "Great Perenees": 41,
130
+ "Greyhound": 42,
131
+ "Groenendael": 43,
132
+ "Irish Spaniel": 44,
133
+ "Irish Wolfhound": 45,
134
+ "Japanese Spaniel": 46,
135
+ "Komondor": 47,
136
+ "Labradoodle": 48,
137
+ "Labrador": 49,
138
+ "Lhasa": 50,
139
+ "Malinois": 51,
140
+ "Maltese": 52,
141
+ "Mex Hairless": 53,
142
+ "Newfoundland": 54,
143
+ "Pekinese": 55,
144
+ "Pit Bull": 56,
145
+ "Pomeranian": 57,
146
+ "Poodle": 58,
147
+ "Pug": 59,
148
+ "Rhodesian": 60,
149
+ "Rottweiler": 61,
150
+ "Saint Bernard": 62,
151
+ "Schnauzer": 63,
152
+ "Scotch Terrier": 64,
153
+ "Shar_Pei": 65,
154
+ "Shiba Inu": 66,
155
+ "Shih-Tzu": 67,
156
+ "Siberian Husky": 68,
157
+ "Vizsla": 69,
158
+ "Yorkie": 70
159
+ },
160
+ "layer_norm_eps": 1e-12,
161
+ "model_type": "vit",
162
+ "num_attention_heads": 12,
163
+ "num_channels": 3,
164
+ "num_hidden_layers": 12,
165
+ "patch_size": 16,
166
+ "problem_type": "single_label_classification",
167
+ "qkv_bias": true,
168
+ "torch_dtype": "float32",
169
+ "transformers_version": "4.45.2"
170
+ }
checkpoint-29100/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda49ac12533271a8bbe1efb216edbf2fe4d89614936f48267431eb3a0ba3b2d
3
+ size 343436228
checkpoint-29100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:511281389be430b2fb03483e94a311b1a2c75376a431f18dc4ccf850fa7ea08f
3
+ size 686992901
checkpoint-29100/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-29100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d7eb547490fd3ce44f16684a4cffd4d2919eeb2e450290c045a18961b7af64
3
+ size 14575
checkpoint-29100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0be2d33b4b212c32e1428697fafe194b0d0af3587c727030975cf991d6c3b5
3
+ size 627
checkpoint-29100/trainer_state.json ADDED
@@ -0,0 +1,1439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.2304611206054688,
3
+ "best_model_checkpoint": "dogs_70_breeds_image_detection/checkpoint-29100",
4
+ "epoch": 100.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.03811369509043928,
14
+ "eval_loss": 4.224255561828613,
15
+ "eval_model_preparation_time": 0.005,
16
+ "eval_runtime": 53.4638,
17
+ "eval_samples_per_second": 115.817,
18
+ "eval_steps_per_second": 14.477,
19
+ "step": 291
20
+ },
21
+ {
22
+ "epoch": 1.718213058419244,
23
+ "grad_norm": 1.1522711515426636,
24
+ "learning_rate": 9.845094664371773e-07,
25
+ "loss": 4.2286,
26
+ "step": 500
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "eval_accuracy": 0.08301033591731266,
31
+ "eval_loss": 4.186123371124268,
32
+ "eval_model_preparation_time": 0.005,
33
+ "eval_runtime": 53.282,
34
+ "eval_samples_per_second": 116.212,
35
+ "eval_steps_per_second": 14.526,
36
+ "step": 582
37
+ },
38
+ {
39
+ "epoch": 3.0,
40
+ "eval_accuracy": 0.15536175710594316,
41
+ "eval_loss": 4.142723560333252,
42
+ "eval_model_preparation_time": 0.005,
43
+ "eval_runtime": 53.6144,
44
+ "eval_samples_per_second": 115.491,
45
+ "eval_steps_per_second": 14.436,
46
+ "step": 873
47
+ },
48
+ {
49
+ "epoch": 3.436426116838488,
50
+ "grad_norm": 1.341554880142212,
51
+ "learning_rate": 9.672977624784853e-07,
52
+ "loss": 4.1533,
53
+ "step": 1000
54
+ },
55
+ {
56
+ "epoch": 4.0,
57
+ "eval_accuracy": 0.24563953488372092,
58
+ "eval_loss": 4.09642219543457,
59
+ "eval_model_preparation_time": 0.005,
60
+ "eval_runtime": 53.2746,
61
+ "eval_samples_per_second": 116.228,
62
+ "eval_steps_per_second": 14.529,
63
+ "step": 1164
64
+ },
65
+ {
66
+ "epoch": 5.0,
67
+ "eval_accuracy": 0.35852713178294576,
68
+ "eval_loss": 4.050450801849365,
69
+ "eval_model_preparation_time": 0.005,
70
+ "eval_runtime": 53.2922,
71
+ "eval_samples_per_second": 116.19,
72
+ "eval_steps_per_second": 14.524,
73
+ "step": 1455
74
+ },
75
+ {
76
+ "epoch": 5.154639175257732,
77
+ "grad_norm": 1.4653651714324951,
78
+ "learning_rate": 9.500860585197934e-07,
79
+ "loss": 4.0657,
80
+ "step": 1500
81
+ },
82
+ {
83
+ "epoch": 6.0,
84
+ "eval_accuracy": 0.44993540051679587,
85
+ "eval_loss": 4.003639221191406,
86
+ "eval_model_preparation_time": 0.005,
87
+ "eval_runtime": 53.2227,
88
+ "eval_samples_per_second": 116.341,
89
+ "eval_steps_per_second": 14.543,
90
+ "step": 1746
91
+ },
92
+ {
93
+ "epoch": 6.872852233676976,
94
+ "grad_norm": 1.3512729406356812,
95
+ "learning_rate": 9.328743545611016e-07,
96
+ "loss": 3.9797,
97
+ "step": 2000
98
+ },
99
+ {
100
+ "epoch": 7.0,
101
+ "eval_accuracy": 0.5489341085271318,
102
+ "eval_loss": 3.956012725830078,
103
+ "eval_model_preparation_time": 0.005,
104
+ "eval_runtime": 53.824,
105
+ "eval_samples_per_second": 115.042,
106
+ "eval_steps_per_second": 14.38,
107
+ "step": 2037
108
+ },
109
+ {
110
+ "epoch": 8.0,
111
+ "eval_accuracy": 0.6153100775193798,
112
+ "eval_loss": 3.908579111099243,
113
+ "eval_model_preparation_time": 0.005,
114
+ "eval_runtime": 53.6148,
115
+ "eval_samples_per_second": 115.491,
116
+ "eval_steps_per_second": 14.436,
117
+ "step": 2328
118
+ },
119
+ {
120
+ "epoch": 8.59106529209622,
121
+ "grad_norm": 1.6852034330368042,
122
+ "learning_rate": 9.156626506024095e-07,
123
+ "loss": 3.8945,
124
+ "step": 2500
125
+ },
126
+ {
127
+ "epoch": 9.0,
128
+ "eval_accuracy": 0.6690891472868217,
129
+ "eval_loss": 3.861515522003174,
130
+ "eval_model_preparation_time": 0.005,
131
+ "eval_runtime": 53.3317,
132
+ "eval_samples_per_second": 116.104,
133
+ "eval_steps_per_second": 14.513,
134
+ "step": 2619
135
+ },
136
+ {
137
+ "epoch": 10.0,
138
+ "eval_accuracy": 0.7041343669250646,
139
+ "eval_loss": 3.8151276111602783,
140
+ "eval_model_preparation_time": 0.005,
141
+ "eval_runtime": 53.7261,
142
+ "eval_samples_per_second": 115.251,
143
+ "eval_steps_per_second": 14.406,
144
+ "step": 2910
145
+ },
146
+ {
147
+ "epoch": 10.309278350515465,
148
+ "grad_norm": 1.4977772235870361,
149
+ "learning_rate": 8.984509466437177e-07,
150
+ "loss": 3.8073,
151
+ "step": 3000
152
+ },
153
+ {
154
+ "epoch": 11.0,
155
+ "eval_accuracy": 0.7333656330749354,
156
+ "eval_loss": 3.767709732055664,
157
+ "eval_model_preparation_time": 0.005,
158
+ "eval_runtime": 53.2216,
159
+ "eval_samples_per_second": 116.344,
160
+ "eval_steps_per_second": 14.543,
161
+ "step": 3201
162
+ },
163
+ {
164
+ "epoch": 12.0,
165
+ "eval_accuracy": 0.7543604651162791,
166
+ "eval_loss": 3.7216570377349854,
167
+ "eval_model_preparation_time": 0.005,
168
+ "eval_runtime": 52.8673,
169
+ "eval_samples_per_second": 117.123,
170
+ "eval_steps_per_second": 14.64,
171
+ "step": 3492
172
+ },
173
+ {
174
+ "epoch": 12.027491408934708,
175
+ "grad_norm": 1.4954266548156738,
176
+ "learning_rate": 8.812392426850258e-07,
177
+ "loss": 3.7217,
178
+ "step": 3500
179
+ },
180
+ {
181
+ "epoch": 13.0,
182
+ "eval_accuracy": 0.7698643410852714,
183
+ "eval_loss": 3.6769490242004395,
184
+ "eval_model_preparation_time": 0.005,
185
+ "eval_runtime": 53.1587,
186
+ "eval_samples_per_second": 116.481,
187
+ "eval_steps_per_second": 14.56,
188
+ "step": 3783
189
+ },
190
+ {
191
+ "epoch": 13.745704467353953,
192
+ "grad_norm": 1.6733068227767944,
193
+ "learning_rate": 8.640275387263338e-07,
194
+ "loss": 3.6412,
195
+ "step": 4000
196
+ },
197
+ {
198
+ "epoch": 14.0,
199
+ "eval_accuracy": 0.7805232558139535,
200
+ "eval_loss": 3.6332757472991943,
201
+ "eval_model_preparation_time": 0.005,
202
+ "eval_runtime": 52.6862,
203
+ "eval_samples_per_second": 117.526,
204
+ "eval_steps_per_second": 14.691,
205
+ "step": 4074
206
+ },
207
+ {
208
+ "epoch": 15.0,
209
+ "eval_accuracy": 0.7876291989664083,
210
+ "eval_loss": 3.58959698677063,
211
+ "eval_model_preparation_time": 0.005,
212
+ "eval_runtime": 52.5851,
213
+ "eval_samples_per_second": 117.752,
214
+ "eval_steps_per_second": 14.719,
215
+ "step": 4365
216
+ },
217
+ {
218
+ "epoch": 15.463917525773196,
219
+ "grad_norm": 1.5803037881851196,
220
+ "learning_rate": 8.46815834767642e-07,
221
+ "loss": 3.56,
222
+ "step": 4500
223
+ },
224
+ {
225
+ "epoch": 16.0,
226
+ "eval_accuracy": 0.7963501291989664,
227
+ "eval_loss": 3.547574520111084,
228
+ "eval_model_preparation_time": 0.005,
229
+ "eval_runtime": 52.7736,
230
+ "eval_samples_per_second": 117.331,
231
+ "eval_steps_per_second": 14.666,
232
+ "step": 4656
233
+ },
234
+ {
235
+ "epoch": 17.0,
236
+ "eval_accuracy": 0.8026485788113695,
237
+ "eval_loss": 3.507601737976074,
238
+ "eval_model_preparation_time": 0.005,
239
+ "eval_runtime": 53.0827,
240
+ "eval_samples_per_second": 116.648,
241
+ "eval_steps_per_second": 14.581,
242
+ "step": 4947
243
+ },
244
+ {
245
+ "epoch": 17.18213058419244,
246
+ "grad_norm": 1.741551399230957,
247
+ "learning_rate": 8.2960413080895e-07,
248
+ "loss": 3.4839,
249
+ "step": 5000
250
+ },
251
+ {
252
+ "epoch": 18.0,
253
+ "eval_accuracy": 0.8086240310077519,
254
+ "eval_loss": 3.467883825302124,
255
+ "eval_model_preparation_time": 0.005,
256
+ "eval_runtime": 52.7555,
257
+ "eval_samples_per_second": 117.372,
258
+ "eval_steps_per_second": 14.671,
259
+ "step": 5238
260
+ },
261
+ {
262
+ "epoch": 18.900343642611684,
263
+ "grad_norm": 1.6814472675323486,
264
+ "learning_rate": 8.123924268502581e-07,
265
+ "loss": 3.4135,
266
+ "step": 5500
267
+ },
268
+ {
269
+ "epoch": 19.0,
270
+ "eval_accuracy": 0.8162144702842378,
271
+ "eval_loss": 3.429760694503784,
272
+ "eval_model_preparation_time": 0.005,
273
+ "eval_runtime": 52.6582,
274
+ "eval_samples_per_second": 117.589,
275
+ "eval_steps_per_second": 14.699,
276
+ "step": 5529
277
+ },
278
+ {
279
+ "epoch": 20.0,
280
+ "eval_accuracy": 0.8207364341085271,
281
+ "eval_loss": 3.3927395343780518,
282
+ "eval_model_preparation_time": 0.005,
283
+ "eval_runtime": 52.6007,
284
+ "eval_samples_per_second": 117.717,
285
+ "eval_steps_per_second": 14.715,
286
+ "step": 5820
287
+ },
288
+ {
289
+ "epoch": 20.61855670103093,
290
+ "grad_norm": 1.8037664890289307,
291
+ "learning_rate": 7.951807228915662e-07,
292
+ "loss": 3.3452,
293
+ "step": 6000
294
+ },
295
+ {
296
+ "epoch": 21.0,
297
+ "eval_accuracy": 0.8254198966408268,
298
+ "eval_loss": 3.3561041355133057,
299
+ "eval_model_preparation_time": 0.005,
300
+ "eval_runtime": 52.766,
301
+ "eval_samples_per_second": 117.348,
302
+ "eval_steps_per_second": 14.669,
303
+ "step": 6111
304
+ },
305
+ {
306
+ "epoch": 22.0,
307
+ "eval_accuracy": 0.8283268733850129,
308
+ "eval_loss": 3.3211123943328857,
309
+ "eval_model_preparation_time": 0.005,
310
+ "eval_runtime": 52.9233,
311
+ "eval_samples_per_second": 117.0,
312
+ "eval_steps_per_second": 14.625,
313
+ "step": 6402
314
+ },
315
+ {
316
+ "epoch": 22.33676975945017,
317
+ "grad_norm": 1.7638081312179565,
318
+ "learning_rate": 7.779690189328744e-07,
319
+ "loss": 3.2809,
320
+ "step": 6500
321
+ },
322
+ {
323
+ "epoch": 23.0,
324
+ "eval_accuracy": 0.8330103359173127,
325
+ "eval_loss": 3.2866406440734863,
326
+ "eval_model_preparation_time": 0.005,
327
+ "eval_runtime": 53.5163,
328
+ "eval_samples_per_second": 115.703,
329
+ "eval_steps_per_second": 14.463,
330
+ "step": 6693
331
+ },
332
+ {
333
+ "epoch": 24.0,
334
+ "eval_accuracy": 0.838501291989664,
335
+ "eval_loss": 3.2538506984710693,
336
+ "eval_model_preparation_time": 0.005,
337
+ "eval_runtime": 53.3194,
338
+ "eval_samples_per_second": 116.13,
339
+ "eval_steps_per_second": 14.516,
340
+ "step": 6984
341
+ },
342
+ {
343
+ "epoch": 24.054982817869416,
344
+ "grad_norm": 2.052138328552246,
345
+ "learning_rate": 7.607573149741824e-07,
346
+ "loss": 3.2174,
347
+ "step": 7000
348
+ },
349
+ {
350
+ "epoch": 25.0,
351
+ "eval_accuracy": 0.8410852713178295,
352
+ "eval_loss": 3.221360445022583,
353
+ "eval_model_preparation_time": 0.005,
354
+ "eval_runtime": 53.5019,
355
+ "eval_samples_per_second": 115.734,
356
+ "eval_steps_per_second": 14.467,
357
+ "step": 7275
358
+ },
359
+ {
360
+ "epoch": 25.77319587628866,
361
+ "grad_norm": 1.6119282245635986,
362
+ "learning_rate": 7.435456110154906e-07,
363
+ "loss": 3.1592,
364
+ "step": 7500
365
+ },
366
+ {
367
+ "epoch": 26.0,
368
+ "eval_accuracy": 0.8443152454780362,
369
+ "eval_loss": 3.189776659011841,
370
+ "eval_model_preparation_time": 0.005,
371
+ "eval_runtime": 53.7691,
372
+ "eval_samples_per_second": 115.159,
373
+ "eval_steps_per_second": 14.395,
374
+ "step": 7566
375
+ },
376
+ {
377
+ "epoch": 27.0,
378
+ "eval_accuracy": 0.8464147286821705,
379
+ "eval_loss": 3.1592888832092285,
380
+ "eval_model_preparation_time": 0.005,
381
+ "eval_runtime": 53.6596,
382
+ "eval_samples_per_second": 115.394,
383
+ "eval_steps_per_second": 14.424,
384
+ "step": 7857
385
+ },
386
+ {
387
+ "epoch": 27.491408934707902,
388
+ "grad_norm": 1.8645163774490356,
389
+ "learning_rate": 7.263339070567986e-07,
390
+ "loss": 3.1041,
391
+ "step": 8000
392
+ },
393
+ {
394
+ "epoch": 28.0,
395
+ "eval_accuracy": 0.8481912144702842,
396
+ "eval_loss": 3.1283702850341797,
397
+ "eval_model_preparation_time": 0.005,
398
+ "eval_runtime": 53.3822,
399
+ "eval_samples_per_second": 115.994,
400
+ "eval_steps_per_second": 14.499,
401
+ "step": 8148
402
+ },
403
+ {
404
+ "epoch": 29.0,
405
+ "eval_accuracy": 0.8510981912144703,
406
+ "eval_loss": 3.1002068519592285,
407
+ "eval_model_preparation_time": 0.005,
408
+ "eval_runtime": 53.0972,
409
+ "eval_samples_per_second": 116.616,
410
+ "eval_steps_per_second": 14.577,
411
+ "step": 8439
412
+ },
413
+ {
414
+ "epoch": 29.209621993127147,
415
+ "grad_norm": 2.0290517807006836,
416
+ "learning_rate": 7.091222030981066e-07,
417
+ "loss": 3.047,
418
+ "step": 8500
419
+ },
420
+ {
421
+ "epoch": 30.0,
422
+ "eval_accuracy": 0.853843669250646,
423
+ "eval_loss": 3.071702480316162,
424
+ "eval_model_preparation_time": 0.005,
425
+ "eval_runtime": 53.8219,
426
+ "eval_samples_per_second": 115.046,
427
+ "eval_steps_per_second": 14.381,
428
+ "step": 8730
429
+ },
430
+ {
431
+ "epoch": 30.927835051546392,
432
+ "grad_norm": 1.9484608173370361,
433
+ "learning_rate": 6.919104991394148e-07,
434
+ "loss": 2.9999,
435
+ "step": 9000
436
+ },
437
+ {
438
+ "epoch": 31.0,
439
+ "eval_accuracy": 0.8552971576227391,
440
+ "eval_loss": 3.044823408126831,
441
+ "eval_model_preparation_time": 0.005,
442
+ "eval_runtime": 54.0725,
443
+ "eval_samples_per_second": 114.513,
444
+ "eval_steps_per_second": 14.314,
445
+ "step": 9021
446
+ },
447
+ {
448
+ "epoch": 32.0,
449
+ "eval_accuracy": 0.8562661498708011,
450
+ "eval_loss": 3.0170109272003174,
451
+ "eval_model_preparation_time": 0.005,
452
+ "eval_runtime": 54.2696,
453
+ "eval_samples_per_second": 114.097,
454
+ "eval_steps_per_second": 14.262,
455
+ "step": 9312
456
+ },
457
+ {
458
+ "epoch": 32.64604810996563,
459
+ "grad_norm": 1.9190058708190918,
460
+ "learning_rate": 6.746987951807228e-07,
461
+ "loss": 2.951,
462
+ "step": 9500
463
+ },
464
+ {
465
+ "epoch": 33.0,
466
+ "eval_accuracy": 0.8580426356589147,
467
+ "eval_loss": 2.9910173416137695,
468
+ "eval_model_preparation_time": 0.005,
469
+ "eval_runtime": 53.2749,
470
+ "eval_samples_per_second": 116.227,
471
+ "eval_steps_per_second": 14.528,
472
+ "step": 9603
473
+ },
474
+ {
475
+ "epoch": 34.0,
476
+ "eval_accuracy": 0.8594961240310077,
477
+ "eval_loss": 2.965754747390747,
478
+ "eval_model_preparation_time": 0.005,
479
+ "eval_runtime": 53.3652,
480
+ "eval_samples_per_second": 116.031,
481
+ "eval_steps_per_second": 14.504,
482
+ "step": 9894
483
+ },
484
+ {
485
+ "epoch": 34.36426116838488,
486
+ "grad_norm": 2.104431390762329,
487
+ "learning_rate": 6.57487091222031e-07,
488
+ "loss": 2.8999,
489
+ "step": 10000
490
+ },
491
+ {
492
+ "epoch": 35.0,
493
+ "eval_accuracy": 0.8609496124031008,
494
+ "eval_loss": 2.9400594234466553,
495
+ "eval_model_preparation_time": 0.005,
496
+ "eval_runtime": 52.9247,
497
+ "eval_samples_per_second": 116.996,
498
+ "eval_steps_per_second": 14.625,
499
+ "step": 10185
500
+ },
501
+ {
502
+ "epoch": 36.0,
503
+ "eval_accuracy": 0.8627260981912145,
504
+ "eval_loss": 2.9151716232299805,
505
+ "eval_model_preparation_time": 0.005,
506
+ "eval_runtime": 52.9313,
507
+ "eval_samples_per_second": 116.982,
508
+ "eval_steps_per_second": 14.623,
509
+ "step": 10476
510
+ },
511
+ {
512
+ "epoch": 36.08247422680412,
513
+ "grad_norm": 1.8955918550491333,
514
+ "learning_rate": 6.402753872633391e-07,
515
+ "loss": 2.8589,
516
+ "step": 10500
517
+ },
518
+ {
519
+ "epoch": 37.0,
520
+ "eval_accuracy": 0.8643410852713178,
521
+ "eval_loss": 2.8920605182647705,
522
+ "eval_model_preparation_time": 0.005,
523
+ "eval_runtime": 53.1759,
524
+ "eval_samples_per_second": 116.444,
525
+ "eval_steps_per_second": 14.555,
526
+ "step": 10767
527
+ },
528
+ {
529
+ "epoch": 37.80068728522337,
530
+ "grad_norm": 2.0740840435028076,
531
+ "learning_rate": 6.230636833046471e-07,
532
+ "loss": 2.8145,
533
+ "step": 11000
534
+ },
535
+ {
536
+ "epoch": 38.0,
537
+ "eval_accuracy": 0.8646640826873385,
538
+ "eval_loss": 2.8690123558044434,
539
+ "eval_model_preparation_time": 0.005,
540
+ "eval_runtime": 52.8932,
541
+ "eval_samples_per_second": 117.066,
542
+ "eval_steps_per_second": 14.633,
543
+ "step": 11058
544
+ },
545
+ {
546
+ "epoch": 39.0,
547
+ "eval_accuracy": 0.8659560723514211,
548
+ "eval_loss": 2.845964193344116,
549
+ "eval_model_preparation_time": 0.005,
550
+ "eval_runtime": 52.8287,
551
+ "eval_samples_per_second": 117.209,
552
+ "eval_steps_per_second": 14.651,
553
+ "step": 11349
554
+ },
555
+ {
556
+ "epoch": 39.51890034364261,
557
+ "grad_norm": 1.9286776781082153,
558
+ "learning_rate": 6.058519793459552e-07,
559
+ "loss": 2.7734,
560
+ "step": 11500
561
+ },
562
+ {
563
+ "epoch": 40.0,
564
+ "eval_accuracy": 0.8674095607235142,
565
+ "eval_loss": 2.8238561153411865,
566
+ "eval_model_preparation_time": 0.005,
567
+ "eval_runtime": 52.9947,
568
+ "eval_samples_per_second": 116.842,
569
+ "eval_steps_per_second": 14.605,
570
+ "step": 11640
571
+ },
572
+ {
573
+ "epoch": 41.0,
574
+ "eval_accuracy": 0.8680555555555556,
575
+ "eval_loss": 2.8031866550445557,
576
+ "eval_model_preparation_time": 0.005,
577
+ "eval_runtime": 53.11,
578
+ "eval_samples_per_second": 116.588,
579
+ "eval_steps_per_second": 14.574,
580
+ "step": 11931
581
+ },
582
+ {
583
+ "epoch": 41.23711340206186,
584
+ "grad_norm": 1.892899751663208,
585
+ "learning_rate": 5.886402753872633e-07,
586
+ "loss": 2.7352,
587
+ "step": 12000
588
+ },
589
+ {
590
+ "epoch": 42.0,
591
+ "eval_accuracy": 0.8704780361757106,
592
+ "eval_loss": 2.782097816467285,
593
+ "eval_model_preparation_time": 0.005,
594
+ "eval_runtime": 53.2125,
595
+ "eval_samples_per_second": 116.364,
596
+ "eval_steps_per_second": 14.545,
597
+ "step": 12222
598
+ },
599
+ {
600
+ "epoch": 42.955326460481096,
601
+ "grad_norm": 1.8656749725341797,
602
+ "learning_rate": 5.714285714285714e-07,
603
+ "loss": 2.695,
604
+ "step": 12500
605
+ },
606
+ {
607
+ "epoch": 43.0,
608
+ "eval_accuracy": 0.8716085271317829,
609
+ "eval_loss": 2.7616491317749023,
610
+ "eval_model_preparation_time": 0.005,
611
+ "eval_runtime": 52.6755,
612
+ "eval_samples_per_second": 117.55,
613
+ "eval_steps_per_second": 14.694,
614
+ "step": 12513
615
+ },
616
+ {
617
+ "epoch": 44.0,
618
+ "eval_accuracy": 0.873062015503876,
619
+ "eval_loss": 2.7412848472595215,
620
+ "eval_model_preparation_time": 0.005,
621
+ "eval_runtime": 53.2571,
622
+ "eval_samples_per_second": 116.266,
623
+ "eval_steps_per_second": 14.533,
624
+ "step": 12804
625
+ },
626
+ {
627
+ "epoch": 44.67353951890034,
628
+ "grad_norm": 1.9270859956741333,
629
+ "learning_rate": 5.542168674698795e-07,
630
+ "loss": 2.6587,
631
+ "step": 13000
632
+ },
633
+ {
634
+ "epoch": 45.0,
635
+ "eval_accuracy": 0.8717700258397932,
636
+ "eval_loss": 2.721040725708008,
637
+ "eval_model_preparation_time": 0.005,
638
+ "eval_runtime": 52.9963,
639
+ "eval_samples_per_second": 116.838,
640
+ "eval_steps_per_second": 14.605,
641
+ "step": 13095
642
+ },
643
+ {
644
+ "epoch": 46.0,
645
+ "eval_accuracy": 0.8741925064599483,
646
+ "eval_loss": 2.702544689178467,
647
+ "eval_model_preparation_time": 0.005,
648
+ "eval_runtime": 53.1374,
649
+ "eval_samples_per_second": 116.528,
650
+ "eval_steps_per_second": 14.566,
651
+ "step": 13386
652
+ },
653
+ {
654
+ "epoch": 46.391752577319586,
655
+ "grad_norm": 2.173807382583618,
656
+ "learning_rate": 5.370051635111877e-07,
657
+ "loss": 2.6245,
658
+ "step": 13500
659
+ },
660
+ {
661
+ "epoch": 47.0,
662
+ "eval_accuracy": 0.8743540051679587,
663
+ "eval_loss": 2.6839137077331543,
664
+ "eval_model_preparation_time": 0.005,
665
+ "eval_runtime": 53.0364,
666
+ "eval_samples_per_second": 116.75,
667
+ "eval_steps_per_second": 14.594,
668
+ "step": 13677
669
+ },
670
+ {
671
+ "epoch": 48.0,
672
+ "eval_accuracy": 0.8761304909560723,
673
+ "eval_loss": 2.666257619857788,
674
+ "eval_model_preparation_time": 0.005,
675
+ "eval_runtime": 52.9704,
676
+ "eval_samples_per_second": 116.895,
677
+ "eval_steps_per_second": 14.612,
678
+ "step": 13968
679
+ },
680
+ {
681
+ "epoch": 48.10996563573883,
682
+ "grad_norm": 2.061178207397461,
683
+ "learning_rate": 5.197934595524956e-07,
684
+ "loss": 2.5929,
685
+ "step": 14000
686
+ },
687
+ {
688
+ "epoch": 49.0,
689
+ "eval_accuracy": 0.875968992248062,
690
+ "eval_loss": 2.648406505584717,
691
+ "eval_model_preparation_time": 0.005,
692
+ "eval_runtime": 52.8971,
693
+ "eval_samples_per_second": 117.057,
694
+ "eval_steps_per_second": 14.632,
695
+ "step": 14259
696
+ },
697
+ {
698
+ "epoch": 49.828178694158076,
699
+ "grad_norm": 2.2463059425354004,
700
+ "learning_rate": 5.025817555938038e-07,
701
+ "loss": 2.5577,
702
+ "step": 14500
703
+ },
704
+ {
705
+ "epoch": 50.0,
706
+ "eval_accuracy": 0.8780684754521964,
707
+ "eval_loss": 2.630814790725708,
708
+ "eval_model_preparation_time": 0.005,
709
+ "eval_runtime": 52.7435,
710
+ "eval_samples_per_second": 117.398,
711
+ "eval_steps_per_second": 14.675,
712
+ "step": 14550
713
+ },
714
+ {
715
+ "epoch": 51.0,
716
+ "eval_accuracy": 0.8793604651162791,
717
+ "eval_loss": 2.613746404647827,
718
+ "eval_model_preparation_time": 0.005,
719
+ "eval_runtime": 53.3318,
720
+ "eval_samples_per_second": 116.103,
721
+ "eval_steps_per_second": 14.513,
722
+ "step": 14841
723
+ },
724
+ {
725
+ "epoch": 51.54639175257732,
726
+ "grad_norm": 2.0589890480041504,
727
+ "learning_rate": 4.853700516351119e-07,
728
+ "loss": 2.5317,
729
+ "step": 15000
730
+ },
731
+ {
732
+ "epoch": 52.0,
733
+ "eval_accuracy": 0.8791989664082688,
734
+ "eval_loss": 2.5976715087890625,
735
+ "eval_model_preparation_time": 0.005,
736
+ "eval_runtime": 52.6721,
737
+ "eval_samples_per_second": 117.558,
738
+ "eval_steps_per_second": 14.695,
739
+ "step": 15132
740
+ },
741
+ {
742
+ "epoch": 53.0,
743
+ "eval_accuracy": 0.8804909560723514,
744
+ "eval_loss": 2.5813040733337402,
745
+ "eval_model_preparation_time": 0.005,
746
+ "eval_runtime": 53.2754,
747
+ "eval_samples_per_second": 116.226,
748
+ "eval_steps_per_second": 14.528,
749
+ "step": 15423
750
+ },
751
+ {
752
+ "epoch": 53.264604810996566,
753
+ "grad_norm": 2.004828929901123,
754
+ "learning_rate": 4.6815834767641994e-07,
755
+ "loss": 2.4971,
756
+ "step": 15500
757
+ },
758
+ {
759
+ "epoch": 54.0,
760
+ "eval_accuracy": 0.8808139534883721,
761
+ "eval_loss": 2.566027879714966,
762
+ "eval_model_preparation_time": 0.005,
763
+ "eval_runtime": 53.3637,
764
+ "eval_samples_per_second": 116.034,
765
+ "eval_steps_per_second": 14.504,
766
+ "step": 15714
767
+ },
768
+ {
769
+ "epoch": 54.982817869415804,
770
+ "grad_norm": 1.9317859411239624,
771
+ "learning_rate": 4.5094664371772807e-07,
772
+ "loss": 2.4695,
773
+ "step": 16000
774
+ },
775
+ {
776
+ "epoch": 55.0,
777
+ "eval_accuracy": 0.8812984496124031,
778
+ "eval_loss": 2.551055431365967,
779
+ "eval_model_preparation_time": 0.005,
780
+ "eval_runtime": 52.4399,
781
+ "eval_samples_per_second": 118.078,
782
+ "eval_steps_per_second": 14.76,
783
+ "step": 16005
784
+ },
785
+ {
786
+ "epoch": 56.0,
787
+ "eval_accuracy": 0.8806524547803618,
788
+ "eval_loss": 2.536409854888916,
789
+ "eval_model_preparation_time": 0.005,
790
+ "eval_runtime": 52.5325,
791
+ "eval_samples_per_second": 117.87,
792
+ "eval_steps_per_second": 14.734,
793
+ "step": 16296
794
+ },
795
+ {
796
+ "epoch": 56.70103092783505,
797
+ "grad_norm": 2.0537452697753906,
798
+ "learning_rate": 4.3373493975903615e-07,
799
+ "loss": 2.444,
800
+ "step": 16500
801
+ },
802
+ {
803
+ "epoch": 57.0,
804
+ "eval_accuracy": 0.8827519379844961,
805
+ "eval_loss": 2.521925449371338,
806
+ "eval_model_preparation_time": 0.005,
807
+ "eval_runtime": 52.5122,
808
+ "eval_samples_per_second": 117.915,
809
+ "eval_steps_per_second": 14.739,
810
+ "step": 16587
811
+ },
812
+ {
813
+ "epoch": 58.0,
814
+ "eval_accuracy": 0.8824289405684754,
815
+ "eval_loss": 2.508209228515625,
816
+ "eval_model_preparation_time": 0.005,
817
+ "eval_runtime": 52.9146,
818
+ "eval_samples_per_second": 117.019,
819
+ "eval_steps_per_second": 14.627,
820
+ "step": 16878
821
+ },
822
+ {
823
+ "epoch": 58.419243986254294,
824
+ "grad_norm": 2.472633123397827,
825
+ "learning_rate": 4.165232358003442e-07,
826
+ "loss": 2.4143,
827
+ "step": 17000
828
+ },
829
+ {
830
+ "epoch": 59.0,
831
+ "eval_accuracy": 0.8845284237726099,
832
+ "eval_loss": 2.4946951866149902,
833
+ "eval_model_preparation_time": 0.005,
834
+ "eval_runtime": 53.263,
835
+ "eval_samples_per_second": 116.253,
836
+ "eval_steps_per_second": 14.532,
837
+ "step": 17169
838
+ },
839
+ {
840
+ "epoch": 60.0,
841
+ "eval_accuracy": 0.8837209302325582,
842
+ "eval_loss": 2.4811995029449463,
843
+ "eval_model_preparation_time": 0.005,
844
+ "eval_runtime": 53.3521,
845
+ "eval_samples_per_second": 116.059,
846
+ "eval_steps_per_second": 14.507,
847
+ "step": 17460
848
+ },
849
+ {
850
+ "epoch": 60.13745704467354,
851
+ "grad_norm": 2.3744590282440186,
852
+ "learning_rate": 3.9931153184165226e-07,
853
+ "loss": 2.3935,
854
+ "step": 17500
855
+ },
856
+ {
857
+ "epoch": 61.0,
858
+ "eval_accuracy": 0.8838824289405685,
859
+ "eval_loss": 2.468451499938965,
860
+ "eval_model_preparation_time": 0.005,
861
+ "eval_runtime": 53.0799,
862
+ "eval_samples_per_second": 116.654,
863
+ "eval_steps_per_second": 14.582,
864
+ "step": 17751
865
+ },
866
+ {
867
+ "epoch": 61.855670103092784,
868
+ "grad_norm": 2.0248947143554688,
869
+ "learning_rate": 3.820998278829604e-07,
870
+ "loss": 2.3717,
871
+ "step": 18000
872
+ },
873
+ {
874
+ "epoch": 62.0,
875
+ "eval_accuracy": 0.8858204134366925,
876
+ "eval_loss": 2.456092119216919,
877
+ "eval_model_preparation_time": 0.005,
878
+ "eval_runtime": 52.7271,
879
+ "eval_samples_per_second": 117.435,
880
+ "eval_steps_per_second": 14.679,
881
+ "step": 18042
882
+ },
883
+ {
884
+ "epoch": 63.0,
885
+ "eval_accuracy": 0.8866279069767442,
886
+ "eval_loss": 2.444223403930664,
887
+ "eval_model_preparation_time": 0.005,
888
+ "eval_runtime": 53.2783,
889
+ "eval_samples_per_second": 116.22,
890
+ "eval_steps_per_second": 14.528,
891
+ "step": 18333
892
+ },
893
+ {
894
+ "epoch": 63.57388316151203,
895
+ "grad_norm": 2.6887121200561523,
896
+ "learning_rate": 3.648881239242685e-07,
897
+ "loss": 2.3475,
898
+ "step": 18500
899
+ },
900
+ {
901
+ "epoch": 64.0,
902
+ "eval_accuracy": 0.8875968992248062,
903
+ "eval_loss": 2.432462215423584,
904
+ "eval_model_preparation_time": 0.005,
905
+ "eval_runtime": 53.2565,
906
+ "eval_samples_per_second": 116.267,
907
+ "eval_steps_per_second": 14.533,
908
+ "step": 18624
909
+ },
910
+ {
911
+ "epoch": 65.0,
912
+ "eval_accuracy": 0.8875968992248062,
913
+ "eval_loss": 2.42094087600708,
914
+ "eval_model_preparation_time": 0.005,
915
+ "eval_runtime": 53.453,
916
+ "eval_samples_per_second": 115.84,
917
+ "eval_steps_per_second": 14.48,
918
+ "step": 18915
919
+ },
920
+ {
921
+ "epoch": 65.29209621993127,
922
+ "grad_norm": 1.8884248733520508,
923
+ "learning_rate": 3.4767641996557657e-07,
924
+ "loss": 2.3262,
925
+ "step": 19000
926
+ },
927
+ {
928
+ "epoch": 66.0,
929
+ "eval_accuracy": 0.8884043927648578,
930
+ "eval_loss": 2.410001754760742,
931
+ "eval_model_preparation_time": 0.005,
932
+ "eval_runtime": 53.9318,
933
+ "eval_samples_per_second": 114.812,
934
+ "eval_steps_per_second": 14.351,
935
+ "step": 19206
936
+ },
937
+ {
938
+ "epoch": 67.0,
939
+ "eval_accuracy": 0.8887273901808785,
940
+ "eval_loss": 2.3990447521209717,
941
+ "eval_model_preparation_time": 0.005,
942
+ "eval_runtime": 53.9445,
943
+ "eval_samples_per_second": 114.785,
944
+ "eval_steps_per_second": 14.348,
945
+ "step": 19497
946
+ },
947
+ {
948
+ "epoch": 67.01030927835052,
949
+ "grad_norm": 2.0446999073028564,
950
+ "learning_rate": 3.304647160068847e-07,
951
+ "loss": 2.3043,
952
+ "step": 19500
953
+ },
954
+ {
955
+ "epoch": 68.0,
956
+ "eval_accuracy": 0.8885658914728682,
957
+ "eval_loss": 2.3888654708862305,
958
+ "eval_model_preparation_time": 0.005,
959
+ "eval_runtime": 53.8449,
960
+ "eval_samples_per_second": 114.997,
961
+ "eval_steps_per_second": 14.375,
962
+ "step": 19788
963
+ },
964
+ {
965
+ "epoch": 68.72852233676976,
966
+ "grad_norm": 2.0572509765625,
967
+ "learning_rate": 3.132530120481928e-07,
968
+ "loss": 2.2841,
969
+ "step": 20000
970
+ },
971
+ {
972
+ "epoch": 69.0,
973
+ "eval_accuracy": 0.8906653746770026,
974
+ "eval_loss": 2.3787026405334473,
975
+ "eval_model_preparation_time": 0.005,
976
+ "eval_runtime": 53.7045,
977
+ "eval_samples_per_second": 115.298,
978
+ "eval_steps_per_second": 14.412,
979
+ "step": 20079
980
+ },
981
+ {
982
+ "epoch": 70.0,
983
+ "eval_accuracy": 0.8903423772609819,
984
+ "eval_loss": 2.369292736053467,
985
+ "eval_model_preparation_time": 0.005,
986
+ "eval_runtime": 53.8822,
987
+ "eval_samples_per_second": 114.917,
988
+ "eval_steps_per_second": 14.365,
989
+ "step": 20370
990
+ },
991
+ {
992
+ "epoch": 70.44673539518901,
993
+ "grad_norm": 1.8821637630462646,
994
+ "learning_rate": 2.9604130808950087e-07,
995
+ "loss": 2.2718,
996
+ "step": 20500
997
+ },
998
+ {
999
+ "epoch": 71.0,
1000
+ "eval_accuracy": 0.8906653746770026,
1001
+ "eval_loss": 2.3600070476531982,
1002
+ "eval_model_preparation_time": 0.005,
1003
+ "eval_runtime": 53.9421,
1004
+ "eval_samples_per_second": 114.79,
1005
+ "eval_steps_per_second": 14.349,
1006
+ "step": 20661
1007
+ },
1008
+ {
1009
+ "epoch": 72.0,
1010
+ "eval_accuracy": 0.8919573643410853,
1011
+ "eval_loss": 2.350867509841919,
1012
+ "eval_model_preparation_time": 0.005,
1013
+ "eval_runtime": 54.6506,
1014
+ "eval_samples_per_second": 113.302,
1015
+ "eval_steps_per_second": 14.163,
1016
+ "step": 20952
1017
+ },
1018
+ {
1019
+ "epoch": 72.16494845360825,
1020
+ "grad_norm": 2.260333299636841,
1021
+ "learning_rate": 2.788296041308089e-07,
1022
+ "loss": 2.2487,
1023
+ "step": 21000
1024
+ },
1025
+ {
1026
+ "epoch": 73.0,
1027
+ "eval_accuracy": 0.8924418604651163,
1028
+ "eval_loss": 2.3422977924346924,
1029
+ "eval_model_preparation_time": 0.005,
1030
+ "eval_runtime": 53.8113,
1031
+ "eval_samples_per_second": 115.069,
1032
+ "eval_steps_per_second": 14.384,
1033
+ "step": 21243
1034
+ },
1035
+ {
1036
+ "epoch": 73.88316151202748,
1037
+ "grad_norm": 2.037961006164551,
1038
+ "learning_rate": 2.6161790017211703e-07,
1039
+ "loss": 2.2375,
1040
+ "step": 21500
1041
+ },
1042
+ {
1043
+ "epoch": 74.0,
1044
+ "eval_accuracy": 0.8916343669250646,
1045
+ "eval_loss": 2.3344151973724365,
1046
+ "eval_model_preparation_time": 0.005,
1047
+ "eval_runtime": 53.589,
1048
+ "eval_samples_per_second": 115.546,
1049
+ "eval_steps_per_second": 14.443,
1050
+ "step": 21534
1051
+ },
1052
+ {
1053
+ "epoch": 75.0,
1054
+ "eval_accuracy": 0.8930878552971576,
1055
+ "eval_loss": 2.326270580291748,
1056
+ "eval_model_preparation_time": 0.005,
1057
+ "eval_runtime": 54.0166,
1058
+ "eval_samples_per_second": 114.631,
1059
+ "eval_steps_per_second": 14.329,
1060
+ "step": 21825
1061
+ },
1062
+ {
1063
+ "epoch": 75.60137457044674,
1064
+ "grad_norm": 2.1131231784820557,
1065
+ "learning_rate": 2.444061962134251e-07,
1066
+ "loss": 2.2226,
1067
+ "step": 22000
1068
+ },
1069
+ {
1070
+ "epoch": 76.0,
1071
+ "eval_accuracy": 0.8922803617571059,
1072
+ "eval_loss": 2.318610668182373,
1073
+ "eval_model_preparation_time": 0.005,
1074
+ "eval_runtime": 53.7404,
1075
+ "eval_samples_per_second": 115.221,
1076
+ "eval_steps_per_second": 14.403,
1077
+ "step": 22116
1078
+ },
1079
+ {
1080
+ "epoch": 77.0,
1081
+ "eval_accuracy": 0.8932493540051679,
1082
+ "eval_loss": 2.3115651607513428,
1083
+ "eval_model_preparation_time": 0.005,
1084
+ "eval_runtime": 53.7352,
1085
+ "eval_samples_per_second": 115.232,
1086
+ "eval_steps_per_second": 14.404,
1087
+ "step": 22407
1088
+ },
1089
+ {
1090
+ "epoch": 77.31958762886597,
1091
+ "grad_norm": 2.1695611476898193,
1092
+ "learning_rate": 2.2719449225473322e-07,
1093
+ "loss": 2.206,
1094
+ "step": 22500
1095
+ },
1096
+ {
1097
+ "epoch": 78.0,
1098
+ "eval_accuracy": 0.8935723514211886,
1099
+ "eval_loss": 2.3046042919158936,
1100
+ "eval_model_preparation_time": 0.005,
1101
+ "eval_runtime": 54.1529,
1102
+ "eval_samples_per_second": 114.343,
1103
+ "eval_steps_per_second": 14.293,
1104
+ "step": 22698
1105
+ },
1106
+ {
1107
+ "epoch": 79.0,
1108
+ "eval_accuracy": 0.8943798449612403,
1109
+ "eval_loss": 2.297482490539551,
1110
+ "eval_model_preparation_time": 0.005,
1111
+ "eval_runtime": 52.2319,
1112
+ "eval_samples_per_second": 118.548,
1113
+ "eval_steps_per_second": 14.819,
1114
+ "step": 22989
1115
+ },
1116
+ {
1117
+ "epoch": 79.03780068728523,
1118
+ "grad_norm": 2.3586909770965576,
1119
+ "learning_rate": 2.099827882960413e-07,
1120
+ "loss": 2.1947,
1121
+ "step": 23000
1122
+ },
1123
+ {
1124
+ "epoch": 80.0,
1125
+ "eval_accuracy": 0.8938953488372093,
1126
+ "eval_loss": 2.291203260421753,
1127
+ "eval_model_preparation_time": 0.005,
1128
+ "eval_runtime": 52.1733,
1129
+ "eval_samples_per_second": 118.681,
1130
+ "eval_steps_per_second": 14.835,
1131
+ "step": 23280
1132
+ },
1133
+ {
1134
+ "epoch": 80.75601374570446,
1135
+ "grad_norm": 2.4226436614990234,
1136
+ "learning_rate": 1.9277108433734939e-07,
1137
+ "loss": 2.1804,
1138
+ "step": 23500
1139
+ },
1140
+ {
1141
+ "epoch": 81.0,
1142
+ "eval_accuracy": 0.8937338501291989,
1143
+ "eval_loss": 2.285174608230591,
1144
+ "eval_model_preparation_time": 0.005,
1145
+ "eval_runtime": 52.1705,
1146
+ "eval_samples_per_second": 118.688,
1147
+ "eval_steps_per_second": 14.836,
1148
+ "step": 23571
1149
+ },
1150
+ {
1151
+ "epoch": 82.0,
1152
+ "eval_accuracy": 0.8940568475452196,
1153
+ "eval_loss": 2.2793898582458496,
1154
+ "eval_model_preparation_time": 0.005,
1155
+ "eval_runtime": 53.532,
1156
+ "eval_samples_per_second": 115.669,
1157
+ "eval_steps_per_second": 14.459,
1158
+ "step": 23862
1159
+ },
1160
+ {
1161
+ "epoch": 82.47422680412372,
1162
+ "grad_norm": 2.2000808715820312,
1163
+ "learning_rate": 1.7555938037865747e-07,
1164
+ "loss": 2.1725,
1165
+ "step": 24000
1166
+ },
1167
+ {
1168
+ "epoch": 83.0,
1169
+ "eval_accuracy": 0.895187338501292,
1170
+ "eval_loss": 2.2743537425994873,
1171
+ "eval_model_preparation_time": 0.005,
1172
+ "eval_runtime": 52.7704,
1173
+ "eval_samples_per_second": 117.338,
1174
+ "eval_steps_per_second": 14.667,
1175
+ "step": 24153
1176
+ },
1177
+ {
1178
+ "epoch": 84.0,
1179
+ "eval_accuracy": 0.8953488372093024,
1180
+ "eval_loss": 2.2692511081695557,
1181
+ "eval_model_preparation_time": 0.005,
1182
+ "eval_runtime": 52.6929,
1183
+ "eval_samples_per_second": 117.511,
1184
+ "eval_steps_per_second": 14.689,
1185
+ "step": 24444
1186
+ },
1187
+ {
1188
+ "epoch": 84.19243986254295,
1189
+ "grad_norm": 1.9906361103057861,
1190
+ "learning_rate": 1.5834767641996558e-07,
1191
+ "loss": 2.165,
1192
+ "step": 24500
1193
+ },
1194
+ {
1195
+ "epoch": 85.0,
1196
+ "eval_accuracy": 0.895187338501292,
1197
+ "eval_loss": 2.264594078063965,
1198
+ "eval_model_preparation_time": 0.005,
1199
+ "eval_runtime": 53.0382,
1200
+ "eval_samples_per_second": 116.746,
1201
+ "eval_steps_per_second": 14.593,
1202
+ "step": 24735
1203
+ },
1204
+ {
1205
+ "epoch": 85.91065292096219,
1206
+ "grad_norm": 2.219193935394287,
1207
+ "learning_rate": 1.4113597246127366e-07,
1208
+ "loss": 2.1539,
1209
+ "step": 25000
1210
+ },
1211
+ {
1212
+ "epoch": 86.0,
1213
+ "eval_accuracy": 0.8955103359173127,
1214
+ "eval_loss": 2.2601823806762695,
1215
+ "eval_model_preparation_time": 0.005,
1216
+ "eval_runtime": 53.4023,
1217
+ "eval_samples_per_second": 115.95,
1218
+ "eval_steps_per_second": 14.494,
1219
+ "step": 25026
1220
+ },
1221
+ {
1222
+ "epoch": 87.0,
1223
+ "eval_accuracy": 0.8955103359173127,
1224
+ "eval_loss": 2.2560276985168457,
1225
+ "eval_model_preparation_time": 0.005,
1226
+ "eval_runtime": 52.4937,
1227
+ "eval_samples_per_second": 117.957,
1228
+ "eval_steps_per_second": 14.745,
1229
+ "step": 25317
1230
+ },
1231
+ {
1232
+ "epoch": 87.62886597938144,
1233
+ "grad_norm": 1.8535250425338745,
1234
+ "learning_rate": 1.2392426850258174e-07,
1235
+ "loss": 2.1479,
1236
+ "step": 25500
1237
+ },
1238
+ {
1239
+ "epoch": 88.0,
1240
+ "eval_accuracy": 0.895671834625323,
1241
+ "eval_loss": 2.2524540424346924,
1242
+ "eval_model_preparation_time": 0.005,
1243
+ "eval_runtime": 52.1601,
1244
+ "eval_samples_per_second": 118.712,
1245
+ "eval_steps_per_second": 14.839,
1246
+ "step": 25608
1247
+ },
1248
+ {
1249
+ "epoch": 89.0,
1250
+ "eval_accuracy": 0.8964793281653747,
1251
+ "eval_loss": 2.2489233016967773,
1252
+ "eval_model_preparation_time": 0.005,
1253
+ "eval_runtime": 52.5825,
1254
+ "eval_samples_per_second": 117.758,
1255
+ "eval_steps_per_second": 14.72,
1256
+ "step": 25899
1257
+ },
1258
+ {
1259
+ "epoch": 89.34707903780068,
1260
+ "grad_norm": 1.8873833417892456,
1261
+ "learning_rate": 1.0671256454388984e-07,
1262
+ "loss": 2.1392,
1263
+ "step": 26000
1264
+ },
1265
+ {
1266
+ "epoch": 90.0,
1267
+ "eval_accuracy": 0.896640826873385,
1268
+ "eval_loss": 2.2456018924713135,
1269
+ "eval_model_preparation_time": 0.005,
1270
+ "eval_runtime": 52.5499,
1271
+ "eval_samples_per_second": 117.831,
1272
+ "eval_steps_per_second": 14.729,
1273
+ "step": 26190
1274
+ },
1275
+ {
1276
+ "epoch": 91.0,
1277
+ "eval_accuracy": 0.896640826873385,
1278
+ "eval_loss": 2.2428722381591797,
1279
+ "eval_model_preparation_time": 0.005,
1280
+ "eval_runtime": 52.35,
1281
+ "eval_samples_per_second": 118.281,
1282
+ "eval_steps_per_second": 14.785,
1283
+ "step": 26481
1284
+ },
1285
+ {
1286
+ "epoch": 91.06529209621993,
1287
+ "grad_norm": 1.9566396474838257,
1288
+ "learning_rate": 8.950086058519793e-08,
1289
+ "loss": 2.1339,
1290
+ "step": 26500
1291
+ },
1292
+ {
1293
+ "epoch": 92.0,
1294
+ "eval_accuracy": 0.8968023255813954,
1295
+ "eval_loss": 2.240267515182495,
1296
+ "eval_model_preparation_time": 0.005,
1297
+ "eval_runtime": 53.7343,
1298
+ "eval_samples_per_second": 115.234,
1299
+ "eval_steps_per_second": 14.404,
1300
+ "step": 26772
1301
+ },
1302
+ {
1303
+ "epoch": 92.78350515463917,
1304
+ "grad_norm": 2.347038745880127,
1305
+ "learning_rate": 7.228915662650602e-08,
1306
+ "loss": 2.1307,
1307
+ "step": 27000
1308
+ },
1309
+ {
1310
+ "epoch": 93.0,
1311
+ "eval_accuracy": 0.8964793281653747,
1312
+ "eval_loss": 2.2379775047302246,
1313
+ "eval_model_preparation_time": 0.005,
1314
+ "eval_runtime": 52.7112,
1315
+ "eval_samples_per_second": 117.47,
1316
+ "eval_steps_per_second": 14.684,
1317
+ "step": 27063
1318
+ },
1319
+ {
1320
+ "epoch": 94.0,
1321
+ "eval_accuracy": 0.8964793281653747,
1322
+ "eval_loss": 2.2358651161193848,
1323
+ "eval_model_preparation_time": 0.005,
1324
+ "eval_runtime": 52.7355,
1325
+ "eval_samples_per_second": 117.416,
1326
+ "eval_steps_per_second": 14.677,
1327
+ "step": 27354
1328
+ },
1329
+ {
1330
+ "epoch": 94.50171821305842,
1331
+ "grad_norm": 2.1997811794281006,
1332
+ "learning_rate": 5.507745266781411e-08,
1333
+ "loss": 2.1234,
1334
+ "step": 27500
1335
+ },
1336
+ {
1337
+ "epoch": 95.0,
1338
+ "eval_accuracy": 0.8963178294573644,
1339
+ "eval_loss": 2.2341957092285156,
1340
+ "eval_model_preparation_time": 0.005,
1341
+ "eval_runtime": 52.5471,
1342
+ "eval_samples_per_second": 117.837,
1343
+ "eval_steps_per_second": 14.73,
1344
+ "step": 27645
1345
+ },
1346
+ {
1347
+ "epoch": 96.0,
1348
+ "eval_accuracy": 0.8964793281653747,
1349
+ "eval_loss": 2.2328357696533203,
1350
+ "eval_model_preparation_time": 0.005,
1351
+ "eval_runtime": 53.6532,
1352
+ "eval_samples_per_second": 115.408,
1353
+ "eval_steps_per_second": 14.426,
1354
+ "step": 27936
1355
+ },
1356
+ {
1357
+ "epoch": 96.21993127147766,
1358
+ "grad_norm": 1.939498782157898,
1359
+ "learning_rate": 3.78657487091222e-08,
1360
+ "loss": 2.1212,
1361
+ "step": 28000
1362
+ },
1363
+ {
1364
+ "epoch": 97.0,
1365
+ "eval_accuracy": 0.8964793281653747,
1366
+ "eval_loss": 2.2317166328430176,
1367
+ "eval_model_preparation_time": 0.005,
1368
+ "eval_runtime": 52.8569,
1369
+ "eval_samples_per_second": 117.146,
1370
+ "eval_steps_per_second": 14.643,
1371
+ "step": 28227
1372
+ },
1373
+ {
1374
+ "epoch": 97.9381443298969,
1375
+ "grad_norm": 2.966059684753418,
1376
+ "learning_rate": 2.0654044750430294e-08,
1377
+ "loss": 2.1197,
1378
+ "step": 28500
1379
+ },
1380
+ {
1381
+ "epoch": 98.0,
1382
+ "eval_accuracy": 0.8963178294573644,
1383
+ "eval_loss": 2.231013059616089,
1384
+ "eval_model_preparation_time": 0.005,
1385
+ "eval_runtime": 52.9339,
1386
+ "eval_samples_per_second": 116.976,
1387
+ "eval_steps_per_second": 14.622,
1388
+ "step": 28518
1389
+ },
1390
+ {
1391
+ "epoch": 99.0,
1392
+ "eval_accuracy": 0.8964793281653747,
1393
+ "eval_loss": 2.2305853366851807,
1394
+ "eval_model_preparation_time": 0.005,
1395
+ "eval_runtime": 53.021,
1396
+ "eval_samples_per_second": 116.784,
1397
+ "eval_steps_per_second": 14.598,
1398
+ "step": 28809
1399
+ },
1400
+ {
1401
+ "epoch": 99.65635738831615,
1402
+ "grad_norm": 2.3651483058929443,
1403
+ "learning_rate": 3.442340791738382e-09,
1404
+ "loss": 2.1161,
1405
+ "step": 29000
1406
+ },
1407
+ {
1408
+ "epoch": 100.0,
1409
+ "eval_accuracy": 0.8963178294573644,
1410
+ "eval_loss": 2.2304611206054688,
1411
+ "eval_model_preparation_time": 0.005,
1412
+ "eval_runtime": 52.8568,
1413
+ "eval_samples_per_second": 117.147,
1414
+ "eval_steps_per_second": 14.643,
1415
+ "step": 29100
1416
+ }
1417
+ ],
1418
+ "logging_steps": 500,
1419
+ "max_steps": 29100,
1420
+ "num_input_tokens_seen": 0,
1421
+ "num_train_epochs": 100,
1422
+ "save_steps": 500,
1423
+ "stateful_callbacks": {
1424
+ "TrainerControl": {
1425
+ "args": {
1426
+ "should_epoch_stop": false,
1427
+ "should_evaluate": false,
1428
+ "should_log": false,
1429
+ "should_save": true,
1430
+ "should_training_stop": true
1431
+ },
1432
+ "attributes": {}
1433
+ }
1434
+ },
1435
+ "total_flos": 7.200356285135443e+19,
1436
+ "train_batch_size": 32,
1437
+ "trial_name": null,
1438
+ "trial_params": null
1439
+ }
checkpoint-29100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375fecfe3250f0d7d4d281c005412dc987d92419d67a86bffe4969cb248991cd
3
+ size 4731
config.json CHANGED
@@ -9,152 +9,152 @@
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
- "0": "Bulldog",
13
- "1": "Bearded Collie",
14
- "2": "Boxer",
15
- "3": "Shiba Inu",
16
- "4": "African Wild Dog",
17
- "5": "Mex Hairless",
18
- "6": "Cocker",
19
- "7": "Basenji",
20
- "8": "Lhasa",
21
- "9": "Blenheim",
22
- "10": "Rottweiler",
23
- "11": "Elk Hound",
24
- "12": "Vizsla",
25
- "13": "Siberian Husky",
26
- "14": "Malinois",
27
- "15": "Pekinese",
28
- "16": "Saint Bernard",
29
- "17": "Border Collie",
30
- "18": "Bull Terrier",
31
- "19": "Beagle",
32
- "20": "Airedale",
33
- "21": "Great Perenees",
34
- "22": "Shar_Pei",
35
- "23": "American Spaniel",
36
- "24": "Corgi",
37
  "25": "Chow",
38
- "26": "Afghan",
39
- "27": "Newfoundland",
40
- "28": "German Sheperd",
41
- "29": "Poodle",
42
- "30": "Pug",
43
- "31": "Dingo",
44
- "32": "Bermaise",
45
- "33": "Pit Bull",
46
- "34": "Cairn",
47
- "35": "Labrador",
48
- "36": "Schnauzer",
49
- "37": "Chinese Crested",
50
- "38": "Coyote",
51
- "39": "Groenendael",
52
- "40": "Basset",
53
- "41": "Maltese",
54
- "42": "Dhole",
55
- "43": "Bichon Frise",
56
- "44": "Doberman",
57
- "45": "Scotch Terrier",
58
  "46": "Japanese Spaniel",
59
- "47": "Borzoi",
60
  "48": "Labradoodle",
61
- "49": "Boston Terrier",
62
- "50": "Chihuahua",
63
- "51": "Bull Mastiff",
64
- "52": "American Hairless",
65
- "53": "Great Dane",
66
- "54": "Pomeranian",
67
- "55": "Shih-Tzu",
68
- "56": "Bluetick",
69
- "57": "Greyhound",
70
- "58": "Cockapoo",
71
- "59": "Golden Retriever",
72
- "60": "Irish Wolfhound",
73
- "61": "American Spaniel",
74
- "62": "Clumber",
75
- "63": "Dalmation",
76
- "64": "Bloodhound",
77
- "65": "Komondor",
78
- "66": "Rhodesian",
79
- "67": "French Bulldog",
80
- "68": "Collie",
81
- "69": "Irish Spaniel",
82
  "70": "Yorkie"
83
  },
84
  "image_size": 224,
85
  "initializer_range": 0.02,
86
  "intermediate_size": 3072,
87
  "label2id": {
88
- "Afghan": 26,
89
- "African Wild Dog": 4,
90
- "Airedale": 20,
91
- "American Spaniel": 23,
92
- "American Hairless": 52,
93
- "American Spaniel": 61,
94
- "Basenji": 7,
95
- "Basset": 40,
96
- "Beagle": 19,
97
- "Bearded Collie": 1,
98
- "Bermaise": 32,
99
- "Bichon Frise": 43,
100
- "Blenheim": 9,
101
- "Bloodhound": 64,
102
- "Bluetick": 56,
103
- "Border Collie": 17,
104
- "Borzoi": 47,
105
- "Boston Terrier": 49,
106
- "Boxer": 2,
107
- "Bull Mastiff": 51,
108
- "Bull Terrier": 18,
109
- "Bulldog": 0,
110
- "Cairn": 34,
111
- "Chihuahua": 50,
112
- "Chinese Crested": 37,
113
  "Chow": 25,
114
- "Clumber": 62,
115
- "Cockapoo": 58,
116
- "Cocker": 6,
117
- "Collie": 68,
118
- "Corgi": 24,
119
- "Coyote": 38,
120
- "Dalmation": 63,
121
- "Dhole": 42,
122
- "Dingo": 31,
123
- "Doberman": 44,
124
- "Elk Hound": 11,
125
- "French Bulldog": 67,
126
- "German Sheperd": 28,
127
- "Golden Retriever": 59,
128
- "Great Dane": 53,
129
- "Great Perenees": 21,
130
- "Greyhound": 57,
131
- "Groenendael": 39,
132
- "Irish Spaniel": 69,
133
- "Irish Wolfhound": 60,
134
  "Japanese Spaniel": 46,
135
- "Komondor": 65,
136
  "Labradoodle": 48,
137
- "Labrador": 35,
138
- "Lhasa": 8,
139
- "Malinois": 14,
140
- "Maltese": 41,
141
- "Mex Hairless": 5,
142
- "Newfoundland": 27,
143
- "Pekinese": 15,
144
- "Pit Bull": 33,
145
- "Pomeranian": 54,
146
- "Poodle": 29,
147
- "Pug": 30,
148
- "Rhodesian": 66,
149
- "Rottweiler": 10,
150
- "Saint Bernard": 16,
151
- "Schnauzer": 36,
152
- "Scotch Terrier": 45,
153
- "Shar_Pei": 22,
154
- "Shiba Inu": 3,
155
- "Shih-Tzu": 55,
156
- "Siberian Husky": 13,
157
- "Vizsla": 12,
158
  "Yorkie": 70
159
  },
160
  "layer_norm_eps": 1e-12,
@@ -166,5 +166,5 @@
166
  "problem_type": "single_label_classification",
167
  "qkv_bias": true,
168
  "torch_dtype": "float32",
169
- "transformers_version": "4.33.0"
170
  }
 
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
+ "0": "Afghan",
13
+ "1": "African Wild Dog",
14
+ "2": "Airedale",
15
+ "3": "American Spaniel",
16
+ "4": "American Hairless",
17
+ "5": "American Spaniel",
18
+ "6": "Basenji",
19
+ "7": "Basset",
20
+ "8": "Beagle",
21
+ "9": "Bearded Collie",
22
+ "10": "Bermaise",
23
+ "11": "Bichon Frise",
24
+ "12": "Blenheim",
25
+ "13": "Bloodhound",
26
+ "14": "Bluetick",
27
+ "15": "Border Collie",
28
+ "16": "Borzoi",
29
+ "17": "Boston Terrier",
30
+ "18": "Boxer",
31
+ "19": "Bull Mastiff",
32
+ "20": "Bull Terrier",
33
+ "21": "Bulldog",
34
+ "22": "Cairn",
35
+ "23": "Chihuahua",
36
+ "24": "Chinese Crested",
37
  "25": "Chow",
38
+ "26": "Clumber",
39
+ "27": "Cockapoo",
40
+ "28": "Cocker",
41
+ "29": "Collie",
42
+ "30": "Corgi",
43
+ "31": "Coyote",
44
+ "32": "Dalmation",
45
+ "33": "Dhole",
46
+ "34": "Dingo",
47
+ "35": "Doberman",
48
+ "36": "Elk Hound",
49
+ "37": "French Bulldog",
50
+ "38": "German Sheperd",
51
+ "39": "Golden Retriever",
52
+ "40": "Great Dane",
53
+ "41": "Great Perenees",
54
+ "42": "Greyhound",
55
+ "43": "Groenendael",
56
+ "44": "Irish Spaniel",
57
+ "45": "Irish Wolfhound",
58
  "46": "Japanese Spaniel",
59
+ "47": "Komondor",
60
  "48": "Labradoodle",
61
+ "49": "Labrador",
62
+ "50": "Lhasa",
63
+ "51": "Malinois",
64
+ "52": "Maltese",
65
+ "53": "Mex Hairless",
66
+ "54": "Newfoundland",
67
+ "55": "Pekinese",
68
+ "56": "Pit Bull",
69
+ "57": "Pomeranian",
70
+ "58": "Poodle",
71
+ "59": "Pug",
72
+ "60": "Rhodesian",
73
+ "61": "Rottweiler",
74
+ "62": "Saint Bernard",
75
+ "63": "Schnauzer",
76
+ "64": "Scotch Terrier",
77
+ "65": "Shar_Pei",
78
+ "66": "Shiba Inu",
79
+ "67": "Shih-Tzu",
80
+ "68": "Siberian Husky",
81
+ "69": "Vizsla",
82
  "70": "Yorkie"
83
  },
84
  "image_size": 224,
85
  "initializer_range": 0.02,
86
  "intermediate_size": 3072,
87
  "label2id": {
88
+ "Afghan": 0,
89
+ "African Wild Dog": 1,
90
+ "Airedale": 2,
91
+ "American Spaniel": 3,
92
+ "American Hairless": 4,
93
+ "American Spaniel": 5,
94
+ "Basenji": 6,
95
+ "Basset": 7,
96
+ "Beagle": 8,
97
+ "Bearded Collie": 9,
98
+ "Bermaise": 10,
99
+ "Bichon Frise": 11,
100
+ "Blenheim": 12,
101
+ "Bloodhound": 13,
102
+ "Bluetick": 14,
103
+ "Border Collie": 15,
104
+ "Borzoi": 16,
105
+ "Boston Terrier": 17,
106
+ "Boxer": 18,
107
+ "Bull Mastiff": 19,
108
+ "Bull Terrier": 20,
109
+ "Bulldog": 21,
110
+ "Cairn": 22,
111
+ "Chihuahua": 23,
112
+ "Chinese Crested": 24,
113
  "Chow": 25,
114
+ "Clumber": 26,
115
+ "Cockapoo": 27,
116
+ "Cocker": 28,
117
+ "Collie": 29,
118
+ "Corgi": 30,
119
+ "Coyote": 31,
120
+ "Dalmation": 32,
121
+ "Dhole": 33,
122
+ "Dingo": 34,
123
+ "Doberman": 35,
124
+ "Elk Hound": 36,
125
+ "French Bulldog": 37,
126
+ "German Sheperd": 38,
127
+ "Golden Retriever": 39,
128
+ "Great Dane": 40,
129
+ "Great Perenees": 41,
130
+ "Greyhound": 42,
131
+ "Groenendael": 43,
132
+ "Irish Spaniel": 44,
133
+ "Irish Wolfhound": 45,
134
  "Japanese Spaniel": 46,
135
+ "Komondor": 47,
136
  "Labradoodle": 48,
137
+ "Labrador": 49,
138
+ "Lhasa": 50,
139
+ "Malinois": 51,
140
+ "Maltese": 52,
141
+ "Mex Hairless": 53,
142
+ "Newfoundland": 54,
143
+ "Pekinese": 55,
144
+ "Pit Bull": 56,
145
+ "Pomeranian": 57,
146
+ "Poodle": 58,
147
+ "Pug": 59,
148
+ "Rhodesian": 60,
149
+ "Rottweiler": 61,
150
+ "Saint Bernard": 62,
151
+ "Schnauzer": 63,
152
+ "Scotch Terrier": 64,
153
+ "Shar_Pei": 65,
154
+ "Shiba Inu": 66,
155
+ "Shih-Tzu": 67,
156
+ "Siberian Husky": 68,
157
+ "Vizsla": 69,
158
  "Yorkie": 70
159
  },
160
  "layer_norm_eps": 1e-12,
 
166
  "problem_type": "single_label_classification",
167
  "qkv_bias": true,
168
  "torch_dtype": "float32",
169
+ "transformers_version": "4.45.2"
170
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acaedcf7aee61d3db66f5b0b4ebfe10778154ddbbf9634d8974199c7782b52e0
3
  size 343436228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda49ac12533271a8bbe1efb216edbf2fe4d89614936f48267431eb3a0ba3b2d
3
  size 343436228
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d6c6be3416b12aa281c729a3039482650312fc14cf09341ddd80018b101bc3a
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375fecfe3250f0d7d4d281c005412dc987d92419d67a86bffe4969cb248991cd
3
+ size 4731