Woleek committed on
Commit
6ca43d4
·
1 Parent(s): 9a0a920

Test passed

Browse files
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9382716049382716,
4
- "eval_loss": 0.16537487506866455,
5
- "eval_runtime": 7.4881,
6
- "eval_samples_per_second": 75.72,
7
- "eval_steps_per_second": 9.482,
8
- "test_accuracy": 0.7333333333333333,
9
- "test_loss": 0.6995685696601868,
10
- "test_runtime": 0.645,
11
- "test_samples_per_second": 46.51,
12
- "test_steps_per_second": 6.201,
13
- "train_loss": 0.11905734094947872,
14
- "train_runtime": 363.8199,
15
- "train_samples_per_second": 55.412,
16
- "train_steps_per_second": 5.552
17
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9915611814345991,
4
+ "eval_loss": 0.023517148569226265,
5
+ "eval_runtime": 14.6863,
6
+ "eval_samples_per_second": 80.687,
7
+ "eval_steps_per_second": 10.145,
8
+ "test_accuracy": 0.9333333333333333,
9
+ "test_loss": 0.32354873418807983,
10
+ "test_runtime": 0.5696,
11
+ "test_samples_per_second": 52.669,
12
+ "test_steps_per_second": 7.022,
13
+ "train_loss": 0.004172653943921321,
14
+ "train_runtime": 720.5262,
15
+ "train_samples_per_second": 35.079,
16
+ "train_steps_per_second": 3.511
17
  }
config.json CHANGED
@@ -1,50 +1,34 @@
1
  {
2
- "_name_or_path": "microsoft/resnet-50",
3
  "architectures": [
4
- "ResNetForImageClassification"
5
- ],
6
- "depths": [
7
- 3,
8
- 4,
9
- 6,
10
- 3
11
- ],
12
- "downsample_in_first_stage": false,
13
- "embedding_size": 64,
14
- "hidden_act": "relu",
15
- "hidden_sizes": [
16
- 256,
17
- 512,
18
- 1024,
19
- 2048
20
  ],
 
 
 
 
 
21
  "id2label": {
22
  "0": "cctv",
23
  "1": "phone",
24
  "2": "topdown"
25
  },
 
 
 
26
  "label2id": {
27
  "cctv": "0",
28
  "phone": "1",
29
  "topdown": "2"
30
  },
31
- "layer_type": "bottleneck",
32
- "model_type": "resnet",
 
33
  "num_channels": 3,
34
- "out_features": [
35
- "stage4"
36
- ],
37
- "out_indices": [
38
- 4
39
- ],
40
  "problem_type": "single_label_classification",
41
- "stage_names": [
42
- "stem",
43
- "stage1",
44
- "stage2",
45
- "stage3",
46
- "stage4"
47
- ],
48
  "torch_dtype": "float32",
49
  "transformers_version": "4.31.0"
50
  }
 
1
  {
2
+ "_name_or_path": "google/vit-base-patch16-224",
3
  "architectures": [
4
+ "ViTForImageClassification"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
  "id2label": {
12
  "0": "cctv",
13
  "1": "phone",
14
  "2": "topdown"
15
  },
16
+ "image_size": 224,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
  "label2id": {
20
  "cctv": "0",
21
  "phone": "1",
22
  "topdown": "2"
23
  },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 12,
27
  "num_channels": 3,
28
+ "num_hidden_layers": 12,
29
+ "patch_size": 16,
 
 
 
 
30
  "problem_type": "single_label_classification",
31
+ "qkv_bias": true,
 
 
 
 
 
 
32
  "torch_dtype": "float32",
33
  "transformers_version": "4.31.0"
34
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9382716049382716,
4
- "eval_loss": 0.16537487506866455,
5
- "eval_runtime": 7.4881,
6
- "eval_samples_per_second": 75.72,
7
- "eval_steps_per_second": 9.482
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9915611814345991,
4
+ "eval_loss": 0.023517148569226265,
5
+ "eval_runtime": 14.6863,
6
+ "eval_samples_per_second": 80.687,
7
+ "eval_steps_per_second": 10.145
8
  }
preprocessor_config.json CHANGED
@@ -1,23 +1,22 @@
1
  {
2
- "crop_pct": 0.875,
3
  "do_normalize": true,
4
  "do_rescale": true,
5
  "do_resize": true,
6
- "feature_extractor_type": "ConvNextFeatureExtractor",
7
  "image_mean": [
8
- 0.485,
9
- 0.456,
10
- 0.406
11
  ],
12
- "image_processor_type": "ConvNextFeatureExtractor",
13
  "image_std": [
14
- 0.229,
15
- 0.224,
16
- 0.225
17
  ],
18
- "resample": 3,
19
  "rescale_factor": 0.00392156862745098,
20
  "size": {
21
- "shortest_edge": 224
 
22
  }
23
  }
 
1
  {
 
2
  "do_normalize": true,
3
  "do_rescale": true,
4
  "do_resize": true,
 
5
  "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
  ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
  "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
  ],
16
+ "resample": 2,
17
  "rescale_factor": 0.00392156862745098,
18
  "size": {
19
+ "height": 224,
20
+ "width": 224
21
  }
22
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:195f177d2ea1e6d0901fc59cc04a229e62d799533ac9c374eb6c7e6629c50c2d
3
- size 94383181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9e93a397eb093ec4fe14e8f7d6fb52de2afc10b7ad8b9ef877e9aa5d2a458a
3
+ size 343271789
runs/Aug31_13-25-03_surveily-ag-02/events.out.tfevents.1693481104.surveily-ag-02.6684.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8150829bfd690e36c5e4280aed25f63f65c18dcec6b040ef5939f7fc6853df
3
+ size 11453
runs/Aug31_13-25-03_surveily-ag-02/events.out.tfevents.1693481674.surveily-ag-02.6684.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2ee4cd89b083af9df09cc39f4788f721476b567541854e5b4eb9d6d9479790
3
+ size 411
runs/Aug31_13-43-24_surveily-ag-02/events.out.tfevents.1693482204.surveily-ag-02.6684.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db5aba99827c972325207483473787c02fcf1433e9e9bce2ac2dbae8cd68d0a
3
+ size 11610
runs/Aug31_13-43-24_surveily-ag-02/events.out.tfevents.1693482893.surveily-ag-02.6684.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60a1b0454aa07e5a92c413ebc60514fa0bed3631b9017380fc60326928122fe4
3
+ size 411
runs/Aug31_13-55-36_surveily-ag-02/events.out.tfevents.1693482936.surveily-ag-02.6684.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1f532bb293059f29bf35a92c097c9eb47302ea9222cd71ab60cfef1b10a8815
3
+ size 12247
runs/Aug31_13-55-36_surveily-ag-02/events.out.tfevents.1693483672.surveily-ag-02.6684.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5c396d8de2a5d761ce4147709ecbdcf736b21baf22c82cc36df526257a33ae
3
+ size 411
test_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "test_accuracy": 0.7333333333333333,
3
- "test_loss": 0.6995685696601868,
4
- "test_runtime": 0.645,
5
- "test_samples_per_second": 46.51,
6
- "test_steps_per_second": 6.201
7
  }
 
1
  {
2
+ "test_accuracy": 0.9333333333333333,
3
+ "test_loss": 0.32354873418807983,
4
+ "test_runtime": 0.5696,
5
+ "test_samples_per_second": 52.669,
6
+ "test_steps_per_second": 7.022
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.11905734094947872,
4
- "train_runtime": 363.8199,
5
- "train_samples_per_second": 55.412,
6
- "train_steps_per_second": 5.552
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.004172653943921321,
4
+ "train_runtime": 720.5262,
5
+ "train_samples_per_second": 35.079,
6
+ "train_steps_per_second": 3.511
7
  }
trainer_state.json CHANGED
@@ -1,235 +1,283 @@
1
  {
2
- "best_metric": 0.16537487506866455,
3
- "best_model_checkpoint": "./models/camera-type/checkpoint-400",
4
  "epoch": 5.0,
5
- "global_step": 2020,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.25,
12
- "learning_rate": 9.504950495049505e-05,
13
- "loss": 0.895,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 0.5,
18
- "learning_rate": 9.014851485148515e-05,
19
- "loss": 0.4597,
20
  "step": 200
21
  },
22
  {
23
- "epoch": 0.5,
24
- "eval_accuracy": 0.9241622574955908,
25
- "eval_loss": 0.2800751328468323,
26
- "eval_runtime": 7.8295,
27
- "eval_samples_per_second": 72.419,
28
- "eval_steps_per_second": 9.068,
29
  "step": 200
30
  },
31
  {
32
- "epoch": 0.74,
33
- "learning_rate": 8.51980198019802e-05,
34
- "loss": 0.2158,
35
  "step": 300
36
  },
37
  {
38
- "epoch": 0.99,
39
- "learning_rate": 8.02970297029703e-05,
40
- "loss": 0.1375,
41
  "step": 400
42
  },
43
  {
44
- "epoch": 0.99,
45
- "eval_accuracy": 0.9382716049382716,
46
- "eval_loss": 0.16537487506866455,
47
- "eval_runtime": 8.6108,
48
- "eval_samples_per_second": 65.848,
49
- "eval_steps_per_second": 8.245,
50
  "step": 400
51
  },
52
  {
53
- "epoch": 1.24,
54
- "learning_rate": 7.534653465346535e-05,
55
- "loss": 0.097,
56
  "step": 500
57
  },
58
  {
59
- "epoch": 1.49,
60
- "learning_rate": 7.039603960396039e-05,
61
- "loss": 0.0795,
62
  "step": 600
63
  },
64
  {
65
- "epoch": 1.49,
66
- "eval_accuracy": 0.9382716049382716,
67
- "eval_loss": 0.1904107928276062,
68
- "eval_runtime": 7.6784,
69
- "eval_samples_per_second": 73.843,
70
- "eval_steps_per_second": 9.247,
71
  "step": 600
72
  },
73
  {
74
- "epoch": 1.73,
75
- "learning_rate": 6.544554455445545e-05,
76
- "loss": 0.0756,
77
  "step": 700
78
  },
79
  {
80
- "epoch": 1.98,
81
- "learning_rate": 6.0495049504950496e-05,
82
- "loss": 0.0686,
83
  "step": 800
84
  },
85
  {
86
- "epoch": 1.98,
87
- "eval_accuracy": 0.9453262786596119,
88
- "eval_loss": 0.18103937804698944,
89
- "eval_runtime": 8.0556,
90
- "eval_samples_per_second": 70.386,
91
- "eval_steps_per_second": 8.814,
92
  "step": 800
93
  },
94
  {
95
- "epoch": 2.23,
96
- "learning_rate": 5.5544554455445554e-05,
97
- "loss": 0.0369,
98
  "step": 900
99
  },
100
  {
101
- "epoch": 2.48,
102
- "learning_rate": 5.05940594059406e-05,
103
- "loss": 0.026,
104
  "step": 1000
105
  },
106
  {
107
- "epoch": 2.48,
108
- "eval_accuracy": 0.9400352733686067,
109
- "eval_loss": 0.22161062061786652,
110
- "eval_runtime": 7.6989,
111
- "eval_samples_per_second": 73.647,
112
- "eval_steps_per_second": 9.222,
113
  "step": 1000
114
  },
115
  {
116
- "epoch": 2.72,
117
- "learning_rate": 4.5643564356435645e-05,
118
- "loss": 0.0266,
119
  "step": 1100
120
  },
121
  {
122
- "epoch": 2.97,
123
- "learning_rate": 4.069306930693069e-05,
124
- "loss": 0.0495,
125
  "step": 1200
126
  },
127
  {
128
- "epoch": 2.97,
129
- "eval_accuracy": 0.9453262786596119,
130
- "eval_loss": 0.20959477126598358,
131
- "eval_runtime": 7.4625,
132
- "eval_samples_per_second": 75.98,
133
- "eval_steps_per_second": 9.514,
134
  "step": 1200
135
  },
136
  {
137
- "epoch": 3.22,
138
- "learning_rate": 3.579207920792079e-05,
139
- "loss": 0.0354,
140
  "step": 1300
141
  },
142
  {
143
- "epoch": 3.47,
144
- "learning_rate": 3.0841584158415845e-05,
145
- "loss": 0.0487,
146
  "step": 1400
147
  },
148
  {
149
- "epoch": 3.47,
150
- "eval_accuracy": 0.9435626102292769,
151
- "eval_loss": 0.21740128099918365,
152
- "eval_runtime": 7.5927,
153
- "eval_samples_per_second": 74.677,
154
- "eval_steps_per_second": 9.351,
155
  "step": 1400
156
  },
157
  {
158
- "epoch": 3.71,
159
- "learning_rate": 2.5891089108910893e-05,
160
- "loss": 0.0186,
161
  "step": 1500
162
  },
163
  {
164
- "epoch": 3.96,
165
- "learning_rate": 2.0940594059405942e-05,
166
- "loss": 0.0268,
167
  "step": 1600
168
  },
169
  {
170
- "epoch": 3.96,
171
- "eval_accuracy": 0.9453262786596119,
172
- "eval_loss": 0.23042456805706024,
173
- "eval_runtime": 7.4693,
174
- "eval_samples_per_second": 75.911,
175
- "eval_steps_per_second": 9.506,
176
  "step": 1600
177
  },
178
  {
179
- "epoch": 4.21,
180
- "learning_rate": 1.599009900990099e-05,
181
- "loss": 0.031,
182
  "step": 1700
183
  },
184
  {
185
- "epoch": 4.46,
186
- "learning_rate": 1.103960396039604e-05,
187
- "loss": 0.0254,
188
  "step": 1800
189
  },
190
  {
191
- "epoch": 4.46,
192
- "eval_accuracy": 0.9400352733686067,
193
- "eval_loss": 0.25736740231513977,
194
- "eval_runtime": 7.6456,
195
- "eval_samples_per_second": 74.16,
196
- "eval_steps_per_second": 9.286,
197
  "step": 1800
198
  },
199
  {
200
- "epoch": 4.7,
201
- "learning_rate": 6.089108910891089e-06,
202
- "loss": 0.0303,
203
  "step": 1900
204
  },
205
  {
206
- "epoch": 4.95,
207
- "learning_rate": 1.1386138613861386e-06,
208
- "loss": 0.0186,
209
  "step": 2000
210
  },
211
  {
212
- "epoch": 4.95,
213
- "eval_accuracy": 0.9382716049382716,
214
- "eval_loss": 0.3211604356765747,
215
- "eval_runtime": 7.4741,
216
- "eval_samples_per_second": 75.862,
217
- "eval_steps_per_second": 9.499,
218
  "step": 2000
219
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  {
221
  "epoch": 5.0,
222
- "step": 2020,
223
- "total_flos": 4.281430410244915e+17,
224
- "train_loss": 0.11905734094947872,
225
- "train_runtime": 363.8199,
226
- "train_samples_per_second": 55.412,
227
- "train_steps_per_second": 5.552
228
  }
229
  ],
230
- "max_steps": 2020,
231
  "num_train_epochs": 5,
232
- "total_flos": 4.281430410244915e+17,
233
  "trial_name": null,
234
  "trial_params": null
235
  }
 
1
  {
2
+ "best_metric": 0.023517148569226265,
3
+ "best_model_checkpoint": "./models/camera-type/checkpoint-200",
4
  "epoch": 5.0,
5
+ "global_step": 2530,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.2,
12
+ "learning_rate": 9.604743083003953e-05,
13
+ "loss": 0.0703,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 0.4,
18
+ "learning_rate": 9.213438735177866e-05,
19
+ "loss": 0.0064,
20
  "step": 200
21
  },
22
  {
23
+ "epoch": 0.4,
24
+ "eval_accuracy": 0.9915611814345991,
25
+ "eval_loss": 0.023517148569226265,
26
+ "eval_runtime": 13.6256,
27
+ "eval_samples_per_second": 86.968,
28
+ "eval_steps_per_second": 10.935,
29
  "step": 200
30
  },
31
  {
32
+ "epoch": 0.59,
33
+ "learning_rate": 8.818181818181818e-05,
34
+ "loss": 0.015,
35
  "step": 300
36
  },
37
  {
38
+ "epoch": 0.79,
39
+ "learning_rate": 8.426877470355731e-05,
40
+ "loss": 0.0034,
41
  "step": 400
42
  },
43
  {
44
+ "epoch": 0.79,
45
+ "eval_accuracy": 0.9940928270042194,
46
+ "eval_loss": 0.03918765112757683,
47
+ "eval_runtime": 26.2367,
48
+ "eval_samples_per_second": 45.166,
49
+ "eval_steps_per_second": 5.679,
50
  "step": 400
51
  },
52
  {
53
+ "epoch": 0.99,
54
+ "learning_rate": 8.031620553359685e-05,
55
+ "loss": 0.0036,
56
  "step": 500
57
  },
58
  {
59
+ "epoch": 1.19,
60
+ "learning_rate": 7.636363636363637e-05,
61
+ "loss": 0.0066,
62
  "step": 600
63
  },
64
  {
65
+ "epoch": 1.19,
66
+ "eval_accuracy": 0.9839662447257383,
67
+ "eval_loss": 0.10105644911527634,
68
+ "eval_runtime": 13.5269,
69
+ "eval_samples_per_second": 87.603,
70
+ "eval_steps_per_second": 11.015,
71
  "step": 600
72
  },
73
  {
74
+ "epoch": 1.38,
75
+ "learning_rate": 7.24110671936759e-05,
76
+ "loss": 0.0001,
77
  "step": 700
78
  },
79
  {
80
+ "epoch": 1.58,
81
+ "learning_rate": 6.845849802371542e-05,
82
+ "loss": 0.0,
83
  "step": 800
84
  },
85
  {
86
+ "epoch": 1.58,
87
+ "eval_accuracy": 0.9839662447257383,
88
+ "eval_loss": 0.12266893684864044,
89
+ "eval_runtime": 14.2674,
90
+ "eval_samples_per_second": 83.057,
91
+ "eval_steps_per_second": 10.443,
92
  "step": 800
93
  },
94
  {
95
+ "epoch": 1.78,
96
+ "learning_rate": 6.450592885375494e-05,
97
+ "loss": 0.0,
98
  "step": 900
99
  },
100
  {
101
+ "epoch": 1.98,
102
+ "learning_rate": 6.055335968379446e-05,
103
+ "loss": 0.0,
104
  "step": 1000
105
  },
106
  {
107
+ "epoch": 1.98,
108
+ "eval_accuracy": 0.9839662447257383,
109
+ "eval_loss": 0.12320054322481155,
110
+ "eval_runtime": 13.4323,
111
+ "eval_samples_per_second": 88.22,
112
+ "eval_steps_per_second": 11.093,
113
  "step": 1000
114
  },
115
  {
116
+ "epoch": 2.17,
117
+ "learning_rate": 5.660079051383399e-05,
118
+ "loss": 0.0,
119
  "step": 1100
120
  },
121
  {
122
+ "epoch": 2.37,
123
+ "learning_rate": 5.2648221343873516e-05,
124
+ "loss": 0.0,
125
  "step": 1200
126
  },
127
  {
128
+ "epoch": 2.37,
129
+ "eval_accuracy": 0.9839662447257383,
130
+ "eval_loss": 0.143290176987648,
131
+ "eval_runtime": 14.5978,
132
+ "eval_samples_per_second": 81.177,
133
+ "eval_steps_per_second": 10.207,
134
  "step": 1200
135
  },
136
  {
137
+ "epoch": 2.57,
138
+ "learning_rate": 4.8695652173913046e-05,
139
+ "loss": 0.0,
140
  "step": 1300
141
  },
142
  {
143
+ "epoch": 2.77,
144
+ "learning_rate": 4.4743083003952576e-05,
145
+ "loss": 0.0,
146
  "step": 1400
147
  },
148
  {
149
+ "epoch": 2.77,
150
+ "eval_accuracy": 0.9839662447257383,
151
+ "eval_loss": 0.14161579310894012,
152
+ "eval_runtime": 13.9736,
153
+ "eval_samples_per_second": 84.803,
154
+ "eval_steps_per_second": 10.663,
155
  "step": 1400
156
  },
157
  {
158
+ "epoch": 2.96,
159
+ "learning_rate": 4.079051383399209e-05,
160
+ "loss": 0.0,
161
  "step": 1500
162
  },
163
  {
164
+ "epoch": 3.16,
165
+ "learning_rate": 3.6837944664031623e-05,
166
+ "loss": 0.0,
167
  "step": 1600
168
  },
169
  {
170
+ "epoch": 3.16,
171
+ "eval_accuracy": 0.9839662447257383,
172
+ "eval_loss": 0.14079466462135315,
173
+ "eval_runtime": 13.3148,
174
+ "eval_samples_per_second": 88.999,
175
+ "eval_steps_per_second": 11.191,
176
  "step": 1600
177
  },
178
  {
179
+ "epoch": 3.36,
180
+ "learning_rate": 3.288537549407115e-05,
181
+ "loss": 0.0,
182
  "step": 1700
183
  },
184
  {
185
+ "epoch": 3.56,
186
+ "learning_rate": 2.8932806324110674e-05,
187
+ "loss": 0.0,
188
  "step": 1800
189
  },
190
  {
191
+ "epoch": 3.56,
192
+ "eval_accuracy": 0.9839662447257383,
193
+ "eval_loss": 0.140061616897583,
194
+ "eval_runtime": 13.4092,
195
+ "eval_samples_per_second": 88.372,
196
+ "eval_steps_per_second": 11.112,
197
  "step": 1800
198
  },
199
  {
200
+ "epoch": 3.75,
201
+ "learning_rate": 2.4980237154150198e-05,
202
+ "loss": 0.0,
203
  "step": 1900
204
  },
205
  {
206
+ "epoch": 3.95,
207
+ "learning_rate": 2.1027667984189725e-05,
208
+ "loss": 0.0,
209
  "step": 2000
210
  },
211
  {
212
+ "epoch": 3.95,
213
+ "eval_accuracy": 0.9839662447257383,
214
+ "eval_loss": 0.13938628137111664,
215
+ "eval_runtime": 13.5631,
216
+ "eval_samples_per_second": 87.369,
217
+ "eval_steps_per_second": 10.986,
218
  "step": 2000
219
  },
220
+ {
221
+ "epoch": 4.15,
222
+ "learning_rate": 1.707509881422925e-05,
223
+ "loss": 0.0,
224
+ "step": 2100
225
+ },
226
+ {
227
+ "epoch": 4.35,
228
+ "learning_rate": 1.3122529644268775e-05,
229
+ "loss": 0.0,
230
+ "step": 2200
231
+ },
232
+ {
233
+ "epoch": 4.35,
234
+ "eval_accuracy": 0.9839662447257383,
235
+ "eval_loss": 0.13903304934501648,
236
+ "eval_runtime": 14.2016,
237
+ "eval_samples_per_second": 83.441,
238
+ "eval_steps_per_second": 10.492,
239
+ "step": 2200
240
+ },
241
+ {
242
+ "epoch": 4.55,
243
+ "learning_rate": 9.1699604743083e-06,
244
+ "loss": 0.0,
245
+ "step": 2300
246
+ },
247
+ {
248
+ "epoch": 4.74,
249
+ "learning_rate": 5.217391304347826e-06,
250
+ "loss": 0.0,
251
+ "step": 2400
252
+ },
253
+ {
254
+ "epoch": 4.74,
255
+ "eval_accuracy": 0.9839662447257383,
256
+ "eval_loss": 0.13887113332748413,
257
+ "eval_runtime": 13.5995,
258
+ "eval_samples_per_second": 87.136,
259
+ "eval_steps_per_second": 10.956,
260
+ "step": 2400
261
+ },
262
+ {
263
+ "epoch": 4.94,
264
+ "learning_rate": 1.2648221343873517e-06,
265
+ "loss": 0.0,
266
+ "step": 2500
267
+ },
268
  {
269
  "epoch": 5.0,
270
+ "step": 2530,
271
+ "total_flos": 1.9586275919110656e+18,
272
+ "train_loss": 0.004172653943921321,
273
+ "train_runtime": 720.5262,
274
+ "train_samples_per_second": 35.079,
275
+ "train_steps_per_second": 3.511
276
  }
277
  ],
278
+ "max_steps": 2530,
279
  "num_train_epochs": 5,
280
+ "total_flos": 1.9586275919110656e+18,
281
  "trial_name": null,
282
  "trial_params": null
283
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaeda358597efe759e938c6fa8910566dc86c2f58aef60d15672b1d0c4b75915
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a891659bbef29a096768cfc757449f649d7adc620258759a34f11798e1676ca
3
  size 4027