to-be committed on
Commit
9e4662a
1 Parent(s): 2e76231

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - object-detection
6
+ - vision
7
+ base_model: hustvl/yolos-tiny
8
+ widget:
9
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
10
+ example_title: Tiger
11
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
12
+ example_title: Teapot
13
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
14
+ example_title: Palace
15
+ ---
16
+
17
+ # Model Trained Using AutoTrain
18
+
19
+ - Problem type: Object Detection
20
+
21
+ ## Validation Metrics
22
+ loss: 1.4424972534179688
23
+
24
+ map: 0.0152
25
+
26
+ map_50: 0.053
27
+
28
+ map_75: 0.005
29
+
30
+ map_small: -1.0
31
+
32
+ map_medium: 0.018
33
+
34
+ map_large: 0.0094
35
+
36
+ mar_1: 0.0473
37
+
38
+ mar_10: 0.1992
39
+
40
+ mar_100: 0.3797
41
+
42
+ mar_small: -1.0
43
+
44
+ mar_medium: 0.3686
45
+
46
+ mar_large: 0.4261
checkpoint-624/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hustvl/yolos-tiny",
3
+ "architectures": [
4
+ "YolosForObjectDetection"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auxiliary_loss": false,
8
+ "bbox_cost": 5,
9
+ "bbox_loss_coefficient": 5,
10
+ "class_cost": 1,
11
+ "eos_coefficient": 0.1,
12
+ "giou_cost": 2,
13
+ "giou_loss_coefficient": 2,
14
+ "hidden_act": "gelu",
15
+ "hidden_dropout_prob": 0.0,
16
+ "hidden_size": 192,
17
+ "id2label": {
18
+ "0": 0
19
+ },
20
+ "image_size": [
21
+ 800,
22
+ 1333
23
+ ],
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 768,
26
+ "label2id": {
27
+ "0": 0
28
+ },
29
+ "layer_norm_eps": 1e-12,
30
+ "model_type": "yolos",
31
+ "num_attention_heads": 3,
32
+ "num_channels": 3,
33
+ "num_detection_tokens": 100,
34
+ "num_hidden_layers": 12,
35
+ "patch_size": 16,
36
+ "qkv_bias": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.41.0",
39
+ "use_mid_position_embeddings": false
40
+ }
checkpoint-624/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728be27ebde033c9db82f536d83740166daf3ba9e0139639469ccc1618c81263
3
+ size 25909400
checkpoint-624/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6823d0684c194eecc7a7500a48dccc2c30e25bcf5fe7801c55a3439e86300b14
3
+ size 51946362
checkpoint-624/preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "annotations",
5
+ "return_segmentation_masks",
6
+ "masks_path",
7
+ "do_resize",
8
+ "size",
9
+ "resample",
10
+ "do_rescale",
11
+ "rescale_factor",
12
+ "do_normalize",
13
+ "image_mean",
14
+ "image_std",
15
+ "do_convert_annotations",
16
+ "do_pad",
17
+ "format",
18
+ "return_tensors",
19
+ "data_format",
20
+ "input_data_format"
21
+ ],
22
+ "do_convert_annotations": true,
23
+ "do_normalize": true,
24
+ "do_pad": false,
25
+ "do_rescale": true,
26
+ "do_resize": false,
27
+ "format": "coco_detection",
28
+ "image_mean": [
29
+ 0.485,
30
+ 0.456,
31
+ 0.406
32
+ ],
33
+ "image_processor_type": "YolosImageProcessor",
34
+ "image_std": [
35
+ 0.229,
36
+ 0.224,
37
+ 0.225
38
+ ],
39
+ "resample": 2,
40
+ "rescale_factor": 0.00392156862745098,
41
+ "size": {
42
+ "longest_edge": 600
43
+ }
44
+ }
checkpoint-624/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ab05c41f8acd6becf31ed2ae9fec5c6805fe9f47e5a914504e55b01f8df497
3
+ size 14244
checkpoint-624/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92105533540354ff07de82d6db4122319ea163c82fa2ee9a3080419efcdcbc1
3
+ size 1064
checkpoint-624/trainer_state.json ADDED
@@ -0,0 +1,1674 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.4424972534179688,
3
+ "best_model_checkpoint": "autotrain-signatures-yolos-tiny-v4/checkpoint-624",
4
+ "epoch": 8.0,
5
+ "eval_steps": 500,
6
+ "global_step": 624,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.038461538461538464,
13
+ "grad_norm": NaN,
14
+ "learning_rate": 0.0,
15
+ "loss": 3.3568,
16
+ "step": 3
17
+ },
18
+ {
19
+ "epoch": 0.07692307692307693,
20
+ "grad_norm": Infinity,
21
+ "learning_rate": 4.2735042735042736e-08,
22
+ "loss": 3.3023,
23
+ "step": 6
24
+ },
25
+ {
26
+ "epoch": 0.11538461538461539,
27
+ "grad_norm": 273.1413269042969,
28
+ "learning_rate": 1.7094017094017095e-07,
29
+ "loss": 3.5245,
30
+ "step": 9
31
+ },
32
+ {
33
+ "epoch": 0.15384615384615385,
34
+ "grad_norm": 269.0606384277344,
35
+ "learning_rate": 2.991452991452992e-07,
36
+ "loss": 3.334,
37
+ "step": 12
38
+ },
39
+ {
40
+ "epoch": 0.19230769230769232,
41
+ "grad_norm": 314.0342712402344,
42
+ "learning_rate": 4.273504273504274e-07,
43
+ "loss": 3.3816,
44
+ "step": 15
45
+ },
46
+ {
47
+ "epoch": 0.23076923076923078,
48
+ "grad_norm": 100.73604583740234,
49
+ "learning_rate": 5.555555555555556e-07,
50
+ "loss": 3.5385,
51
+ "step": 18
52
+ },
53
+ {
54
+ "epoch": 0.2692307692307692,
55
+ "grad_norm": 59.5837516784668,
56
+ "learning_rate": 6.837606837606838e-07,
57
+ "loss": 3.5975,
58
+ "step": 21
59
+ },
60
+ {
61
+ "epoch": 0.3076923076923077,
62
+ "grad_norm": 92.41542053222656,
63
+ "learning_rate": 8.11965811965812e-07,
64
+ "loss": 3.4075,
65
+ "step": 24
66
+ },
67
+ {
68
+ "epoch": 0.34615384615384615,
69
+ "grad_norm": 148.47996520996094,
70
+ "learning_rate": 9.401709401709402e-07,
71
+ "loss": 3.5601,
72
+ "step": 27
73
+ },
74
+ {
75
+ "epoch": 0.38461538461538464,
76
+ "grad_norm": 54.41558837890625,
77
+ "learning_rate": 1.0683760683760685e-06,
78
+ "loss": 3.502,
79
+ "step": 30
80
+ },
81
+ {
82
+ "epoch": 0.4230769230769231,
83
+ "grad_norm": 58.81692123413086,
84
+ "learning_rate": 1.1965811965811968e-06,
85
+ "loss": 3.6398,
86
+ "step": 33
87
+ },
88
+ {
89
+ "epoch": 0.46153846153846156,
90
+ "grad_norm": 59.197391510009766,
91
+ "learning_rate": 1.3247863247863248e-06,
92
+ "loss": 3.6787,
93
+ "step": 36
94
+ },
95
+ {
96
+ "epoch": 0.5,
97
+ "grad_norm": 237.29342651367188,
98
+ "learning_rate": 1.4529914529914531e-06,
99
+ "loss": 3.3408,
100
+ "step": 39
101
+ },
102
+ {
103
+ "epoch": 0.5384615384615384,
104
+ "grad_norm": 66.22134399414062,
105
+ "learning_rate": 1.5811965811965813e-06,
106
+ "loss": 3.6161,
107
+ "step": 42
108
+ },
109
+ {
110
+ "epoch": 0.5769230769230769,
111
+ "grad_norm": 62.1399040222168,
112
+ "learning_rate": 1.7094017094017097e-06,
113
+ "loss": 3.5289,
114
+ "step": 45
115
+ },
116
+ {
117
+ "epoch": 0.6153846153846154,
118
+ "grad_norm": 80.36759185791016,
119
+ "learning_rate": 1.8376068376068378e-06,
120
+ "loss": 3.3902,
121
+ "step": 48
122
+ },
123
+ {
124
+ "epoch": 0.6538461538461539,
125
+ "grad_norm": 102.58648681640625,
126
+ "learning_rate": 1.9658119658119658e-06,
127
+ "loss": 3.3,
128
+ "step": 51
129
+ },
130
+ {
131
+ "epoch": 0.6923076923076923,
132
+ "grad_norm": 1139.0341796875,
133
+ "learning_rate": 2.094017094017094e-06,
134
+ "loss": 3.396,
135
+ "step": 54
136
+ },
137
+ {
138
+ "epoch": 0.7307692307692307,
139
+ "grad_norm": 91.15528869628906,
140
+ "learning_rate": 2.2222222222222225e-06,
141
+ "loss": 3.4,
142
+ "step": 57
143
+ },
144
+ {
145
+ "epoch": 0.7692307692307693,
146
+ "grad_norm": 76.54491424560547,
147
+ "learning_rate": 2.3504273504273504e-06,
148
+ "loss": 3.2606,
149
+ "step": 60
150
+ },
151
+ {
152
+ "epoch": 0.8076923076923077,
153
+ "grad_norm": 79.00670623779297,
154
+ "learning_rate": 2.478632478632479e-06,
155
+ "loss": 3.2404,
156
+ "step": 63
157
+ },
158
+ {
159
+ "epoch": 0.8461538461538461,
160
+ "grad_norm": 305.5508728027344,
161
+ "learning_rate": 2.606837606837607e-06,
162
+ "loss": 3.411,
163
+ "step": 66
164
+ },
165
+ {
166
+ "epoch": 0.8846153846153846,
167
+ "grad_norm": 46.23222351074219,
168
+ "learning_rate": 2.735042735042735e-06,
169
+ "loss": 3.4607,
170
+ "step": 69
171
+ },
172
+ {
173
+ "epoch": 0.9230769230769231,
174
+ "grad_norm": 96.50804138183594,
175
+ "learning_rate": 2.863247863247863e-06,
176
+ "loss": 3.0111,
177
+ "step": 72
178
+ },
179
+ {
180
+ "epoch": 0.9615384615384616,
181
+ "grad_norm": 108.60494995117188,
182
+ "learning_rate": 2.991452991452992e-06,
183
+ "loss": 2.972,
184
+ "step": 75
185
+ },
186
+ {
187
+ "epoch": 1.0,
188
+ "grad_norm": Infinity,
189
+ "learning_rate": 3.0769230769230774e-06,
190
+ "loss": 2.9331,
191
+ "step": 78
192
+ },
193
+ {
194
+ "epoch": 1.0,
195
+ "eval_loss": 2.9128596782684326,
196
+ "eval_map": 0.0001,
197
+ "eval_map_50": 0.0003,
198
+ "eval_map_75": 0.0,
199
+ "eval_map_large": 0.0001,
200
+ "eval_map_medium": 0.0001,
201
+ "eval_map_per_class": 0.0001,
202
+ "eval_map_small": -1.0,
203
+ "eval_mar_1": 0.0,
204
+ "eval_mar_10": 0.0,
205
+ "eval_mar_100": 0.03,
206
+ "eval_mar_100_per_class": 0.03,
207
+ "eval_mar_large": 0.0348,
208
+ "eval_mar_medium": 0.0288,
209
+ "eval_mar_small": -1.0,
210
+ "eval_runtime": 7.4098,
211
+ "eval_samples_per_second": 20.918,
212
+ "eval_steps_per_second": 1.35,
213
+ "step": 78
214
+ },
215
+ {
216
+ "epoch": 1.0384615384615385,
217
+ "grad_norm": 563.4578857421875,
218
+ "learning_rate": 3.205128205128205e-06,
219
+ "loss": 3.2157,
220
+ "step": 81
221
+ },
222
+ {
223
+ "epoch": 1.0769230769230769,
224
+ "grad_norm": 61.78779220581055,
225
+ "learning_rate": 3.3333333333333333e-06,
226
+ "loss": 3.0643,
227
+ "step": 84
228
+ },
229
+ {
230
+ "epoch": 1.1153846153846154,
231
+ "grad_norm": 61.74622344970703,
232
+ "learning_rate": 3.4615384615384617e-06,
233
+ "loss": 3.2045,
234
+ "step": 87
235
+ },
236
+ {
237
+ "epoch": 1.1538461538461537,
238
+ "grad_norm": 80.19824981689453,
239
+ "learning_rate": 3.5897435897435896e-06,
240
+ "loss": 2.9672,
241
+ "step": 90
242
+ },
243
+ {
244
+ "epoch": 1.1923076923076923,
245
+ "grad_norm": 34.98136901855469,
246
+ "learning_rate": 3.717948717948718e-06,
247
+ "loss": 2.9372,
248
+ "step": 93
249
+ },
250
+ {
251
+ "epoch": 1.2307692307692308,
252
+ "grad_norm": 813.1605224609375,
253
+ "learning_rate": 3.846153846153847e-06,
254
+ "loss": 3.0351,
255
+ "step": 96
256
+ },
257
+ {
258
+ "epoch": 1.2692307692307692,
259
+ "grad_norm": 125.60267639160156,
260
+ "learning_rate": 3.974358974358974e-06,
261
+ "loss": 3.0765,
262
+ "step": 99
263
+ },
264
+ {
265
+ "epoch": 1.3076923076923077,
266
+ "grad_norm": 187.66407775878906,
267
+ "learning_rate": 4.102564102564103e-06,
268
+ "loss": 2.8243,
269
+ "step": 102
270
+ },
271
+ {
272
+ "epoch": 1.3461538461538463,
273
+ "grad_norm": 278.97698974609375,
274
+ "learning_rate": 4.230769230769231e-06,
275
+ "loss": 2.9521,
276
+ "step": 105
277
+ },
278
+ {
279
+ "epoch": 1.3846153846153846,
280
+ "grad_norm": 102.054931640625,
281
+ "learning_rate": 4.3589743589743586e-06,
282
+ "loss": 2.7844,
283
+ "step": 108
284
+ },
285
+ {
286
+ "epoch": 1.4230769230769231,
287
+ "grad_norm": 140.40711975097656,
288
+ "learning_rate": 4.487179487179488e-06,
289
+ "loss": 3.0765,
290
+ "step": 111
291
+ },
292
+ {
293
+ "epoch": 1.4615384615384617,
294
+ "grad_norm": 43.541473388671875,
295
+ "learning_rate": 4.615384615384616e-06,
296
+ "loss": 3.015,
297
+ "step": 114
298
+ },
299
+ {
300
+ "epoch": 1.5,
301
+ "grad_norm": 78.25859832763672,
302
+ "learning_rate": 4.743589743589744e-06,
303
+ "loss": 2.5134,
304
+ "step": 117
305
+ },
306
+ {
307
+ "epoch": 1.5384615384615383,
308
+ "grad_norm": 141.24464416503906,
309
+ "learning_rate": 4.871794871794872e-06,
310
+ "loss": 2.7579,
311
+ "step": 120
312
+ },
313
+ {
314
+ "epoch": 1.5769230769230769,
315
+ "grad_norm": 124.17371368408203,
316
+ "learning_rate": 5e-06,
317
+ "loss": 2.671,
318
+ "step": 123
319
+ },
320
+ {
321
+ "epoch": 1.6153846153846154,
322
+ "grad_norm": 148.00299072265625,
323
+ "learning_rate": 5.128205128205128e-06,
324
+ "loss": 2.7806,
325
+ "step": 126
326
+ },
327
+ {
328
+ "epoch": 1.6538461538461537,
329
+ "grad_norm": 144.6546630859375,
330
+ "learning_rate": 5.256410256410257e-06,
331
+ "loss": 3.1417,
332
+ "step": 129
333
+ },
334
+ {
335
+ "epoch": 1.6923076923076923,
336
+ "grad_norm": 120.8062973022461,
337
+ "learning_rate": 5.3846153846153855e-06,
338
+ "loss": 2.4718,
339
+ "step": 132
340
+ },
341
+ {
342
+ "epoch": 1.7307692307692308,
343
+ "grad_norm": 53.48143005371094,
344
+ "learning_rate": 5.512820512820513e-06,
345
+ "loss": 2.5687,
346
+ "step": 135
347
+ },
348
+ {
349
+ "epoch": 1.7692307692307692,
350
+ "grad_norm": 131.220703125,
351
+ "learning_rate": 5.641025641025641e-06,
352
+ "loss": 2.7619,
353
+ "step": 138
354
+ },
355
+ {
356
+ "epoch": 1.8076923076923077,
357
+ "grad_norm": 118.8127212524414,
358
+ "learning_rate": 5.76923076923077e-06,
359
+ "loss": 2.7001,
360
+ "step": 141
361
+ },
362
+ {
363
+ "epoch": 1.8461538461538463,
364
+ "grad_norm": 141.43331909179688,
365
+ "learning_rate": 5.897435897435897e-06,
366
+ "loss": 2.7713,
367
+ "step": 144
368
+ },
369
+ {
370
+ "epoch": 1.8846153846153846,
371
+ "grad_norm": 218.00189208984375,
372
+ "learning_rate": 6.025641025641026e-06,
373
+ "loss": 2.5222,
374
+ "step": 147
375
+ },
376
+ {
377
+ "epoch": 1.9230769230769231,
378
+ "grad_norm": 60.612728118896484,
379
+ "learning_rate": 6.153846153846155e-06,
380
+ "loss": 2.5978,
381
+ "step": 150
382
+ },
383
+ {
384
+ "epoch": 1.9615384615384617,
385
+ "grad_norm": 51.540592193603516,
386
+ "learning_rate": 6.282051282051282e-06,
387
+ "loss": 2.8127,
388
+ "step": 153
389
+ },
390
+ {
391
+ "epoch": 2.0,
392
+ "grad_norm": 98.53108978271484,
393
+ "learning_rate": 6.41025641025641e-06,
394
+ "loss": 2.4069,
395
+ "step": 156
396
+ },
397
+ {
398
+ "epoch": 2.0,
399
+ "eval_loss": 2.3802876472473145,
400
+ "eval_map": 0.0004,
401
+ "eval_map_50": 0.0025,
402
+ "eval_map_75": 0.0,
403
+ "eval_map_large": 0.0004,
404
+ "eval_map_medium": 0.0007,
405
+ "eval_map_per_class": 0.0004,
406
+ "eval_map_small": -1.0,
407
+ "eval_mar_1": 0.0017,
408
+ "eval_mar_10": 0.0097,
409
+ "eval_mar_100": 0.0937,
410
+ "eval_mar_100_per_class": 0.0937,
411
+ "eval_mar_large": 0.1196,
412
+ "eval_mar_medium": 0.0874,
413
+ "eval_mar_small": -1.0,
414
+ "eval_runtime": 7.1066,
415
+ "eval_samples_per_second": 21.811,
416
+ "eval_steps_per_second": 1.407,
417
+ "step": 156
418
+ },
419
+ {
420
+ "epoch": 2.0384615384615383,
421
+ "grad_norm": 76.5794677734375,
422
+ "learning_rate": 6.538461538461539e-06,
423
+ "loss": 2.7062,
424
+ "step": 159
425
+ },
426
+ {
427
+ "epoch": 2.076923076923077,
428
+ "grad_norm": 96.10802459716797,
429
+ "learning_rate": 6.666666666666667e-06,
430
+ "loss": 2.6982,
431
+ "step": 162
432
+ },
433
+ {
434
+ "epoch": 2.1153846153846154,
435
+ "grad_norm": 464.37103271484375,
436
+ "learning_rate": 6.794871794871795e-06,
437
+ "loss": 2.5429,
438
+ "step": 165
439
+ },
440
+ {
441
+ "epoch": 2.1538461538461537,
442
+ "grad_norm": 216.93106079101562,
443
+ "learning_rate": 6.923076923076923e-06,
444
+ "loss": 2.2953,
445
+ "step": 168
446
+ },
447
+ {
448
+ "epoch": 2.1923076923076925,
449
+ "grad_norm": 160.7875213623047,
450
+ "learning_rate": 7.051282051282052e-06,
451
+ "loss": 2.8558,
452
+ "step": 171
453
+ },
454
+ {
455
+ "epoch": 2.230769230769231,
456
+ "grad_norm": 75.49896240234375,
457
+ "learning_rate": 7.179487179487179e-06,
458
+ "loss": 2.5174,
459
+ "step": 174
460
+ },
461
+ {
462
+ "epoch": 2.269230769230769,
463
+ "grad_norm": 39.86626052856445,
464
+ "learning_rate": 7.3076923076923085e-06,
465
+ "loss": 2.5822,
466
+ "step": 177
467
+ },
468
+ {
469
+ "epoch": 2.3076923076923075,
470
+ "grad_norm": 84.89918518066406,
471
+ "learning_rate": 7.435897435897436e-06,
472
+ "loss": 2.4464,
473
+ "step": 180
474
+ },
475
+ {
476
+ "epoch": 2.3461538461538463,
477
+ "grad_norm": 69.48336029052734,
478
+ "learning_rate": 7.564102564102564e-06,
479
+ "loss": 2.5619,
480
+ "step": 183
481
+ },
482
+ {
483
+ "epoch": 2.3846153846153846,
484
+ "grad_norm": 57.562721252441406,
485
+ "learning_rate": 7.692307692307694e-06,
486
+ "loss": 2.5131,
487
+ "step": 186
488
+ },
489
+ {
490
+ "epoch": 2.423076923076923,
491
+ "grad_norm": 55.67247009277344,
492
+ "learning_rate": 7.82051282051282e-06,
493
+ "loss": 2.5467,
494
+ "step": 189
495
+ },
496
+ {
497
+ "epoch": 2.4615384615384617,
498
+ "grad_norm": 164.40463256835938,
499
+ "learning_rate": 7.948717948717949e-06,
500
+ "loss": 2.2275,
501
+ "step": 192
502
+ },
503
+ {
504
+ "epoch": 2.5,
505
+ "grad_norm": 59.712806701660156,
506
+ "learning_rate": 8.076923076923077e-06,
507
+ "loss": 2.4607,
508
+ "step": 195
509
+ },
510
+ {
511
+ "epoch": 2.5384615384615383,
512
+ "grad_norm": 50.99430465698242,
513
+ "learning_rate": 8.205128205128205e-06,
514
+ "loss": 2.5031,
515
+ "step": 198
516
+ },
517
+ {
518
+ "epoch": 2.5769230769230766,
519
+ "grad_norm": 96.38360595703125,
520
+ "learning_rate": 8.333333333333334e-06,
521
+ "loss": 2.5362,
522
+ "step": 201
523
+ },
524
+ {
525
+ "epoch": 2.6153846153846154,
526
+ "grad_norm": 51.91063690185547,
527
+ "learning_rate": 8.461538461538462e-06,
528
+ "loss": 2.4711,
529
+ "step": 204
530
+ },
531
+ {
532
+ "epoch": 2.6538461538461537,
533
+ "grad_norm": 151.7155303955078,
534
+ "learning_rate": 8.58974358974359e-06,
535
+ "loss": 2.377,
536
+ "step": 207
537
+ },
538
+ {
539
+ "epoch": 2.6923076923076925,
540
+ "grad_norm": 46.83069610595703,
541
+ "learning_rate": 8.717948717948717e-06,
542
+ "loss": 2.3123,
543
+ "step": 210
544
+ },
545
+ {
546
+ "epoch": 2.730769230769231,
547
+ "grad_norm": 63.71812438964844,
548
+ "learning_rate": 8.846153846153847e-06,
549
+ "loss": 2.2614,
550
+ "step": 213
551
+ },
552
+ {
553
+ "epoch": 2.769230769230769,
554
+ "grad_norm": 55.61922073364258,
555
+ "learning_rate": 8.974358974358976e-06,
556
+ "loss": 2.468,
557
+ "step": 216
558
+ },
559
+ {
560
+ "epoch": 2.8076923076923075,
561
+ "grad_norm": 58.41524124145508,
562
+ "learning_rate": 9.102564102564102e-06,
563
+ "loss": 2.4301,
564
+ "step": 219
565
+ },
566
+ {
567
+ "epoch": 2.8461538461538463,
568
+ "grad_norm": 105.96492767333984,
569
+ "learning_rate": 9.230769230769232e-06,
570
+ "loss": 2.2949,
571
+ "step": 222
572
+ },
573
+ {
574
+ "epoch": 2.8846153846153846,
575
+ "grad_norm": 60.72886276245117,
576
+ "learning_rate": 9.358974358974359e-06,
577
+ "loss": 2.0177,
578
+ "step": 225
579
+ },
580
+ {
581
+ "epoch": 2.9230769230769234,
582
+ "grad_norm": 75.22779083251953,
583
+ "learning_rate": 9.487179487179487e-06,
584
+ "loss": 2.202,
585
+ "step": 228
586
+ },
587
+ {
588
+ "epoch": 2.9615384615384617,
589
+ "grad_norm": 61.02786636352539,
590
+ "learning_rate": 9.615384615384616e-06,
591
+ "loss": 2.3777,
592
+ "step": 231
593
+ },
594
+ {
595
+ "epoch": 3.0,
596
+ "grad_norm": 150.6619873046875,
597
+ "learning_rate": 9.743589743589744e-06,
598
+ "loss": 1.8886,
599
+ "step": 234
600
+ },
601
+ {
602
+ "epoch": 3.0,
603
+ "eval_loss": 2.0523335933685303,
604
+ "eval_map": 0.0011,
605
+ "eval_map_50": 0.0057,
606
+ "eval_map_75": 0.0001,
607
+ "eval_map_large": 0.0007,
608
+ "eval_map_medium": 0.0021,
609
+ "eval_map_per_class": 0.0011,
610
+ "eval_map_small": -1.0,
611
+ "eval_mar_1": 0.0,
612
+ "eval_mar_10": 0.0241,
613
+ "eval_mar_100": 0.1705,
614
+ "eval_mar_100_per_class": 0.1705,
615
+ "eval_mar_large": 0.163,
616
+ "eval_mar_medium": 0.1723,
617
+ "eval_mar_small": -1.0,
618
+ "eval_runtime": 6.5233,
619
+ "eval_samples_per_second": 23.761,
620
+ "eval_steps_per_second": 1.533,
621
+ "step": 234
622
+ },
623
+ {
624
+ "epoch": 3.0384615384615383,
625
+ "grad_norm": 39.399452209472656,
626
+ "learning_rate": 9.871794871794872e-06,
627
+ "loss": 2.2923,
628
+ "step": 237
629
+ },
630
+ {
631
+ "epoch": 3.076923076923077,
632
+ "grad_norm": 35.7647819519043,
633
+ "learning_rate": 1e-05,
634
+ "loss": 1.9331,
635
+ "step": 240
636
+ },
637
+ {
638
+ "epoch": 3.1153846153846154,
639
+ "grad_norm": 45.918128967285156,
640
+ "learning_rate": 1.012820512820513e-05,
641
+ "loss": 2.0621,
642
+ "step": 243
643
+ },
644
+ {
645
+ "epoch": 3.1538461538461537,
646
+ "grad_norm": 61.76093292236328,
647
+ "learning_rate": 1.0256410256410256e-05,
648
+ "loss": 2.0264,
649
+ "step": 246
650
+ },
651
+ {
652
+ "epoch": 3.1923076923076925,
653
+ "grad_norm": 39.96932601928711,
654
+ "learning_rate": 1.0384615384615386e-05,
655
+ "loss": 1.8564,
656
+ "step": 249
657
+ },
658
+ {
659
+ "epoch": 3.230769230769231,
660
+ "grad_norm": 85.42276000976562,
661
+ "learning_rate": 1.0512820512820514e-05,
662
+ "loss": 2.2377,
663
+ "step": 252
664
+ },
665
+ {
666
+ "epoch": 3.269230769230769,
667
+ "grad_norm": 63.91851043701172,
668
+ "learning_rate": 1.0641025641025641e-05,
669
+ "loss": 2.0225,
670
+ "step": 255
671
+ },
672
+ {
673
+ "epoch": 3.3076923076923075,
674
+ "grad_norm": 79.40022277832031,
675
+ "learning_rate": 1.0769230769230771e-05,
676
+ "loss": 1.9156,
677
+ "step": 258
678
+ },
679
+ {
680
+ "epoch": 3.3461538461538463,
681
+ "grad_norm": 89.13397216796875,
682
+ "learning_rate": 1.0897435897435898e-05,
683
+ "loss": 2.0712,
684
+ "step": 261
685
+ },
686
+ {
687
+ "epoch": 3.3846153846153846,
688
+ "grad_norm": 44.81402587890625,
689
+ "learning_rate": 1.1025641025641026e-05,
690
+ "loss": 2.0748,
691
+ "step": 264
692
+ },
693
+ {
694
+ "epoch": 3.423076923076923,
695
+ "grad_norm": 83.1436538696289,
696
+ "learning_rate": 1.1153846153846154e-05,
697
+ "loss": 2.1549,
698
+ "step": 267
699
+ },
700
+ {
701
+ "epoch": 3.4615384615384617,
702
+ "grad_norm": 47.19963455200195,
703
+ "learning_rate": 1.1282051282051283e-05,
704
+ "loss": 1.9342,
705
+ "step": 270
706
+ },
707
+ {
708
+ "epoch": 3.5,
709
+ "grad_norm": 69.76883697509766,
710
+ "learning_rate": 1.1410256410256411e-05,
711
+ "loss": 1.7994,
712
+ "step": 273
713
+ },
714
+ {
715
+ "epoch": 3.5384615384615383,
716
+ "grad_norm": 355.1128234863281,
717
+ "learning_rate": 1.153846153846154e-05,
718
+ "loss": 1.6167,
719
+ "step": 276
720
+ },
721
+ {
722
+ "epoch": 3.5769230769230766,
723
+ "grad_norm": 94.69706726074219,
724
+ "learning_rate": 1.1666666666666668e-05,
725
+ "loss": 1.8403,
726
+ "step": 279
727
+ },
728
+ {
729
+ "epoch": 3.6153846153846154,
730
+ "grad_norm": 40.64764404296875,
731
+ "learning_rate": 1.1794871794871795e-05,
732
+ "loss": 1.715,
733
+ "step": 282
734
+ },
735
+ {
736
+ "epoch": 3.6538461538461537,
737
+ "grad_norm": 54.820762634277344,
738
+ "learning_rate": 1.1923076923076925e-05,
739
+ "loss": 1.7649,
740
+ "step": 285
741
+ },
742
+ {
743
+ "epoch": 3.6923076923076925,
744
+ "grad_norm": 211.56454467773438,
745
+ "learning_rate": 1.2051282051282051e-05,
746
+ "loss": 2.1393,
747
+ "step": 288
748
+ },
749
+ {
750
+ "epoch": 3.730769230769231,
751
+ "grad_norm": 58.309844970703125,
752
+ "learning_rate": 1.217948717948718e-05,
753
+ "loss": 1.8294,
754
+ "step": 291
755
+ },
756
+ {
757
+ "epoch": 3.769230769230769,
758
+ "grad_norm": 66.05257415771484,
759
+ "learning_rate": 1.230769230769231e-05,
760
+ "loss": 1.7081,
761
+ "step": 294
762
+ },
763
+ {
764
+ "epoch": 3.8076923076923075,
765
+ "grad_norm": 49.79051971435547,
766
+ "learning_rate": 1.2435897435897436e-05,
767
+ "loss": 1.883,
768
+ "step": 297
769
+ },
770
+ {
771
+ "epoch": 3.8461538461538463,
772
+ "grad_norm": 36.41020965576172,
773
+ "learning_rate": 1.2564102564102565e-05,
774
+ "loss": 2.1653,
775
+ "step": 300
776
+ },
777
+ {
778
+ "epoch": 3.8846153846153846,
779
+ "grad_norm": 60.422271728515625,
780
+ "learning_rate": 1.2692307692307691e-05,
781
+ "loss": 2.0104,
782
+ "step": 303
783
+ },
784
+ {
785
+ "epoch": 3.9230769230769234,
786
+ "grad_norm": 31.990575790405273,
787
+ "learning_rate": 1.282051282051282e-05,
788
+ "loss": 1.8102,
789
+ "step": 306
790
+ },
791
+ {
792
+ "epoch": 3.9615384615384617,
793
+ "grad_norm": 59.03278732299805,
794
+ "learning_rate": 1.294871794871795e-05,
795
+ "loss": 1.9436,
796
+ "step": 309
797
+ },
798
+ {
799
+ "epoch": 4.0,
800
+ "grad_norm": 108.77870178222656,
801
+ "learning_rate": 1.3076923076923078e-05,
802
+ "loss": 1.9051,
803
+ "step": 312
804
+ },
805
+ {
806
+ "epoch": 4.0,
807
+ "eval_loss": 1.8102283477783203,
808
+ "eval_map": 0.0029,
809
+ "eval_map_50": 0.0122,
810
+ "eval_map_75": 0.0005,
811
+ "eval_map_large": 0.0027,
812
+ "eval_map_medium": 0.0038,
813
+ "eval_map_per_class": 0.0029,
814
+ "eval_map_small": -1.0,
815
+ "eval_mar_1": 0.0072,
816
+ "eval_mar_10": 0.0633,
817
+ "eval_mar_100": 0.2435,
818
+ "eval_mar_100_per_class": 0.2435,
819
+ "eval_mar_large": 0.2478,
820
+ "eval_mar_medium": 0.2424,
821
+ "eval_mar_small": -1.0,
822
+ "eval_runtime": 6.3771,
823
+ "eval_samples_per_second": 24.306,
824
+ "eval_steps_per_second": 1.568,
825
+ "step": 312
826
+ },
827
+ {
828
+ "epoch": 4.038461538461538,
829
+ "grad_norm": 91.2202377319336,
830
+ "learning_rate": 1.3205128205128207e-05,
831
+ "loss": 2.071,
832
+ "step": 315
833
+ },
834
+ {
835
+ "epoch": 4.076923076923077,
836
+ "grad_norm": 60.652069091796875,
837
+ "learning_rate": 1.3333333333333333e-05,
838
+ "loss": 1.9046,
839
+ "step": 318
840
+ },
841
+ {
842
+ "epoch": 4.115384615384615,
843
+ "grad_norm": 70.13227081298828,
844
+ "learning_rate": 1.3461538461538462e-05,
845
+ "loss": 2.0003,
846
+ "step": 321
847
+ },
848
+ {
849
+ "epoch": 4.153846153846154,
850
+ "grad_norm": 62.53144836425781,
851
+ "learning_rate": 1.358974358974359e-05,
852
+ "loss": 2.1058,
853
+ "step": 324
854
+ },
855
+ {
856
+ "epoch": 4.1923076923076925,
857
+ "grad_norm": 35.44761657714844,
858
+ "learning_rate": 1.371794871794872e-05,
859
+ "loss": 1.7253,
860
+ "step": 327
861
+ },
862
+ {
863
+ "epoch": 4.230769230769231,
864
+ "grad_norm": 74.52267456054688,
865
+ "learning_rate": 1.3846153846153847e-05,
866
+ "loss": 1.4238,
867
+ "step": 330
868
+ },
869
+ {
870
+ "epoch": 4.269230769230769,
871
+ "grad_norm": 52.21393585205078,
872
+ "learning_rate": 1.3974358974358975e-05,
873
+ "loss": 1.7488,
874
+ "step": 333
875
+ },
876
+ {
877
+ "epoch": 4.3076923076923075,
878
+ "grad_norm": 74.36579895019531,
879
+ "learning_rate": 1.4102564102564104e-05,
880
+ "loss": 1.661,
881
+ "step": 336
882
+ },
883
+ {
884
+ "epoch": 4.346153846153846,
885
+ "grad_norm": 48.529876708984375,
886
+ "learning_rate": 1.423076923076923e-05,
887
+ "loss": 1.8758,
888
+ "step": 339
889
+ },
890
+ {
891
+ "epoch": 4.384615384615385,
892
+ "grad_norm": 73.52633666992188,
893
+ "learning_rate": 1.4358974358974359e-05,
894
+ "loss": 1.6955,
895
+ "step": 342
896
+ },
897
+ {
898
+ "epoch": 4.423076923076923,
899
+ "grad_norm": 85.90145874023438,
900
+ "learning_rate": 1.4487179487179489e-05,
901
+ "loss": 1.472,
902
+ "step": 345
903
+ },
904
+ {
905
+ "epoch": 4.461538461538462,
906
+ "grad_norm": 50.83028793334961,
907
+ "learning_rate": 1.4615384615384617e-05,
908
+ "loss": 1.5928,
909
+ "step": 348
910
+ },
911
+ {
912
+ "epoch": 4.5,
913
+ "grad_norm": 48.89540481567383,
914
+ "learning_rate": 1.4743589743589745e-05,
915
+ "loss": 1.7213,
916
+ "step": 351
917
+ },
918
+ {
919
+ "epoch": 4.538461538461538,
920
+ "grad_norm": 58.186668395996094,
921
+ "learning_rate": 1.4871794871794872e-05,
922
+ "loss": 1.783,
923
+ "step": 354
924
+ },
925
+ {
926
+ "epoch": 4.576923076923077,
927
+ "grad_norm": 62.13600540161133,
928
+ "learning_rate": 1.5e-05,
929
+ "loss": 1.7598,
930
+ "step": 357
931
+ },
932
+ {
933
+ "epoch": 4.615384615384615,
934
+ "grad_norm": 107.26667022705078,
935
+ "learning_rate": 1.5128205128205129e-05,
936
+ "loss": 1.8542,
937
+ "step": 360
938
+ },
939
+ {
940
+ "epoch": 4.653846153846154,
941
+ "grad_norm": 55.86286544799805,
942
+ "learning_rate": 1.5256410256410259e-05,
943
+ "loss": 1.7496,
944
+ "step": 363
945
+ },
946
+ {
947
+ "epoch": 4.6923076923076925,
948
+ "grad_norm": 55.86201095581055,
949
+ "learning_rate": 1.5384615384615387e-05,
950
+ "loss": 1.737,
951
+ "step": 366
952
+ },
953
+ {
954
+ "epoch": 4.730769230769231,
955
+ "grad_norm": 153.73898315429688,
956
+ "learning_rate": 1.5512820512820516e-05,
957
+ "loss": 1.5114,
958
+ "step": 369
959
+ },
960
+ {
961
+ "epoch": 4.769230769230769,
962
+ "grad_norm": 53.17085266113281,
963
+ "learning_rate": 1.564102564102564e-05,
964
+ "loss": 1.8322,
965
+ "step": 372
966
+ },
967
+ {
968
+ "epoch": 4.8076923076923075,
969
+ "grad_norm": 50.15892791748047,
970
+ "learning_rate": 1.576923076923077e-05,
971
+ "loss": 1.7755,
972
+ "step": 375
973
+ },
974
+ {
975
+ "epoch": 4.846153846153846,
976
+ "grad_norm": 88.30574798583984,
977
+ "learning_rate": 1.5897435897435897e-05,
978
+ "loss": 1.7223,
979
+ "step": 378
980
+ },
981
+ {
982
+ "epoch": 4.884615384615385,
983
+ "grad_norm": 43.660011291503906,
984
+ "learning_rate": 1.602564102564103e-05,
985
+ "loss": 2.1867,
986
+ "step": 381
987
+ },
988
+ {
989
+ "epoch": 4.923076923076923,
990
+ "grad_norm": 53.86124038696289,
991
+ "learning_rate": 1.6153846153846154e-05,
992
+ "loss": 1.8563,
993
+ "step": 384
994
+ },
995
+ {
996
+ "epoch": 4.961538461538462,
997
+ "grad_norm": 37.966732025146484,
998
+ "learning_rate": 1.6282051282051282e-05,
999
+ "loss": 1.761,
1000
+ "step": 387
1001
+ },
1002
+ {
1003
+ "epoch": 5.0,
1004
+ "grad_norm": 83.91299438476562,
1005
+ "learning_rate": 1.641025641025641e-05,
1006
+ "loss": 1.3634,
1007
+ "step": 390
1008
+ },
1009
+ {
1010
+ "epoch": 5.0,
1011
+ "eval_loss": 1.6359920501708984,
1012
+ "eval_map": 0.0063,
1013
+ "eval_map_50": 0.0243,
1014
+ "eval_map_75": 0.0009,
1015
+ "eval_map_large": 0.0056,
1016
+ "eval_map_medium": 0.0077,
1017
+ "eval_map_per_class": 0.0063,
1018
+ "eval_map_small": -1.0,
1019
+ "eval_mar_1": 0.008,
1020
+ "eval_mar_10": 0.0979,
1021
+ "eval_mar_100": 0.3008,
1022
+ "eval_mar_100_per_class": 0.3008,
1023
+ "eval_mar_large": 0.2978,
1024
+ "eval_mar_medium": 0.3016,
1025
+ "eval_mar_small": -1.0,
1026
+ "eval_runtime": 6.5302,
1027
+ "eval_samples_per_second": 23.736,
1028
+ "eval_steps_per_second": 1.531,
1029
+ "step": 390
1030
+ },
1031
+ {
1032
+ "epoch": 5.038461538461538,
1033
+ "grad_norm": 159.61842346191406,
1034
+ "learning_rate": 1.653846153846154e-05,
1035
+ "loss": 1.7117,
1036
+ "step": 393
1037
+ },
1038
+ {
1039
+ "epoch": 5.076923076923077,
1040
+ "grad_norm": 44.411678314208984,
1041
+ "learning_rate": 1.6666666666666667e-05,
1042
+ "loss": 1.8599,
1043
+ "step": 396
1044
+ },
1045
+ {
1046
+ "epoch": 5.115384615384615,
1047
+ "grad_norm": 29.635787963867188,
1048
+ "learning_rate": 1.6794871794871796e-05,
1049
+ "loss": 1.6144,
1050
+ "step": 399
1051
+ },
1052
+ {
1053
+ "epoch": 5.153846153846154,
1054
+ "grad_norm": 40.09303283691406,
1055
+ "learning_rate": 1.6923076923076924e-05,
1056
+ "loss": 1.7767,
1057
+ "step": 402
1058
+ },
1059
+ {
1060
+ "epoch": 5.1923076923076925,
1061
+ "grad_norm": 50.75849914550781,
1062
+ "learning_rate": 1.7051282051282053e-05,
1063
+ "loss": 1.6156,
1064
+ "step": 405
1065
+ },
1066
+ {
1067
+ "epoch": 5.230769230769231,
1068
+ "grad_norm": 392.7651062011719,
1069
+ "learning_rate": 1.717948717948718e-05,
1070
+ "loss": 1.7562,
1071
+ "step": 408
1072
+ },
1073
+ {
1074
+ "epoch": 5.269230769230769,
1075
+ "grad_norm": 37.688934326171875,
1076
+ "learning_rate": 1.730769230769231e-05,
1077
+ "loss": 1.5757,
1078
+ "step": 411
1079
+ },
1080
+ {
1081
+ "epoch": 5.3076923076923075,
1082
+ "grad_norm": 59.35781478881836,
1083
+ "learning_rate": 1.7435897435897434e-05,
1084
+ "loss": 1.6499,
1085
+ "step": 414
1086
+ },
1087
+ {
1088
+ "epoch": 5.346153846153846,
1089
+ "grad_norm": 33.89421844482422,
1090
+ "learning_rate": 1.7564102564102563e-05,
1091
+ "loss": 1.5451,
1092
+ "step": 417
1093
+ },
1094
+ {
1095
+ "epoch": 5.384615384615385,
1096
+ "grad_norm": 64.62847137451172,
1097
+ "learning_rate": 1.7692307692307694e-05,
1098
+ "loss": 1.958,
1099
+ "step": 420
1100
+ },
1101
+ {
1102
+ "epoch": 5.423076923076923,
1103
+ "grad_norm": 77.18164825439453,
1104
+ "learning_rate": 1.7820512820512823e-05,
1105
+ "loss": 1.5295,
1106
+ "step": 423
1107
+ },
1108
+ {
1109
+ "epoch": 5.461538461538462,
1110
+ "grad_norm": 73.4771957397461,
1111
+ "learning_rate": 1.794871794871795e-05,
1112
+ "loss": 1.7325,
1113
+ "step": 426
1114
+ },
1115
+ {
1116
+ "epoch": 5.5,
1117
+ "grad_norm": 36.59367752075195,
1118
+ "learning_rate": 1.8076923076923076e-05,
1119
+ "loss": 1.7073,
1120
+ "step": 429
1121
+ },
1122
+ {
1123
+ "epoch": 5.538461538461538,
1124
+ "grad_norm": 54.93395233154297,
1125
+ "learning_rate": 1.8205128205128204e-05,
1126
+ "loss": 1.5168,
1127
+ "step": 432
1128
+ },
1129
+ {
1130
+ "epoch": 5.576923076923077,
1131
+ "grad_norm": 39.080833435058594,
1132
+ "learning_rate": 1.8333333333333333e-05,
1133
+ "loss": 1.5118,
1134
+ "step": 435
1135
+ },
1136
+ {
1137
+ "epoch": 5.615384615384615,
1138
+ "grad_norm": 55.969791412353516,
1139
+ "learning_rate": 1.8461538461538465e-05,
1140
+ "loss": 1.3844,
1141
+ "step": 438
1142
+ },
1143
+ {
1144
+ "epoch": 5.653846153846154,
1145
+ "grad_norm": 35.72905731201172,
1146
+ "learning_rate": 1.858974358974359e-05,
1147
+ "loss": 1.5504,
1148
+ "step": 441
1149
+ },
1150
+ {
1151
+ "epoch": 5.6923076923076925,
1152
+ "grad_norm": 88.6219482421875,
1153
+ "learning_rate": 1.8717948717948718e-05,
1154
+ "loss": 1.6619,
1155
+ "step": 444
1156
+ },
1157
+ {
1158
+ "epoch": 5.730769230769231,
1159
+ "grad_norm": 38.608760833740234,
1160
+ "learning_rate": 1.8846153846153846e-05,
1161
+ "loss": 1.5445,
1162
+ "step": 447
1163
+ },
1164
+ {
1165
+ "epoch": 5.769230769230769,
1166
+ "grad_norm": 31.97504997253418,
1167
+ "learning_rate": 1.8974358974358975e-05,
1168
+ "loss": 1.8499,
1169
+ "step": 450
1170
+ },
1171
+ {
1172
+ "epoch": 5.8076923076923075,
1173
+ "grad_norm": 66.33550262451172,
1174
+ "learning_rate": 1.9102564102564103e-05,
1175
+ "loss": 1.6797,
1176
+ "step": 453
1177
+ },
1178
+ {
1179
+ "epoch": 5.846153846153846,
1180
+ "grad_norm": 95.7342758178711,
1181
+ "learning_rate": 1.923076923076923e-05,
1182
+ "loss": 1.599,
1183
+ "step": 456
1184
+ },
1185
+ {
1186
+ "epoch": 5.884615384615385,
1187
+ "grad_norm": 94.28338623046875,
1188
+ "learning_rate": 1.935897435897436e-05,
1189
+ "loss": 1.7258,
1190
+ "step": 459
1191
+ },
1192
+ {
1193
+ "epoch": 5.923076923076923,
1194
+ "grad_norm": 51.16758346557617,
1195
+ "learning_rate": 1.9487179487179488e-05,
1196
+ "loss": 1.5837,
1197
+ "step": 462
1198
+ },
1199
+ {
1200
+ "epoch": 5.961538461538462,
1201
+ "grad_norm": 92.95182800292969,
1202
+ "learning_rate": 1.9615384615384617e-05,
1203
+ "loss": 1.6795,
1204
+ "step": 465
1205
+ },
1206
+ {
1207
+ "epoch": 6.0,
1208
+ "grad_norm": 87.92588806152344,
1209
+ "learning_rate": 1.9743589743589745e-05,
1210
+ "loss": 1.5109,
1211
+ "step": 468
1212
+ },
1213
+ {
1214
+ "epoch": 6.0,
1215
+ "eval_loss": 1.5518213510513306,
1216
+ "eval_map": 0.0101,
1217
+ "eval_map_50": 0.0331,
1218
+ "eval_map_75": 0.0017,
1219
+ "eval_map_large": 0.0117,
1220
+ "eval_map_medium": 0.0102,
1221
+ "eval_map_per_class": 0.0101,
1222
+ "eval_map_small": -1.0,
1223
+ "eval_mar_1": 0.0241,
1224
+ "eval_mar_10": 0.1646,
1225
+ "eval_mar_100": 0.3304,
1226
+ "eval_mar_100_per_class": 0.3304,
1227
+ "eval_mar_large": 0.3435,
1228
+ "eval_mar_medium": 0.3272,
1229
+ "eval_mar_small": -1.0,
1230
+ "eval_runtime": 6.6435,
1231
+ "eval_samples_per_second": 23.331,
1232
+ "eval_steps_per_second": 1.505,
1233
+ "step": 468
1234
+ },
1235
+ {
1236
+ "epoch": 6.038461538461538,
1237
+ "grad_norm": 49.577308654785156,
1238
+ "learning_rate": 1.987179487179487e-05,
1239
+ "loss": 1.4305,
1240
+ "step": 471
1241
+ },
1242
+ {
1243
+ "epoch": 6.076923076923077,
1244
+ "grad_norm": 50.980587005615234,
1245
+ "learning_rate": 2e-05,
1246
+ "loss": 1.5264,
1247
+ "step": 474
1248
+ },
1249
+ {
1250
+ "epoch": 6.115384615384615,
1251
+ "grad_norm": 65.80145263671875,
1252
+ "learning_rate": 2.012820512820513e-05,
1253
+ "loss": 1.7552,
1254
+ "step": 477
1255
+ },
1256
+ {
1257
+ "epoch": 6.153846153846154,
1258
+ "grad_norm": 47.22199630737305,
1259
+ "learning_rate": 2.025641025641026e-05,
1260
+ "loss": 1.5436,
1261
+ "step": 480
1262
+ },
1263
+ {
1264
+ "epoch": 6.1923076923076925,
1265
+ "grad_norm": 67.4208755493164,
1266
+ "learning_rate": 2.0384615384615387e-05,
1267
+ "loss": 1.6228,
1268
+ "step": 483
1269
+ },
1270
+ {
1271
+ "epoch": 6.230769230769231,
1272
+ "grad_norm": 73.6033935546875,
1273
+ "learning_rate": 2.0512820512820512e-05,
1274
+ "loss": 1.8489,
1275
+ "step": 486
1276
+ },
1277
+ {
1278
+ "epoch": 6.269230769230769,
1279
+ "grad_norm": 52.831321716308594,
1280
+ "learning_rate": 2.064102564102564e-05,
1281
+ "loss": 1.3603,
1282
+ "step": 489
1283
+ },
1284
+ {
1285
+ "epoch": 6.3076923076923075,
1286
+ "grad_norm": 75.80672454833984,
1287
+ "learning_rate": 2.0769230769230772e-05,
1288
+ "loss": 1.6267,
1289
+ "step": 492
1290
+ },
1291
+ {
1292
+ "epoch": 6.346153846153846,
1293
+ "grad_norm": 61.34874725341797,
1294
+ "learning_rate": 2.08974358974359e-05,
1295
+ "loss": 1.6093,
1296
+ "step": 495
1297
+ },
1298
+ {
1299
+ "epoch": 6.384615384615385,
1300
+ "grad_norm": 123.37115478515625,
1301
+ "learning_rate": 2.102564102564103e-05,
1302
+ "loss": 1.7725,
1303
+ "step": 498
1304
+ },
1305
+ {
1306
+ "epoch": 6.423076923076923,
1307
+ "grad_norm": 230.2049560546875,
1308
+ "learning_rate": 2.1153846153846154e-05,
1309
+ "loss": 1.6774,
1310
+ "step": 501
1311
+ },
1312
+ {
1313
+ "epoch": 6.461538461538462,
1314
+ "grad_norm": 35.423614501953125,
1315
+ "learning_rate": 2.1282051282051282e-05,
1316
+ "loss": 1.8907,
1317
+ "step": 504
1318
+ },
1319
+ {
1320
+ "epoch": 6.5,
1321
+ "grad_norm": 59.30735778808594,
1322
+ "learning_rate": 2.141025641025641e-05,
1323
+ "loss": 1.6018,
1324
+ "step": 507
1325
+ },
1326
+ {
1327
+ "epoch": 6.538461538461538,
1328
+ "grad_norm": 42.424747467041016,
1329
+ "learning_rate": 2.1538461538461542e-05,
1330
+ "loss": 1.5013,
1331
+ "step": 510
1332
+ },
1333
+ {
1334
+ "epoch": 6.576923076923077,
1335
+ "grad_norm": 34.8141975402832,
1336
+ "learning_rate": 2.1666666666666667e-05,
1337
+ "loss": 1.6192,
1338
+ "step": 513
1339
+ },
1340
+ {
1341
+ "epoch": 6.615384615384615,
1342
+ "grad_norm": 31.286949157714844,
1343
+ "learning_rate": 2.1794871794871795e-05,
1344
+ "loss": 1.5307,
1345
+ "step": 516
1346
+ },
1347
+ {
1348
+ "epoch": 6.653846153846154,
1349
+ "grad_norm": 49.418704986572266,
1350
+ "learning_rate": 2.1923076923076924e-05,
1351
+ "loss": 1.5726,
1352
+ "step": 519
1353
+ },
1354
+ {
1355
+ "epoch": 6.6923076923076925,
1356
+ "grad_norm": 64.7080078125,
1357
+ "learning_rate": 2.2051282051282052e-05,
1358
+ "loss": 1.4166,
1359
+ "step": 522
1360
+ },
1361
+ {
1362
+ "epoch": 6.730769230769231,
1363
+ "grad_norm": 48.86499786376953,
1364
+ "learning_rate": 2.217948717948718e-05,
1365
+ "loss": 1.4853,
1366
+ "step": 525
1367
+ },
1368
+ {
1369
+ "epoch": 6.769230769230769,
1370
+ "grad_norm": 52.145896911621094,
1371
+ "learning_rate": 2.230769230769231e-05,
1372
+ "loss": 1.4472,
1373
+ "step": 528
1374
+ },
1375
+ {
1376
+ "epoch": 6.8076923076923075,
1377
+ "grad_norm": 274.7472839355469,
1378
+ "learning_rate": 2.2435897435897437e-05,
1379
+ "loss": 1.6896,
1380
+ "step": 531
1381
+ },
1382
+ {
1383
+ "epoch": 6.846153846153846,
1384
+ "grad_norm": 82.1732406616211,
1385
+ "learning_rate": 2.2564102564102566e-05,
1386
+ "loss": 1.5141,
1387
+ "step": 534
1388
+ },
1389
+ {
1390
+ "epoch": 6.884615384615385,
1391
+ "grad_norm": 49.141685485839844,
1392
+ "learning_rate": 2.2692307692307694e-05,
1393
+ "loss": 1.6835,
1394
+ "step": 537
1395
+ },
1396
+ {
1397
+ "epoch": 6.923076923076923,
1398
+ "grad_norm": 93.44412231445312,
1399
+ "learning_rate": 2.2820512820512822e-05,
1400
+ "loss": 1.5879,
1401
+ "step": 540
1402
+ },
1403
+ {
1404
+ "epoch": 6.961538461538462,
1405
+ "grad_norm": 86.4562759399414,
1406
+ "learning_rate": 2.2948717948717947e-05,
1407
+ "loss": 1.5326,
1408
+ "step": 543
1409
+ },
1410
+ {
1411
+ "epoch": 7.0,
1412
+ "grad_norm": 152.79666137695312,
1413
+ "learning_rate": 2.307692307692308e-05,
1414
+ "loss": 1.3115,
1415
+ "step": 546
1416
+ },
1417
+ {
1418
+ "epoch": 7.0,
1419
+ "eval_loss": 1.5149505138397217,
1420
+ "eval_map": 0.0101,
1421
+ "eval_map_50": 0.0356,
1422
+ "eval_map_75": 0.0026,
1423
+ "eval_map_large": 0.0103,
1424
+ "eval_map_medium": 0.0111,
1425
+ "eval_map_per_class": 0.0101,
1426
+ "eval_map_small": -1.0,
1427
+ "eval_mar_1": 0.0257,
1428
+ "eval_mar_10": 0.1515,
1429
+ "eval_mar_100": 0.3603,
1430
+ "eval_mar_100_per_class": 0.3603,
1431
+ "eval_mar_large": 0.3783,
1432
+ "eval_mar_medium": 0.356,
1433
+ "eval_mar_small": -1.0,
1434
+ "eval_runtime": 6.8174,
1435
+ "eval_samples_per_second": 22.736,
1436
+ "eval_steps_per_second": 1.467,
1437
+ "step": 546
1438
+ },
1439
+ {
1440
+ "epoch": 7.038461538461538,
1441
+ "grad_norm": 101.46001434326172,
1442
+ "learning_rate": 2.3205128205128207e-05,
1443
+ "loss": 1.7303,
1444
+ "step": 549
1445
+ },
1446
+ {
1447
+ "epoch": 7.076923076923077,
1448
+ "grad_norm": 130.27052307128906,
1449
+ "learning_rate": 2.3333333333333336e-05,
1450
+ "loss": 1.4572,
1451
+ "step": 552
1452
+ },
1453
+ {
1454
+ "epoch": 7.115384615384615,
1455
+ "grad_norm": 40.150543212890625,
1456
+ "learning_rate": 2.3461538461538464e-05,
1457
+ "loss": 1.603,
1458
+ "step": 555
1459
+ },
1460
+ {
1461
+ "epoch": 7.153846153846154,
1462
+ "grad_norm": 55.429046630859375,
1463
+ "learning_rate": 2.358974358974359e-05,
1464
+ "loss": 1.6084,
1465
+ "step": 558
1466
+ },
1467
+ {
1468
+ "epoch": 7.1923076923076925,
1469
+ "grad_norm": 122.35597229003906,
1470
+ "learning_rate": 2.3717948717948718e-05,
1471
+ "loss": 1.5198,
1472
+ "step": 561
1473
+ },
1474
+ {
1475
+ "epoch": 7.230769230769231,
1476
+ "grad_norm": 48.49534225463867,
1477
+ "learning_rate": 2.384615384615385e-05,
1478
+ "loss": 1.2955,
1479
+ "step": 564
1480
+ },
1481
+ {
1482
+ "epoch": 7.269230769230769,
1483
+ "grad_norm": 72.3371810913086,
1484
+ "learning_rate": 2.3974358974358978e-05,
1485
+ "loss": 1.6392,
1486
+ "step": 567
1487
+ },
1488
+ {
1489
+ "epoch": 7.3076923076923075,
1490
+ "grad_norm": 42.39763641357422,
1491
+ "learning_rate": 2.4102564102564103e-05,
1492
+ "loss": 1.5837,
1493
+ "step": 570
1494
+ },
1495
+ {
1496
+ "epoch": 7.346153846153846,
1497
+ "grad_norm": 93.84075927734375,
1498
+ "learning_rate": 2.423076923076923e-05,
1499
+ "loss": 1.707,
1500
+ "step": 573
1501
+ },
1502
+ {
1503
+ "epoch": 7.384615384615385,
1504
+ "grad_norm": 58.59679412841797,
1505
+ "learning_rate": 2.435897435897436e-05,
1506
+ "loss": 1.3053,
1507
+ "step": 576
1508
+ },
1509
+ {
1510
+ "epoch": 7.423076923076923,
1511
+ "grad_norm": 43.61842346191406,
1512
+ "learning_rate": 2.4487179487179488e-05,
1513
+ "loss": 1.3443,
1514
+ "step": 579
1515
+ },
1516
+ {
1517
+ "epoch": 7.461538461538462,
1518
+ "grad_norm": 41.36621856689453,
1519
+ "learning_rate": 2.461538461538462e-05,
1520
+ "loss": 1.555,
1521
+ "step": 582
1522
+ },
1523
+ {
1524
+ "epoch": 7.5,
1525
+ "grad_norm": 37.859886169433594,
1526
+ "learning_rate": 2.4743589743589744e-05,
1527
+ "loss": 1.4498,
1528
+ "step": 585
1529
+ },
1530
+ {
1531
+ "epoch": 7.538461538461538,
1532
+ "grad_norm": 41.65701675415039,
1533
+ "learning_rate": 2.4871794871794873e-05,
1534
+ "loss": 1.7132,
1535
+ "step": 588
1536
+ },
1537
+ {
1538
+ "epoch": 7.576923076923077,
1539
+ "grad_norm": 30.315841674804688,
1540
+ "learning_rate": 2.5e-05,
1541
+ "loss": 1.331,
1542
+ "step": 591
1543
+ },
1544
+ {
1545
+ "epoch": 7.615384615384615,
1546
+ "grad_norm": 91.23014831542969,
1547
+ "learning_rate": 2.512820512820513e-05,
1548
+ "loss": 1.2738,
1549
+ "step": 594
1550
+ },
1551
+ {
1552
+ "epoch": 7.653846153846154,
1553
+ "grad_norm": 220.35211181640625,
1554
+ "learning_rate": 2.5256410256410258e-05,
1555
+ "loss": 1.6383,
1556
+ "step": 597
1557
+ },
1558
+ {
1559
+ "epoch": 7.6923076923076925,
1560
+ "grad_norm": 42.8922233581543,
1561
+ "learning_rate": 2.5384615384615383e-05,
1562
+ "loss": 1.874,
1563
+ "step": 600
1564
+ },
1565
+ {
1566
+ "epoch": 7.730769230769231,
1567
+ "grad_norm": 32.37718963623047,
1568
+ "learning_rate": 2.551282051282051e-05,
1569
+ "loss": 1.6838,
1570
+ "step": 603
1571
+ },
1572
+ {
1573
+ "epoch": 7.769230769230769,
1574
+ "grad_norm": 52.3074836730957,
1575
+ "learning_rate": 2.564102564102564e-05,
1576
+ "loss": 1.6764,
1577
+ "step": 606
1578
+ },
1579
+ {
1580
+ "epoch": 7.8076923076923075,
1581
+ "grad_norm": 51.08807373046875,
1582
+ "learning_rate": 2.5769230769230768e-05,
1583
+ "loss": 1.4344,
1584
+ "step": 609
1585
+ },
1586
+ {
1587
+ "epoch": 7.846153846153846,
1588
+ "grad_norm": 50.850826263427734,
1589
+ "learning_rate": 2.58974358974359e-05,
1590
+ "loss": 1.3824,
1591
+ "step": 612
1592
+ },
1593
+ {
1594
+ "epoch": 7.884615384615385,
1595
+ "grad_norm": 93.5791244506836,
1596
+ "learning_rate": 2.6025641025641028e-05,
1597
+ "loss": 1.4267,
1598
+ "step": 615
1599
+ },
1600
+ {
1601
+ "epoch": 7.923076923076923,
1602
+ "grad_norm": 41.59553146362305,
1603
+ "learning_rate": 2.6153846153846157e-05,
1604
+ "loss": 1.5955,
1605
+ "step": 618
1606
+ },
1607
+ {
1608
+ "epoch": 7.961538461538462,
1609
+ "grad_norm": 70.58924102783203,
1610
+ "learning_rate": 2.6282051282051285e-05,
1611
+ "loss": 1.5326,
1612
+ "step": 621
1613
+ },
1614
+ {
1615
+ "epoch": 8.0,
1616
+ "grad_norm": 82.12455749511719,
1617
+ "learning_rate": 2.6410256410256413e-05,
1618
+ "loss": 1.6845,
1619
+ "step": 624
1620
+ },
1621
+ {
1622
+ "epoch": 8.0,
1623
+ "eval_loss": 1.4424972534179688,
1624
+ "eval_map": 0.0152,
1625
+ "eval_map_50": 0.053,
1626
+ "eval_map_75": 0.005,
1627
+ "eval_map_large": 0.0094,
1628
+ "eval_map_medium": 0.018,
1629
+ "eval_map_per_class": 0.0152,
1630
+ "eval_map_small": -1.0,
1631
+ "eval_mar_1": 0.0473,
1632
+ "eval_mar_10": 0.1992,
1633
+ "eval_mar_100": 0.3797,
1634
+ "eval_mar_100_per_class": 0.3797,
1635
+ "eval_mar_large": 0.4261,
1636
+ "eval_mar_medium": 0.3686,
1637
+ "eval_mar_small": -1.0,
1638
+ "eval_runtime": 6.6267,
1639
+ "eval_samples_per_second": 23.39,
1640
+ "eval_steps_per_second": 1.509,
1641
+ "step": 624
1642
+ }
1643
+ ],
1644
+ "logging_steps": 3,
1645
+ "max_steps": 11700,
1646
+ "num_input_tokens_seen": 0,
1647
+ "num_train_epochs": 150,
1648
+ "save_steps": 500,
1649
+ "stateful_callbacks": {
1650
+ "EarlyStoppingCallback": {
1651
+ "args": {
1652
+ "early_stopping_patience": 5,
1653
+ "early_stopping_threshold": 0.01
1654
+ },
1655
+ "attributes": {
1656
+ "early_stopping_patience_counter": 0
1657
+ }
1658
+ },
1659
+ "TrainerControl": {
1660
+ "args": {
1661
+ "should_epoch_stop": false,
1662
+ "should_evaluate": false,
1663
+ "should_log": false,
1664
+ "should_save": true,
1665
+ "should_training_stop": false
1666
+ },
1667
+ "attributes": {}
1668
+ }
1669
+ },
1670
+ "total_flos": 2.0698845349248e+17,
1671
+ "train_batch_size": 8,
1672
+ "trial_name": null,
1673
+ "trial_params": null
1674
+ }
checkpoint-624/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16e1e869169a03d27d9d15701c3dea3d021838c8ac85311ea63b5f5600627c0
3
+ size 5176
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hustvl/yolos-tiny",
3
+ "architectures": [
4
+ "YolosForObjectDetection"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auxiliary_loss": false,
8
+ "bbox_cost": 5,
9
+ "bbox_loss_coefficient": 5,
10
+ "class_cost": 1,
11
+ "eos_coefficient": 0.1,
12
+ "giou_cost": 2,
13
+ "giou_loss_coefficient": 2,
14
+ "hidden_act": "gelu",
15
+ "hidden_dropout_prob": 0.0,
16
+ "hidden_size": 192,
17
+ "id2label": {
18
+ "0": 0
19
+ },
20
+ "image_size": [
21
+ 800,
22
+ 1333
23
+ ],
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 768,
26
+ "label2id": {
27
+ "0": 0
28
+ },
29
+ "layer_norm_eps": 1e-12,
30
+ "model_type": "yolos",
31
+ "num_attention_heads": 3,
32
+ "num_channels": 3,
33
+ "num_detection_tokens": 100,
34
+ "num_hidden_layers": 12,
35
+ "patch_size": 16,
36
+ "qkv_bias": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.41.0",
39
+ "use_mid_position_embeddings": false
40
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728be27ebde033c9db82f536d83740166daf3ba9e0139639469ccc1618c81263
3
+ size 25909400
preprocessor_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "annotations",
5
+ "return_segmentation_masks",
6
+ "masks_path",
7
+ "do_resize",
8
+ "size",
9
+ "resample",
10
+ "do_rescale",
11
+ "rescale_factor",
12
+ "do_normalize",
13
+ "image_mean",
14
+ "image_std",
15
+ "do_convert_annotations",
16
+ "do_pad",
17
+ "format",
18
+ "return_tensors",
19
+ "data_format",
20
+ "input_data_format"
21
+ ],
22
+ "do_convert_annotations": true,
23
+ "do_normalize": true,
24
+ "do_pad": false,
25
+ "do_rescale": true,
26
+ "do_resize": false,
27
+ "format": "coco_detection",
28
+ "image_mean": [
29
+ 0.485,
30
+ 0.456,
31
+ 0.406
32
+ ],
33
+ "image_processor_type": "YolosImageProcessor",
34
+ "image_std": [
35
+ 0.229,
36
+ 0.224,
37
+ 0.225
38
+ ],
39
+ "resample": 2,
40
+ "rescale_factor": 0.00392156862745098,
41
+ "size": {
42
+ "longest_edge": 600
43
+ }
44
+ }
runs/Jun07_08-29-33_r-to-be-autotrain-advanced-bh4wdktd-bb651-htfxj/events.out.tfevents.1717748974.r-to-be-autotrain-advanced-bh4wdktd-bb651-htfxj.151.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc22242577c6c816c1cf0c59b2b0c93079705f00dfcd8d18321a0705c2866f4b
3
- size 5100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fae35758ff054fa6c7f90c8d0a59e1fe113b49ec05b7f6ff1e4439b9771bc5df
3
+ size 89663
runs/Jun07_08-29-33_r-to-be-autotrain-advanced-bh4wdktd-bb651-htfxj/events.out.tfevents.1717749412.r-to-be-autotrain-advanced-bh4wdktd-bb651-htfxj.151.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eba35445d41cdc65491c3d28431c095750c2374b9a59edd5c98bc6fca406de39
3
+ size 1094
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16e1e869169a03d27d9d15701c3dea3d021838c8ac85311ea63b5f5600627c0
3
+ size 5176
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-signatures-yolos-tiny-v4/autotrain-data",
3
+ "model": "hustvl/yolos-tiny",
4
+ "username": "to-be",
5
+ "lr": 5e-05,
6
+ "epochs": 150,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "logging_steps": -1,
18
+ "project_name": "autotrain-signatures-yolos-tiny-v4",
19
+ "auto_find_batch_size": false,
20
+ "mixed_precision": "fp16",
21
+ "save_total_limit": 1,
22
+ "push_to_hub": true,
23
+ "evaluation_strategy": "epoch",
24
+ "image_column": "autotrain_image",
25
+ "objects_column": "autotrain_objects",
26
+ "log": "tensorboard",
27
+ "image_square_size": 600,
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }