DiTo97 committed on
Commit
2d59e33
1 Parent(s): b6db9d7

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: microsoft/swinv2-base-patch4-window8-256
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - generator
@@ -15,11 +17,11 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # swinv2-base-panorama-IQA
17
 
18
- This model is a fine-tuned version of [microsoft/swinv2-base-patch4-window8-256](https://huggingface.co/microsoft/swinv2-base-patch4-window8-256) on the generator dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.0312
21
  - Srocc: 0.1132
22
- - Lcc: 0.1584
23
 
24
  ## Model description
25
 
 
2
  license: apache-2.0
3
  base_model: microsoft/swinv2-base-patch4-window8-256
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - generator
 
17
 
18
  # swinv2-base-panorama-IQA
19
 
20
+ This model is a fine-tuned version of [microsoft/swinv2-base-patch4-window8-256](https://huggingface.co/microsoft/swinv2-base-patch4-window8-256) on the isiqa-2019-hf dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0312
23
  - Srocc: 0.1132
24
+ - Lcc: 0.1583
25
 
26
  ## Model description
27
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 26.0,
3
- "eval_LCC": 0.17734737426317984,
4
- "eval_SROCC": 0.08955582232893158,
5
- "eval_loss": 0.02460244856774807,
6
- "eval_runtime": 37.0604,
7
- "eval_samples_per_second": 1.349,
8
- "eval_steps_per_second": 0.054,
9
- "total_flos": 2.2813585220126638e+18,
10
- "train_loss": 0.041763259517540646,
11
- "train_runtime": 5114.8643,
12
- "train_samples_per_second": 2.092,
13
- "train_steps_per_second": 0.029
14
  }
 
1
  {
2
+ "epoch": 42.857142857142854,
3
+ "eval_LCC": 0.15832038036938517,
4
+ "eval_SROCC": 0.11318127250900359,
5
+ "eval_loss": 0.031172048300504684,
6
+ "eval_runtime": 39.8118,
7
+ "eval_samples_per_second": 1.256,
8
+ "eval_steps_per_second": 0.05,
9
+ "total_flos": 3.763995548539945e+18,
10
+ "train_loss": 0.029827568009495736,
11
+ "train_runtime": 8529.9763,
12
+ "train_samples_per_second": 1.254,
13
+ "train_steps_per_second": 0.018
14
  }
runs/Aug03_09-00-43_c969d6f45fba/events.out.tfevents.1722684228.c969d6f45fba.52.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9dfb99a2f307330fe59856fe1193ef7e13d27cfb673d9ff79182f86c9dabad
3
+ size 455
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 26.0,
3
- "eval_LCC": 0.17734737426317984,
4
- "eval_SROCC": 0.08955582232893158,
5
- "eval_loss": 0.02460244856774807,
6
- "eval_runtime": 37.0604,
7
- "eval_samples_per_second": 1.349,
8
- "eval_steps_per_second": 0.054
9
  }
 
1
  {
2
+ "epoch": 42.857142857142854,
3
+ "eval_LCC": 0.15832038036938517,
4
+ "eval_SROCC": 0.11318127250900359,
5
+ "eval_loss": 0.031172048300504684,
6
+ "eval_runtime": 39.8118,
7
+ "eval_samples_per_second": 1.256,
8
+ "eval_steps_per_second": 0.05
9
  }
trainer_state.json CHANGED
@@ -1,344 +1,556 @@
1
  {
2
- "best_metric": 0.02460244856774807,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-73",
4
- "epoch": 26.0,
5
  "eval_steps": 500,
6
- "global_step": 91,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.8571428571428571,
13
- "eval_LCC": -0.13996786173466005,
14
- "eval_SROCC": -0.1660984393757503,
15
- "eval_loss": 0.2684723138809204,
16
- "eval_runtime": 39.3373,
17
- "eval_samples_per_second": 1.271,
18
- "eval_steps_per_second": 0.051,
19
  "step": 3
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_LCC": -0.13191836249511346,
24
- "eval_SROCC": -0.20710684273709484,
25
- "eval_loss": 0.06745556741952896,
26
- "eval_runtime": 37.0134,
27
- "eval_samples_per_second": 1.351,
28
- "eval_steps_per_second": 0.054,
29
  "step": 7
30
  },
31
  {
32
  "epoch": 2.857142857142857,
33
- "grad_norm": 5.974637508392334,
34
- "learning_rate": 1.3333333333333333e-05,
35
- "loss": 0.223,
36
  "step": 10
37
  },
38
  {
39
  "epoch": 2.857142857142857,
40
- "eval_LCC": -0.11444761651756143,
41
- "eval_SROCC": -0.19721488595438177,
42
- "eval_loss": 0.13801459968090057,
43
- "eval_runtime": 37.041,
44
- "eval_samples_per_second": 1.35,
45
- "eval_steps_per_second": 0.054,
46
  "step": 10
47
  },
48
  {
49
  "epoch": 4.0,
50
- "eval_LCC": -0.11619739343449043,
51
- "eval_SROCC": -0.23620648259303723,
52
- "eval_loss": 0.0638759583234787,
53
- "eval_runtime": 37.3454,
54
- "eval_samples_per_second": 1.339,
55
- "eval_steps_per_second": 0.054,
56
  "step": 14
57
  },
58
  {
59
  "epoch": 4.857142857142857,
60
- "eval_LCC": -0.1097281268596262,
61
- "eval_SROCC": -0.17599039615846337,
62
- "eval_loss": 0.06009223312139511,
63
- "eval_runtime": 39.2582,
64
- "eval_samples_per_second": 1.274,
65
- "eval_steps_per_second": 0.051,
66
  "step": 17
67
  },
68
  {
69
  "epoch": 5.714285714285714,
70
- "grad_norm": 1.5656846761703491,
71
- "learning_rate": 1.925925925925926e-05,
72
- "loss": 0.0607,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 6.0,
77
- "eval_LCC": -0.08523254844178266,
78
- "eval_SROCC": -0.12902761104441776,
79
- "eval_loss": 0.06266126781702042,
80
- "eval_runtime": 37.4159,
81
- "eval_samples_per_second": 1.336,
82
- "eval_steps_per_second": 0.053,
83
  "step": 21
84
  },
85
  {
86
  "epoch": 6.857142857142857,
87
- "eval_LCC": -0.07908973191513438,
88
- "eval_SROCC": -0.10501800720288115,
89
- "eval_loss": 0.054282378405332565,
90
- "eval_runtime": 39.9947,
91
- "eval_samples_per_second": 1.25,
92
- "eval_steps_per_second": 0.05,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_LCC": -0.07022943984845728,
98
- "eval_SROCC": -0.0683313325330132,
99
- "eval_loss": 0.04083505645394325,
100
- "eval_runtime": 39.4734,
101
- "eval_samples_per_second": 1.267,
102
- "eval_steps_per_second": 0.051,
103
  "step": 28
104
  },
105
  {
106
  "epoch": 8.571428571428571,
107
- "grad_norm": 0.6326273679733276,
108
- "learning_rate": 1.7777777777777777e-05,
109
- "loss": 0.0212,
110
  "step": 30
111
  },
112
  {
113
  "epoch": 8.857142857142858,
114
- "eval_LCC": -0.05666279490414187,
115
- "eval_SROCC": -0.06919567827130851,
116
- "eval_loss": 0.04194454103708267,
117
- "eval_runtime": 37.518,
118
- "eval_samples_per_second": 1.333,
119
- "eval_steps_per_second": 0.053,
120
  "step": 31
121
  },
122
  {
123
  "epoch": 10.0,
124
- "eval_LCC": -0.02743218726796948,
125
- "eval_SROCC": -0.037022809123649456,
126
- "eval_loss": 0.03434378281235695,
127
- "eval_runtime": 37.3074,
128
- "eval_samples_per_second": 1.34,
129
- "eval_steps_per_second": 0.054,
130
  "step": 35
131
  },
132
  {
133
  "epoch": 10.857142857142858,
134
- "eval_LCC": -0.0012650189550020947,
135
- "eval_SROCC": -0.033949579831932766,
136
- "eval_loss": 0.03074028715491295,
137
- "eval_runtime": 39.2094,
138
- "eval_samples_per_second": 1.275,
139
- "eval_steps_per_second": 0.051,
140
  "step": 38
141
  },
142
  {
143
  "epoch": 11.428571428571429,
144
- "grad_norm": 0.3264749348163605,
145
- "learning_rate": 1.6296296296296297e-05,
146
- "loss": 0.0168,
147
  "step": 40
148
  },
149
  {
150
  "epoch": 12.0,
151
- "eval_LCC": 0.02330881609272888,
152
- "eval_SROCC": -0.02809123649459784,
153
- "eval_loss": 0.029941115528345108,
154
- "eval_runtime": 39.1241,
155
- "eval_samples_per_second": 1.278,
156
- "eval_steps_per_second": 0.051,
157
  "step": 42
158
  },
159
  {
160
  "epoch": 12.857142857142858,
161
- "eval_LCC": 0.03261216335612809,
162
- "eval_SROCC": -0.042785114045618244,
163
- "eval_loss": 0.03004513680934906,
164
- "eval_runtime": 36.9998,
165
- "eval_samples_per_second": 1.351,
166
- "eval_steps_per_second": 0.054,
167
  "step": 45
168
  },
169
  {
170
  "epoch": 14.0,
171
- "eval_LCC": 0.051745647526359385,
172
- "eval_SROCC": -0.02376950780312125,
173
- "eval_loss": 0.028606927022337914,
174
- "eval_runtime": 36.9029,
175
- "eval_samples_per_second": 1.355,
176
- "eval_steps_per_second": 0.054,
177
  "step": 49
178
  },
179
  {
180
  "epoch": 14.285714285714286,
181
- "grad_norm": 0.6258419156074524,
182
- "learning_rate": 1.4814814814814815e-05,
183
- "loss": 0.0143,
184
  "step": 50
185
  },
186
  {
187
  "epoch": 14.857142857142858,
188
- "eval_LCC": 0.06012754354341758,
189
- "eval_SROCC": -0.018583433373349337,
190
- "eval_loss": 0.028338493779301643,
191
- "eval_runtime": 39.2004,
192
- "eval_samples_per_second": 1.275,
193
  "eval_steps_per_second": 0.051,
194
  "step": 52
195
  },
196
  {
197
  "epoch": 16.0,
198
- "eval_LCC": 0.08678963760193395,
199
- "eval_SROCC": -0.0024489795918367346,
200
- "eval_loss": 0.027331581339240074,
201
- "eval_runtime": 39.1787,
202
- "eval_samples_per_second": 1.276,
203
  "eval_steps_per_second": 0.051,
204
  "step": 56
205
  },
206
  {
207
  "epoch": 16.857142857142858,
208
- "eval_LCC": 0.11189936135943072,
209
- "eval_SROCC": 0.028283313325330132,
210
- "eval_loss": 0.02574434131383896,
211
- "eval_runtime": 37.6775,
212
- "eval_samples_per_second": 1.327,
213
- "eval_steps_per_second": 0.053,
214
  "step": 59
215
  },
216
  {
217
  "epoch": 17.142857142857142,
218
- "grad_norm": 0.2967870831489563,
219
- "learning_rate": 1.3333333333333333e-05,
220
- "loss": 0.013,
221
  "step": 60
222
  },
223
  {
224
  "epoch": 18.0,
225
- "eval_LCC": 0.140408573006196,
226
- "eval_SROCC": 0.05421368547418968,
227
- "eval_loss": 0.024704232811927795,
228
- "eval_runtime": 39.5048,
229
- "eval_samples_per_second": 1.266,
230
  "eval_steps_per_second": 0.051,
231
  "step": 63
232
  },
233
  {
234
  "epoch": 18.857142857142858,
235
- "eval_LCC": 0.15329553575685126,
236
- "eval_SROCC": 0.07025210084033613,
237
- "eval_loss": 0.0247227493673563,
238
- "eval_runtime": 37.5863,
239
- "eval_samples_per_second": 1.33,
240
- "eval_steps_per_second": 0.053,
241
  "step": 66
242
  },
243
  {
244
  "epoch": 20.0,
245
- "grad_norm": 0.35133129358291626,
246
- "learning_rate": 1.1851851851851852e-05,
247
- "loss": 0.0111,
248
  "step": 70
249
  },
250
  {
251
  "epoch": 20.0,
252
- "eval_LCC": 0.16704999475534,
253
- "eval_SROCC": 0.08004801920768306,
254
- "eval_loss": 0.02460792474448681,
255
- "eval_runtime": 39.6008,
256
- "eval_samples_per_second": 1.263,
257
- "eval_steps_per_second": 0.051,
258
  "step": 70
259
  },
260
  {
261
  "epoch": 20.857142857142858,
262
- "eval_LCC": 0.17734737426317984,
263
- "eval_SROCC": 0.08955582232893158,
264
- "eval_loss": 0.02460244856774807,
265
- "eval_runtime": 39.3838,
266
- "eval_samples_per_second": 1.27,
267
- "eval_steps_per_second": 0.051,
268
  "step": 73
269
  },
270
  {
271
  "epoch": 22.0,
272
- "eval_LCC": 0.183477067706457,
273
- "eval_SROCC": 0.09983193277310923,
274
- "eval_loss": 0.025651078671216965,
275
- "eval_runtime": 37.2011,
276
- "eval_samples_per_second": 1.344,
277
- "eval_steps_per_second": 0.054,
278
  "step": 77
279
  },
280
  {
281
  "epoch": 22.857142857142858,
282
- "grad_norm": 0.3938016891479492,
283
- "learning_rate": 1.037037037037037e-05,
284
- "loss": 0.0104,
285
  "step": 80
286
  },
287
  {
288
  "epoch": 22.857142857142858,
289
- "eval_LCC": 0.1943336496302965,
290
- "eval_SROCC": 0.10165666266506602,
291
- "eval_loss": 0.02549559995532036,
292
- "eval_runtime": 39.3404,
293
- "eval_samples_per_second": 1.271,
294
- "eval_steps_per_second": 0.051,
295
  "step": 80
296
  },
297
  {
298
  "epoch": 24.0,
299
- "eval_LCC": 0.20850983626278138,
300
- "eval_SROCC": 0.11490996398559422,
301
- "eval_loss": 0.02545199543237686,
302
- "eval_runtime": 39.5182,
303
- "eval_samples_per_second": 1.265,
304
- "eval_steps_per_second": 0.051,
305
  "step": 84
306
  },
307
  {
308
  "epoch": 24.857142857142858,
309
- "eval_LCC": 0.21549093905447098,
310
- "eval_SROCC": 0.12451380552220888,
311
- "eval_loss": 0.025542089715600014,
312
- "eval_runtime": 37.4091,
313
- "eval_samples_per_second": 1.337,
314
- "eval_steps_per_second": 0.053,
315
  "step": 87
316
  },
317
  {
318
  "epoch": 25.714285714285715,
319
- "grad_norm": 0.5060675740242004,
320
- "learning_rate": 8.888888888888888e-06,
321
- "loss": 0.0088,
322
  "step": 90
323
  },
324
  {
325
  "epoch": 26.0,
326
- "eval_LCC": 0.22575900814493188,
327
- "eval_SROCC": 0.13190876350540215,
328
- "eval_loss": 0.026240630075335503,
329
- "eval_runtime": 38.9035,
330
  "eval_samples_per_second": 1.285,
331
  "eval_steps_per_second": 0.051,
332
  "step": 91
333
  },
334
  {
335
- "epoch": 26.0,
336
- "step": 91,
337
- "total_flos": 2.2813585220126638e+18,
338
- "train_loss": 0.041763259517540646,
339
- "train_runtime": 5114.8643,
340
- "train_samples_per_second": 2.092,
341
- "train_steps_per_second": 0.029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  }
343
  ],
344
  "logging_steps": 10,
@@ -367,7 +579,7 @@
367
  "attributes": {}
368
  }
369
  },
370
- "total_flos": 2.2813585220126638e+18,
371
  "train_batch_size": 16,
372
  "trial_name": null,
373
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.031172048300504684,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-140",
4
+ "epoch": 42.857142857142854,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.8571428571428571,
13
+ "eval_LCC": -0.13924632211285398,
14
+ "eval_SROCC": -0.16677070828331333,
15
+ "eval_loss": 0.30208367109298706,
16
+ "eval_runtime": 40.3225,
17
+ "eval_samples_per_second": 1.24,
18
+ "eval_steps_per_second": 0.05,
19
  "step": 3
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_LCC": -0.13474898479193245,
24
+ "eval_SROCC": -0.1807923169267707,
25
+ "eval_loss": 0.12863630056381226,
26
+ "eval_runtime": 40.2585,
27
+ "eval_samples_per_second": 1.242,
28
+ "eval_steps_per_second": 0.05,
29
  "step": 7
30
  },
31
  {
32
  "epoch": 2.857142857142857,
33
+ "grad_norm": 2.176586151123047,
34
+ "learning_rate": 6.666666666666667e-06,
35
+ "loss": 0.2494,
36
  "step": 10
37
  },
38
  {
39
  "epoch": 2.857142857142857,
40
+ "eval_LCC": -0.1273026992962787,
41
+ "eval_SROCC": -0.17839135654261706,
42
+ "eval_loss": 0.06783520430326462,
43
+ "eval_runtime": 38.6817,
44
+ "eval_samples_per_second": 1.293,
45
+ "eval_steps_per_second": 0.052,
46
  "step": 10
47
  },
48
  {
49
  "epoch": 4.0,
50
+ "eval_LCC": -0.11136535393072287,
51
+ "eval_SROCC": -0.16254501800720286,
52
+ "eval_loss": 0.1143360510468483,
53
+ "eval_runtime": 40.2563,
54
+ "eval_samples_per_second": 1.242,
55
+ "eval_steps_per_second": 0.05,
56
  "step": 14
57
  },
58
  {
59
  "epoch": 4.857142857142857,
60
+ "eval_LCC": -0.11520350547892241,
61
+ "eval_SROCC": -0.19394957983193276,
62
+ "eval_loss": 0.0686483308672905,
63
+ "eval_runtime": 41.6029,
64
+ "eval_samples_per_second": 1.202,
65
+ "eval_steps_per_second": 0.048,
66
  "step": 17
67
  },
68
  {
69
  "epoch": 5.714285714285714,
70
+ "grad_norm": 1.4751098155975342,
71
+ "learning_rate": 9.966191788709716e-06,
72
+ "loss": 0.069,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 6.0,
77
+ "eval_LCC": -0.13762820684113933,
78
+ "eval_SROCC": -0.2063385354141657,
79
+ "eval_loss": 0.05720577389001846,
80
+ "eval_runtime": 40.3372,
81
+ "eval_samples_per_second": 1.24,
82
+ "eval_steps_per_second": 0.05,
83
  "step": 21
84
  },
85
  {
86
  "epoch": 6.857142857142857,
87
+ "eval_LCC": -0.14053110609303213,
88
+ "eval_SROCC": -0.19654261704681872,
89
+ "eval_loss": 0.05371831730008125,
90
+ "eval_runtime": 38.7274,
91
+ "eval_samples_per_second": 1.291,
92
+ "eval_steps_per_second": 0.052,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_LCC": -0.1289209958563034,
98
+ "eval_SROCC": -0.17944777911164464,
99
+ "eval_loss": 0.06708792597055435,
100
+ "eval_runtime": 39.9307,
101
+ "eval_samples_per_second": 1.252,
102
+ "eval_steps_per_second": 0.05,
103
  "step": 28
104
  },
105
  {
106
  "epoch": 8.571428571428571,
107
+ "grad_norm": 1.0116759538650513,
108
+ "learning_rate": 9.698463103929542e-06,
109
+ "loss": 0.0276,
110
  "step": 30
111
  },
112
  {
113
  "epoch": 8.857142857142858,
114
+ "eval_LCC": -0.11639993851566689,
115
+ "eval_SROCC": -0.14429771908763506,
116
+ "eval_loss": 0.05505238473415375,
117
+ "eval_runtime": 40.7189,
118
+ "eval_samples_per_second": 1.228,
119
+ "eval_steps_per_second": 0.049,
120
  "step": 31
121
  },
122
  {
123
  "epoch": 10.0,
124
+ "eval_LCC": -0.09483350559637191,
125
+ "eval_SROCC": -0.11097238895558223,
126
+ "eval_loss": 0.04916208237409592,
127
+ "eval_runtime": 39.8785,
128
+ "eval_samples_per_second": 1.254,
129
+ "eval_steps_per_second": 0.05,
130
  "step": 35
131
  },
132
  {
133
  "epoch": 10.857142857142858,
134
+ "eval_LCC": -0.07673019260109643,
135
+ "eval_SROCC": -0.09445378151260504,
136
+ "eval_loss": 0.04647849500179291,
137
+ "eval_runtime": 40.0012,
138
+ "eval_samples_per_second": 1.25,
139
+ "eval_steps_per_second": 0.05,
140
  "step": 38
141
  },
142
  {
143
  "epoch": 11.428571428571429,
144
+ "grad_norm": 0.6633228063583374,
145
+ "learning_rate": 9.177439057064684e-06,
146
+ "loss": 0.0181,
147
  "step": 40
148
  },
149
  {
150
  "epoch": 12.0,
151
+ "eval_LCC": -0.04636396970926032,
152
+ "eval_SROCC": -0.0830252100840336,
153
+ "eval_loss": 0.04492847993969917,
154
+ "eval_runtime": 40.5111,
155
+ "eval_samples_per_second": 1.234,
156
+ "eval_steps_per_second": 0.049,
157
  "step": 42
158
  },
159
  {
160
  "epoch": 12.857142857142858,
161
+ "eval_LCC": -0.028021486790691955,
162
+ "eval_SROCC": -0.06593037214885954,
163
+ "eval_loss": 0.04024951532483101,
164
+ "eval_runtime": 39.9421,
165
+ "eval_samples_per_second": 1.252,
166
+ "eval_steps_per_second": 0.05,
167
  "step": 45
168
  },
169
  {
170
  "epoch": 14.0,
171
+ "eval_LCC": -0.011749661725862606,
172
+ "eval_SROCC": -0.041056422569027605,
173
+ "eval_loss": 0.038896750658750534,
174
+ "eval_runtime": 38.1309,
175
+ "eval_samples_per_second": 1.311,
176
+ "eval_steps_per_second": 0.052,
177
  "step": 49
178
  },
179
  {
180
  "epoch": 14.285714285714286,
181
+ "grad_norm": 0.8275535106658936,
182
+ "learning_rate": 8.43120818934367e-06,
183
+ "loss": 0.0128,
184
  "step": 50
185
  },
186
  {
187
  "epoch": 14.857142857142858,
188
+ "eval_LCC": -0.005515563619600932,
189
+ "eval_SROCC": -0.03481392557022809,
190
+ "eval_loss": 0.03803449496626854,
191
+ "eval_runtime": 39.4857,
192
+ "eval_samples_per_second": 1.266,
193
  "eval_steps_per_second": 0.051,
194
  "step": 52
195
  },
196
  {
197
  "epoch": 16.0,
198
+ "eval_LCC": 0.008753274292948681,
199
+ "eval_SROCC": -0.023193277310924368,
200
+ "eval_loss": 0.037131380289793015,
201
+ "eval_runtime": 39.5889,
202
+ "eval_samples_per_second": 1.263,
203
  "eval_steps_per_second": 0.051,
204
  "step": 56
205
  },
206
  {
207
  "epoch": 16.857142857142858,
208
+ "eval_LCC": 0.020548187715507097,
209
+ "eval_SROCC": 0.00475390156062425,
210
+ "eval_loss": 0.03600911796092987,
211
+ "eval_runtime": 38.3606,
212
+ "eval_samples_per_second": 1.303,
213
+ "eval_steps_per_second": 0.052,
214
  "step": 59
215
  },
216
  {
217
  "epoch": 17.142857142857142,
218
+ "grad_norm": 0.48783865571022034,
219
+ "learning_rate": 7.500000000000001e-06,
220
+ "loss": 0.0112,
221
  "step": 60
222
  },
223
  {
224
  "epoch": 18.0,
225
+ "eval_LCC": 0.038465536486787584,
226
+ "eval_SROCC": 0.01282112845138055,
227
+ "eval_loss": 0.03541974350810051,
228
+ "eval_runtime": 39.3983,
229
+ "eval_samples_per_second": 1.269,
230
  "eval_steps_per_second": 0.051,
231
  "step": 63
232
  },
233
  {
234
  "epoch": 18.857142857142858,
235
+ "eval_LCC": 0.05092487532044425,
236
+ "eval_SROCC": 0.019735894357743092,
237
+ "eval_loss": 0.03518449887633324,
238
+ "eval_runtime": 38.2131,
239
+ "eval_samples_per_second": 1.308,
240
+ "eval_steps_per_second": 0.052,
241
  "step": 66
242
  },
243
  {
244
  "epoch": 20.0,
245
+ "grad_norm": 0.5530023574829102,
246
+ "learning_rate": 6.434016163555452e-06,
247
+ "loss": 0.0088,
248
  "step": 70
249
  },
250
  {
251
  "epoch": 20.0,
252
+ "eval_LCC": 0.06701747831478003,
253
+ "eval_SROCC": 0.03308523409363745,
254
+ "eval_loss": 0.03455502539873123,
255
+ "eval_runtime": 37.5917,
256
+ "eval_samples_per_second": 1.33,
257
+ "eval_steps_per_second": 0.053,
258
  "step": 70
259
  },
260
  {
261
  "epoch": 20.857142857142858,
262
+ "eval_LCC": 0.08014093003987598,
263
+ "eval_SROCC": 0.04124849939975991,
264
+ "eval_loss": 0.033747877925634384,
265
+ "eval_runtime": 37.8795,
266
+ "eval_samples_per_second": 1.32,
267
+ "eval_steps_per_second": 0.053,
268
  "step": 73
269
  },
270
  {
271
  "epoch": 22.0,
272
+ "eval_LCC": 0.08790653267137535,
273
+ "eval_SROCC": 0.039615846338535404,
274
+ "eval_loss": 0.034678902477025986,
275
+ "eval_runtime": 40.032,
276
+ "eval_samples_per_second": 1.249,
277
+ "eval_steps_per_second": 0.05,
278
  "step": 77
279
  },
280
  {
281
  "epoch": 22.857142857142858,
282
+ "grad_norm": 0.45388710498809814,
283
+ "learning_rate": 5.290724144552379e-06,
284
+ "loss": 0.008,
285
  "step": 80
286
  },
287
  {
288
  "epoch": 22.857142857142858,
289
+ "eval_LCC": 0.09537098486730576,
290
+ "eval_SROCC": 0.051236494597839126,
291
+ "eval_loss": 0.034752532839775085,
292
+ "eval_runtime": 38.2511,
293
+ "eval_samples_per_second": 1.307,
294
+ "eval_steps_per_second": 0.052,
295
  "step": 80
296
  },
297
  {
298
  "epoch": 24.0,
299
+ "eval_LCC": 0.10710952739103984,
300
+ "eval_SROCC": 0.06429771908763504,
301
+ "eval_loss": 0.0338628776371479,
302
+ "eval_runtime": 37.9231,
303
+ "eval_samples_per_second": 1.318,
304
+ "eval_steps_per_second": 0.053,
305
  "step": 84
306
  },
307
  {
308
  "epoch": 24.857142857142858,
309
+ "eval_LCC": 0.11431013947231683,
310
+ "eval_SROCC": 0.07649459783913565,
311
+ "eval_loss": 0.03316153585910797,
312
+ "eval_runtime": 39.8963,
313
+ "eval_samples_per_second": 1.253,
314
+ "eval_steps_per_second": 0.05,
315
  "step": 87
316
  },
317
  {
318
  "epoch": 25.714285714285715,
319
+ "grad_norm": 0.3282585144042969,
320
+ "learning_rate": 4.131759111665349e-06,
321
+ "loss": 0.0066,
322
  "step": 90
323
  },
324
  {
325
  "epoch": 26.0,
326
+ "eval_LCC": 0.12399887441297865,
327
+ "eval_SROCC": 0.08552220888355341,
328
+ "eval_loss": 0.03337186202406883,
329
+ "eval_runtime": 38.9243,
330
  "eval_samples_per_second": 1.285,
331
  "eval_steps_per_second": 0.051,
332
  "step": 91
333
  },
334
  {
335
+ "epoch": 26.857142857142858,
336
+ "eval_LCC": 0.12917364004592166,
337
+ "eval_SROCC": 0.09378151260504201,
338
+ "eval_loss": 0.032974038273096085,
339
+ "eval_runtime": 38.6081,
340
+ "eval_samples_per_second": 1.295,
341
+ "eval_steps_per_second": 0.052,
342
+ "step": 94
343
+ },
344
+ {
345
+ "epoch": 28.0,
346
+ "eval_LCC": 0.13808917019413902,
347
+ "eval_SROCC": 0.09973589435774309,
348
+ "eval_loss": 0.03173243626952171,
349
+ "eval_runtime": 40.6852,
350
+ "eval_samples_per_second": 1.229,
351
+ "eval_steps_per_second": 0.049,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 28.571428571428573,
356
+ "grad_norm": 0.3081737458705902,
357
+ "learning_rate": 3.019601169804216e-06,
358
+ "loss": 0.006,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 28.857142857142858,
363
+ "eval_LCC": 0.14324069262154604,
364
+ "eval_SROCC": 0.1086674669867947,
365
+ "eval_loss": 0.031397391110658646,
366
+ "eval_runtime": 40.3292,
367
+ "eval_samples_per_second": 1.24,
368
+ "eval_steps_per_second": 0.05,
369
+ "step": 101
370
+ },
371
+ {
372
+ "epoch": 30.0,
373
+ "eval_LCC": 0.14463811773223376,
374
+ "eval_SROCC": 0.10530612244897959,
375
+ "eval_loss": 0.03169296309351921,
376
+ "eval_runtime": 38.2368,
377
+ "eval_samples_per_second": 1.308,
378
+ "eval_steps_per_second": 0.052,
379
+ "step": 105
380
+ },
381
+ {
382
+ "epoch": 30.857142857142858,
383
+ "eval_LCC": 0.14653245871689255,
384
+ "eval_SROCC": 0.09714285714285714,
385
+ "eval_loss": 0.0316772386431694,
386
+ "eval_runtime": 39.419,
387
+ "eval_samples_per_second": 1.268,
388
+ "eval_steps_per_second": 0.051,
389
+ "step": 108
390
+ },
391
+ {
392
+ "epoch": 31.428571428571427,
393
+ "grad_norm": 0.286915123462677,
394
+ "learning_rate": 2.0142070414860704e-06,
395
+ "loss": 0.0062,
396
+ "step": 110
397
+ },
398
+ {
399
+ "epoch": 32.0,
400
+ "eval_LCC": 0.14957484084767725,
401
+ "eval_SROCC": 0.10319327731092437,
402
+ "eval_loss": 0.03150345757603645,
403
+ "eval_runtime": 37.6067,
404
+ "eval_samples_per_second": 1.33,
405
+ "eval_steps_per_second": 0.053,
406
+ "step": 112
407
+ },
408
+ {
409
+ "epoch": 32.857142857142854,
410
+ "eval_LCC": 0.15114563767181508,
411
+ "eval_SROCC": 0.10319327731092437,
412
+ "eval_loss": 0.031500138342380524,
413
+ "eval_runtime": 39.4534,
414
+ "eval_samples_per_second": 1.267,
415
+ "eval_steps_per_second": 0.051,
416
+ "step": 115
417
+ },
418
+ {
419
+ "epoch": 34.0,
420
+ "eval_LCC": 0.15328812324811011,
421
+ "eval_SROCC": 0.10319327731092437,
422
+ "eval_loss": 0.03136735409498215,
423
+ "eval_runtime": 39.3064,
424
+ "eval_samples_per_second": 1.272,
425
+ "eval_steps_per_second": 0.051,
426
+ "step": 119
427
+ },
428
+ {
429
+ "epoch": 34.285714285714285,
430
+ "grad_norm": 0.290955513715744,
431
+ "learning_rate": 1.1697777844051105e-06,
432
+ "loss": 0.0057,
433
+ "step": 120
434
+ },
435
+ {
436
+ "epoch": 34.857142857142854,
437
+ "eval_LCC": 0.15433310778646528,
438
+ "eval_SROCC": 0.10943577430972388,
439
+ "eval_loss": 0.03135786950588226,
440
+ "eval_runtime": 39.8429,
441
+ "eval_samples_per_second": 1.255,
442
+ "eval_steps_per_second": 0.05,
443
+ "step": 122
444
+ },
445
+ {
446
+ "epoch": 36.0,
447
+ "eval_LCC": 0.15576240657153692,
448
+ "eval_SROCC": 0.10905162064825931,
449
+ "eval_loss": 0.03134962171316147,
450
+ "eval_runtime": 37.8477,
451
+ "eval_samples_per_second": 1.321,
452
+ "eval_steps_per_second": 0.053,
453
+ "step": 126
454
+ },
455
+ {
456
+ "epoch": 36.857142857142854,
457
+ "eval_LCC": 0.15697894936703244,
458
+ "eval_SROCC": 0.11318127250900359,
459
+ "eval_loss": 0.03124266117811203,
460
+ "eval_runtime": 38.2997,
461
+ "eval_samples_per_second": 1.305,
462
+ "eval_steps_per_second": 0.052,
463
+ "step": 129
464
+ },
465
+ {
466
+ "epoch": 37.142857142857146,
467
+ "grad_norm": 0.33939629793167114,
468
+ "learning_rate": 5.318367983829393e-07,
469
+ "loss": 0.006,
470
+ "step": 130
471
+ },
472
+ {
473
+ "epoch": 38.0,
474
+ "eval_LCC": 0.15766699322349262,
475
+ "eval_SROCC": 0.11318127250900359,
476
+ "eval_loss": 0.031217649579048157,
477
+ "eval_runtime": 40.1834,
478
+ "eval_samples_per_second": 1.244,
479
+ "eval_steps_per_second": 0.05,
480
+ "step": 133
481
+ },
482
+ {
483
+ "epoch": 38.857142857142854,
484
+ "eval_LCC": 0.15808379809146395,
485
+ "eval_SROCC": 0.11318127250900359,
486
+ "eval_loss": 0.031192703172564507,
487
+ "eval_runtime": 40.4348,
488
+ "eval_samples_per_second": 1.237,
489
+ "eval_steps_per_second": 0.049,
490
+ "step": 136
491
+ },
492
+ {
493
+ "epoch": 40.0,
494
+ "grad_norm": 0.4542177617549896,
495
+ "learning_rate": 1.3477564710088097e-07,
496
+ "loss": 0.0058,
497
+ "step": 140
498
+ },
499
+ {
500
+ "epoch": 40.0,
501
+ "eval_LCC": 0.15832038036938517,
502
+ "eval_SROCC": 0.11318127250900359,
503
+ "eval_loss": 0.031172048300504684,
504
+ "eval_runtime": 38.0122,
505
+ "eval_samples_per_second": 1.315,
506
+ "eval_steps_per_second": 0.053,
507
+ "step": 140
508
+ },
509
+ {
510
+ "epoch": 40.857142857142854,
511
+ "eval_LCC": 0.15841233686752923,
512
+ "eval_SROCC": 0.11318127250900359,
513
+ "eval_loss": 0.03117518685758114,
514
+ "eval_runtime": 38.0357,
515
+ "eval_samples_per_second": 1.315,
516
+ "eval_steps_per_second": 0.053,
517
+ "step": 143
518
+ },
519
+ {
520
+ "epoch": 42.0,
521
+ "eval_LCC": 0.15844144208781846,
522
+ "eval_SROCC": 0.11318127250900359,
523
+ "eval_loss": 0.031179124489426613,
524
+ "eval_runtime": 37.8479,
525
+ "eval_samples_per_second": 1.321,
526
+ "eval_steps_per_second": 0.053,
527
+ "step": 147
528
+ },
529
+ {
530
+ "epoch": 42.857142857142854,
531
+ "grad_norm": 0.2731544077396393,
532
+ "learning_rate": 0.0,
533
+ "loss": 0.006,
534
+ "step": 150
535
+ },
536
+ {
537
+ "epoch": 42.857142857142854,
538
+ "eval_LCC": 0.15844069648001213,
539
+ "eval_SROCC": 0.11318127250900359,
540
+ "eval_loss": 0.031179847195744514,
541
+ "eval_runtime": 38.1122,
542
+ "eval_samples_per_second": 1.312,
543
+ "eval_steps_per_second": 0.052,
544
+ "step": 150
545
+ },
546
+ {
547
+ "epoch": 42.857142857142854,
548
+ "step": 150,
549
+ "total_flos": 3.763995548539945e+18,
550
+ "train_loss": 0.029827568009495736,
551
+ "train_runtime": 8529.9763,
552
+ "train_samples_per_second": 1.254,
553
+ "train_steps_per_second": 0.018
554
  }
555
  ],
556
  "logging_steps": 10,
 
579
  "attributes": {}
580
  }
581
  },
582
+ "total_flos": 3.763995548539945e+18,
583
  "train_batch_size": 16,
584
  "trial_name": null,
585
  "trial_params": null
training_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 26.0,
3
- "total_flos": 2.2813585220126638e+18,
4
- "train_loss": 0.041763259517540646,
5
- "train_runtime": 5114.8643,
6
- "train_samples_per_second": 2.092,
7
- "train_steps_per_second": 0.029
8
  }
 
1
  {
2
+ "epoch": 42.857142857142854,
3
+ "total_flos": 3.763995548539945e+18,
4
+ "train_loss": 0.029827568009495736,
5
+ "train_runtime": 8529.9763,
6
+ "train_samples_per_second": 1.254,
7
+ "train_steps_per_second": 0.018
8
  }