chizhik committed on
Commit
b039ab8
1 Parent(s): 06d531e

updated model weights

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/mnt/beegfs/mc000051/CERPLES/Models/roberta-base-biomedical-clinical-es",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "Models/roberta-base-biomedical-clinical-es",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0df800a0717e27a653a11d7f8346b293ebec23f5e161f45ac4940172e19941
3
+ size 1008039837
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6e41143e7af891ffe98910ef36cc712dbb46d86b33a08904be3bd8a2543f6a
3
+ size 504033325
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dcd8bd2facbcb38c978d6b2b10cbacd8e9f54834a513467b1196a471554682c
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c92db6b019bcc5daa66264ee6196af75e462e009a9f3d24a99e49aae2e98453
3
  size 14567
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:052b473372c295fdcecaddd4e0fe02e5f1ad19b0e470cf4538018b0a63f4f2cb
3
+ size 623
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "trim_offsets": true, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "/mnt/beegfs/mc000051/CERPLES/Models/roberta-base-biomedical-clinical-es", "tokenizer_class": "RobertaTokenizer"}
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "trim_offsets": true, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "Models/roberta-base-biomedical-clinical-es", "tokenizer_class": "RobertaTokenizer"}
trainer_state.json CHANGED
@@ -1,156 +1,578 @@
1
  {
2
- "best_metric": 0.7442957192288286,
3
- "best_model_checkpoint": "./CARES/checkpoints/roberta-/checkpoint-3500",
4
- "epoch": 49.29577464788732,
5
- "global_step": 3500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 7.04,
12
  "learning_rate": 4.647887323943662e-05,
13
- "loss": 0.1802,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 7.04,
18
- "eval_loss": 0.1463283747434616,
19
- "eval_macro_f1": 0.38060566625566594,
20
- "eval_macro_precision": 0.5037224636955915,
21
- "eval_macro_recall": 0.36676192196038643,
22
- "eval_micro_f1": 0.7358149620527648,
23
- "eval_micro_precision": 0.8249594813614263,
24
- "eval_micro_recall": 0.6640574037834311,
25
- "eval_runtime": 2.7054,
26
- "eval_samples_per_second": 357.063,
27
- "eval_steps_per_second": 11.459,
28
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  },
30
  {
31
  "epoch": 14.08,
32
  "learning_rate": 4.295774647887324e-05,
33
- "loss": 0.0484,
34
  "step": 1000
35
  },
36
  {
37
- "epoch": 14.08,
38
- "eval_loss": 0.14033463597297668,
39
- "eval_macro_f1": 0.6067981552982502,
40
- "eval_macro_precision": 0.6486392263482452,
41
- "eval_macro_recall": 0.5884363376224964,
42
- "eval_micro_f1": 0.7908386662175816,
43
- "eval_micro_precision": 0.8175487465181058,
44
- "eval_micro_recall": 0.7658186562296151,
45
- "eval_runtime": 2.6975,
46
- "eval_samples_per_second": 358.108,
47
- "eval_steps_per_second": 11.492,
48
- "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  },
50
  {
51
  "epoch": 21.13,
52
  "learning_rate": 3.943661971830986e-05,
53
- "loss": 0.0179,
54
  "step": 1500
55
  },
56
  {
57
- "epoch": 21.13,
58
- "eval_loss": 0.16313208639621735,
59
- "eval_macro_f1": 0.6876414143457183,
60
- "eval_macro_precision": 0.7316617517332842,
61
- "eval_macro_recall": 0.7019290034007357,
62
- "eval_micro_f1": 0.7892204042348412,
63
- "eval_micro_precision": 0.7765151515151515,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  "eval_micro_recall": 0.8023483365949119,
65
- "eval_runtime": 2.6931,
66
- "eval_samples_per_second": 358.699,
67
- "eval_steps_per_second": 11.511,
68
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  },
70
  {
71
  "epoch": 28.17,
72
  "learning_rate": 3.5915492957746486e-05,
73
- "loss": 0.0095,
74
  "step": 2000
75
  },
76
  {
77
- "epoch": 28.17,
78
- "eval_loss": 0.1628233790397644,
79
- "eval_macro_f1": 0.7285510286916194,
80
- "eval_macro_precision": 0.8247103134528249,
81
- "eval_macro_recall": 0.6865275313487541,
82
- "eval_micro_f1": 0.8138990978950885,
83
- "eval_micro_precision": 0.8342465753424657,
84
- "eval_micro_recall": 0.7945205479452054,
85
- "eval_runtime": 2.6912,
86
- "eval_samples_per_second": 358.946,
87
- "eval_steps_per_second": 11.519,
88
- "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  },
90
  {
91
  "epoch": 35.21,
92
  "learning_rate": 3.23943661971831e-05,
93
- "loss": 0.0055,
94
  "step": 2500
95
  },
96
  {
97
- "epoch": 35.21,
98
- "eval_loss": 0.1734953373670578,
99
- "eval_macro_f1": 0.7421994453880427,
100
- "eval_macro_precision": 0.8204432806900727,
101
- "eval_macro_recall": 0.7088284387134485,
102
- "eval_micro_f1": 0.8176722716782063,
103
- "eval_micro_precision": 0.8266666666666667,
104
- "eval_micro_recall": 0.8088714938030006,
105
- "eval_runtime": 2.6929,
106
- "eval_samples_per_second": 358.718,
107
- "eval_steps_per_second": 11.512,
108
- "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  {
111
- "epoch": 42.25,
112
- "learning_rate": 2.887323943661972e-05,
113
- "loss": 0.0039,
114
- "step": 3000
115
- },
116
- {
117
- "epoch": 42.25,
118
- "eval_loss": 0.1867484599351883,
119
- "eval_macro_f1": 0.7349168384365183,
120
- "eval_macro_precision": 0.7741277072189265,
121
- "eval_macro_recall": 0.7306178466824944,
122
- "eval_micro_f1": 0.8012924071082391,
123
- "eval_micro_precision": 0.793854033290653,
124
- "eval_micro_recall": 0.8088714938030006,
125
- "eval_runtime": 2.6968,
126
- "eval_samples_per_second": 358.208,
127
- "eval_steps_per_second": 11.495,
128
- "step": 3000
129
- },
130
- {
131
- "epoch": 49.3,
132
- "learning_rate": 2.535211267605634e-05,
133
- "loss": 0.0032,
134
- "step": 3500
135
- },
136
- {
137
- "epoch": 49.3,
138
- "eval_loss": 0.181275874376297,
139
- "eval_macro_f1": 0.7442957192288286,
140
- "eval_macro_precision": 0.7848363899123905,
141
- "eval_macro_recall": 0.7360907675141599,
142
- "eval_micro_f1": 0.8202794930126746,
143
- "eval_micro_precision": 0.8173575129533679,
144
- "eval_micro_recall": 0.8232224396607958,
145
- "eval_runtime": 2.6934,
146
- "eval_samples_per_second": 358.652,
147
- "eval_steps_per_second": 11.51,
148
- "step": 3500
149
  }
150
  ],
151
  "max_steps": 7100,
152
  "num_train_epochs": 100,
153
- "total_flos": 2.877157614419203e+16,
154
  "trial_name": null,
155
  "trial_params": null
156
  }
 
1
  {
2
+ "best_metric": 0.7414142113821449,
3
+ "best_model_checkpoint": "./CARES/checkpoints/roberta/checkpoint-2698",
4
+ "epoch": 38.0,
5
+ "global_step": 2698,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_loss": 0.2509869933128357,
13
+ "eval_macro_f1": 0.04342407975460123,
14
+ "eval_macro_precision": 0.034867610837438424,
15
+ "eval_macro_recall": 0.057545731707317076,
16
+ "eval_micro_f1": 0.38635394456289984,
17
+ "eval_micro_precision": 0.5578817733990148,
18
+ "eval_micro_recall": 0.29549902152641877,
19
+ "eval_runtime": 2.7138,
20
+ "eval_samples_per_second": 355.964,
21
+ "eval_steps_per_second": 11.423,
22
+ "step": 71
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_loss": 0.21916496753692627,
27
+ "eval_macro_f1": 0.10341134166953839,
28
+ "eval_macro_precision": 0.1498316489705596,
29
+ "eval_macro_recall": 0.08601393938480183,
30
+ "eval_micro_f1": 0.42796410014171,
31
+ "eval_micro_precision": 0.7756849315068494,
32
+ "eval_micro_recall": 0.29549902152641877,
33
+ "eval_runtime": 2.7022,
34
+ "eval_samples_per_second": 357.485,
35
+ "eval_steps_per_second": 11.472,
36
+ "step": 142
37
+ },
38
+ {
39
+ "epoch": 3.0,
40
+ "eval_loss": 0.1901940405368805,
41
+ "eval_macro_f1": 0.17433694331035438,
42
+ "eval_macro_precision": 0.1996622297635695,
43
+ "eval_macro_recall": 0.1554066444975929,
44
+ "eval_micro_f1": 0.5823627287853577,
45
+ "eval_micro_precision": 0.8036739380022963,
46
+ "eval_micro_recall": 0.45662100456621,
47
+ "eval_runtime": 2.7029,
48
+ "eval_samples_per_second": 357.398,
49
+ "eval_steps_per_second": 11.469,
50
+ "step": 213
51
+ },
52
+ {
53
+ "epoch": 4.0,
54
+ "eval_loss": 0.16621476411819458,
55
+ "eval_macro_f1": 0.20771764577178875,
56
+ "eval_macro_precision": 0.3206539932628853,
57
+ "eval_macro_recall": 0.18121219739914096,
58
+ "eval_micro_f1": 0.6394934705184012,
59
+ "eval_micro_precision": 0.8128772635814889,
60
+ "eval_micro_recall": 0.5270711024135681,
61
+ "eval_runtime": 2.7016,
62
+ "eval_samples_per_second": 357.565,
63
+ "eval_steps_per_second": 11.475,
64
+ "step": 284
65
+ },
66
+ {
67
+ "epoch": 5.0,
68
+ "eval_loss": 0.1592407524585724,
69
+ "eval_macro_f1": 0.32374988901102003,
70
+ "eval_macro_precision": 0.36319303902027156,
71
+ "eval_macro_recall": 0.30985669516000114,
72
+ "eval_micro_f1": 0.6886094674556215,
73
+ "eval_micro_precision": 0.7950469684030743,
74
+ "eval_micro_recall": 0.6073059360730594,
75
+ "eval_runtime": 2.7015,
76
+ "eval_samples_per_second": 357.579,
77
+ "eval_steps_per_second": 11.475,
78
+ "step": 355
79
+ },
80
+ {
81
+ "epoch": 6.0,
82
+ "eval_loss": 0.1491735577583313,
83
+ "eval_macro_f1": 0.3533190399166411,
84
+ "eval_macro_precision": 0.4078813390257998,
85
+ "eval_macro_recall": 0.33542234175118296,
86
+ "eval_micro_f1": 0.7270094134685011,
87
+ "eval_micro_precision": 0.8169243287225386,
88
+ "eval_micro_recall": 0.6549249836921069,
89
+ "eval_runtime": 2.7029,
90
+ "eval_samples_per_second": 357.392,
91
+ "eval_steps_per_second": 11.469,
92
+ "step": 426
93
+ },
94
+ {
95
+ "epoch": 7.0,
96
+ "eval_loss": 0.14026637375354767,
97
+ "eval_macro_f1": 0.47073084873229765,
98
+ "eval_macro_precision": 0.6837800649883113,
99
+ "eval_macro_recall": 0.423964742702268,
100
+ "eval_micro_f1": 0.75768156424581,
101
+ "eval_micro_precision": 0.8151765589782118,
102
+ "eval_micro_recall": 0.7077625570776256,
103
+ "eval_runtime": 2.7322,
104
+ "eval_samples_per_second": 353.558,
105
+ "eval_steps_per_second": 11.346,
106
+ "step": 497
107
+ },
108
  {
109
  "epoch": 7.04,
110
  "learning_rate": 4.647887323943662e-05,
111
+ "loss": 0.1794,
112
  "step": 500
113
  },
114
  {
115
+ "epoch": 8.0,
116
+ "eval_loss": 0.13919810950756073,
117
+ "eval_macro_f1": 0.4762066395538287,
118
+ "eval_macro_precision": 0.6922175380127314,
119
+ "eval_macro_recall": 0.4248427568334097,
120
+ "eval_micro_f1": 0.7554770318021201,
121
+ "eval_micro_precision": 0.8242097147262915,
122
+ "eval_micro_recall": 0.6973255055446836,
123
+ "eval_runtime": 2.7464,
124
+ "eval_samples_per_second": 351.729,
125
+ "eval_steps_per_second": 11.287,
126
+ "step": 568
127
+ },
128
+ {
129
+ "epoch": 9.0,
130
+ "eval_loss": 0.14247503876686096,
131
+ "eval_macro_f1": 0.49866416753622766,
132
+ "eval_macro_precision": 0.6531542097723891,
133
+ "eval_macro_recall": 0.4611636373916834,
134
+ "eval_micro_f1": 0.7498267498267498,
135
+ "eval_micro_precision": 0.7997043606799704,
136
+ "eval_micro_recall": 0.7058056099151989,
137
+ "eval_runtime": 2.7469,
138
+ "eval_samples_per_second": 351.665,
139
+ "eval_steps_per_second": 11.285,
140
+ "step": 639
141
+ },
142
+ {
143
+ "epoch": 10.0,
144
+ "eval_loss": 0.14413639903068542,
145
+ "eval_macro_f1": 0.515652512172511,
146
+ "eval_macro_precision": 0.5714508837953802,
147
+ "eval_macro_recall": 0.494438079407737,
148
+ "eval_micro_f1": 0.7669675693747909,
149
+ "eval_micro_precision": 0.7866941015089163,
150
+ "eval_micro_recall": 0.7482061317677756,
151
+ "eval_runtime": 2.752,
152
+ "eval_samples_per_second": 351.019,
153
+ "eval_steps_per_second": 11.265,
154
+ "step": 710
155
+ },
156
+ {
157
+ "epoch": 11.0,
158
+ "eval_loss": 0.13881583511829376,
159
+ "eval_macro_f1": 0.5815495436036973,
160
+ "eval_macro_precision": 0.6306553976911301,
161
+ "eval_macro_recall": 0.5606746536385887,
162
+ "eval_micro_f1": 0.7825223435948361,
163
+ "eval_micro_precision": 0.7943548387096774,
164
+ "eval_micro_recall": 0.7710371819960861,
165
+ "eval_runtime": 2.7353,
166
+ "eval_samples_per_second": 353.165,
167
+ "eval_steps_per_second": 11.333,
168
+ "step": 781
169
+ },
170
+ {
171
+ "epoch": 12.0,
172
+ "eval_loss": 0.14889590442180634,
173
+ "eval_macro_f1": 0.582949261540157,
174
+ "eval_macro_precision": 0.6154355859721446,
175
+ "eval_macro_recall": 0.5660348822272816,
176
+ "eval_micro_f1": 0.7689724647414371,
177
+ "eval_micro_precision": 0.7923875432525952,
178
+ "eval_micro_recall": 0.7469015003261579,
179
+ "eval_runtime": 2.7291,
180
+ "eval_samples_per_second": 353.967,
181
+ "eval_steps_per_second": 11.359,
182
+ "step": 852
183
+ },
184
+ {
185
+ "epoch": 13.0,
186
+ "eval_loss": 0.14177829027175903,
187
+ "eval_macro_f1": 0.6003936149639709,
188
+ "eval_macro_precision": 0.6178459654131079,
189
+ "eval_macro_recall": 0.5986040907728625,
190
+ "eval_micro_f1": 0.7824089268132589,
191
+ "eval_micro_precision": 0.7873183619550859,
192
+ "eval_micro_recall": 0.7775603392041748,
193
+ "eval_runtime": 2.7236,
194
+ "eval_samples_per_second": 354.674,
195
+ "eval_steps_per_second": 11.382,
196
+ "step": 923
197
+ },
198
+ {
199
+ "epoch": 14.0,
200
+ "eval_loss": 0.13778340816497803,
201
+ "eval_macro_f1": 0.6071892145561097,
202
+ "eval_macro_precision": 0.6420473960894854,
203
+ "eval_macro_recall": 0.5902771317099681,
204
+ "eval_micro_f1": 0.7962716378162449,
205
+ "eval_micro_precision": 0.8130523453433038,
206
+ "eval_micro_recall": 0.7801696020874103,
207
+ "eval_runtime": 4.6013,
208
+ "eval_samples_per_second": 209.942,
209
+ "eval_steps_per_second": 6.737,
210
+ "step": 994
211
  },
212
  {
213
  "epoch": 14.08,
214
  "learning_rate": 4.295774647887324e-05,
215
+ "loss": 0.0459,
216
  "step": 1000
217
  },
218
  {
219
+ "epoch": 15.0,
220
+ "eval_loss": 0.14022594690322876,
221
+ "eval_macro_f1": 0.6215256196989195,
222
+ "eval_macro_precision": 0.7025580891227308,
223
+ "eval_macro_recall": 0.5957635875240769,
224
+ "eval_micro_f1": 0.7986776859504131,
225
+ "eval_micro_precision": 0.8096514745308311,
226
+ "eval_micro_recall": 0.7879973907371167,
227
+ "eval_runtime": 2.7033,
228
+ "eval_samples_per_second": 357.348,
229
+ "eval_steps_per_second": 11.468,
230
+ "step": 1065
231
+ },
232
+ {
233
+ "epoch": 16.0,
234
+ "eval_loss": 0.1448926478624344,
235
+ "eval_macro_f1": 0.608747774411539,
236
+ "eval_macro_precision": 0.621954970496014,
237
+ "eval_macro_recall": 0.6062377476848362,
238
+ "eval_micro_f1": 0.79816813869807,
239
+ "eval_micro_precision": 0.800524934383202,
240
+ "eval_micro_recall": 0.7958251793868232,
241
+ "eval_runtime": 2.7015,
242
+ "eval_samples_per_second": 357.574,
243
+ "eval_steps_per_second": 11.475,
244
+ "step": 1136
245
+ },
246
+ {
247
+ "epoch": 17.0,
248
+ "eval_loss": 0.1469811201095581,
249
+ "eval_macro_f1": 0.6153866789938162,
250
+ "eval_macro_precision": 0.650591338815516,
251
+ "eval_macro_recall": 0.5950841704161889,
252
+ "eval_micro_f1": 0.7996005326231691,
253
+ "eval_micro_precision": 0.8164513936097892,
254
+ "eval_micro_recall": 0.7834311806914547,
255
+ "eval_runtime": 2.701,
256
+ "eval_samples_per_second": 357.647,
257
+ "eval_steps_per_second": 11.477,
258
+ "step": 1207
259
+ },
260
+ {
261
+ "epoch": 18.0,
262
+ "eval_loss": 0.15327712893486023,
263
+ "eval_macro_f1": 0.6316443999429009,
264
+ "eval_macro_precision": 0.6512229785159827,
265
+ "eval_macro_recall": 0.6349187975925275,
266
+ "eval_micro_f1": 0.7879777850375694,
267
+ "eval_micro_precision": 0.7892670157068062,
268
+ "eval_micro_recall": 0.786692759295499,
269
+ "eval_runtime": 5.7579,
270
+ "eval_samples_per_second": 167.769,
271
+ "eval_steps_per_second": 5.384,
272
+ "step": 1278
273
+ },
274
+ {
275
+ "epoch": 19.0,
276
+ "eval_loss": 0.15278153121471405,
277
+ "eval_macro_f1": 0.6310003042032536,
278
+ "eval_macro_precision": 0.6839818740829384,
279
+ "eval_macro_recall": 0.6255359551717035,
280
+ "eval_micro_f1": 0.7923659098387628,
281
+ "eval_micro_precision": 0.799468791500664,
282
+ "eval_micro_recall": 0.7853881278538812,
283
+ "eval_runtime": 2.7,
284
+ "eval_samples_per_second": 357.774,
285
+ "eval_steps_per_second": 11.481,
286
+ "step": 1349
287
+ },
288
+ {
289
+ "epoch": 20.0,
290
+ "eval_loss": 0.15630246698856354,
291
+ "eval_macro_f1": 0.632349332888396,
292
+ "eval_macro_precision": 0.6992914076324832,
293
+ "eval_macro_recall": 0.6115304182846466,
294
+ "eval_micro_f1": 0.7856437273625287,
295
+ "eval_micro_precision": 0.793218085106383,
296
+ "eval_micro_recall": 0.7782126549249837,
297
+ "eval_runtime": 2.7011,
298
+ "eval_samples_per_second": 357.638,
299
+ "eval_steps_per_second": 11.477,
300
+ "step": 1420
301
+ },
302
+ {
303
+ "epoch": 21.0,
304
+ "eval_loss": 0.15509752929210663,
305
+ "eval_macro_f1": 0.6092726479340159,
306
+ "eval_macro_precision": 0.6680318673287149,
307
+ "eval_macro_recall": 0.5717336891200013,
308
+ "eval_micro_f1": 0.7991967871485944,
309
+ "eval_micro_precision": 0.8206185567010309,
310
+ "eval_micro_recall": 0.7788649706457925,
311
+ "eval_runtime": 2.6986,
312
+ "eval_samples_per_second": 357.967,
313
+ "eval_steps_per_second": 11.488,
314
+ "step": 1491
315
  },
316
  {
317
  "epoch": 21.13,
318
  "learning_rate": 3.943661971830986e-05,
319
+ "loss": 0.0169,
320
  "step": 1500
321
  },
322
  {
323
+ "epoch": 22.0,
324
+ "eval_loss": 0.15633134543895721,
325
+ "eval_macro_f1": 0.6559542184820522,
326
+ "eval_macro_precision": 0.6920252568569637,
327
+ "eval_macro_recall": 0.6423031165953985,
328
+ "eval_micro_f1": 0.8009213557091149,
329
+ "eval_micro_precision": 0.8081009296148738,
330
+ "eval_micro_recall": 0.7938682322243966,
331
+ "eval_runtime": 2.7117,
332
+ "eval_samples_per_second": 356.232,
333
+ "eval_steps_per_second": 11.432,
334
+ "step": 1562
335
+ },
336
+ {
337
+ "epoch": 23.0,
338
+ "eval_loss": 0.16708678007125854,
339
+ "eval_macro_f1": 0.6341964700327825,
340
+ "eval_macro_precision": 0.6730301960786685,
341
+ "eval_macro_recall": 0.6361571334823197,
342
+ "eval_micro_f1": 0.7845195145949493,
343
+ "eval_micro_precision": 0.7889182058047494,
344
+ "eval_micro_recall": 0.7801696020874103,
345
+ "eval_runtime": 2.7072,
346
+ "eval_samples_per_second": 356.825,
347
+ "eval_steps_per_second": 11.451,
348
+ "step": 1633
349
+ },
350
+ {
351
+ "epoch": 24.0,
352
+ "eval_loss": 0.15924513339996338,
353
+ "eval_macro_f1": 0.6567583708222104,
354
+ "eval_macro_precision": 0.6932259526572662,
355
+ "eval_macro_recall": 0.6481414988629403,
356
+ "eval_micro_f1": 0.8018252933507171,
357
+ "eval_micro_precision": 0.8013029315960912,
358
  "eval_micro_recall": 0.8023483365949119,
359
+ "eval_runtime": 2.7129,
360
+ "eval_samples_per_second": 356.081,
361
+ "eval_steps_per_second": 11.427,
362
+ "step": 1704
363
+ },
364
+ {
365
+ "epoch": 25.0,
366
+ "eval_loss": 0.15955589711666107,
367
+ "eval_macro_f1": 0.6499053948607745,
368
+ "eval_macro_precision": 0.7063535942579273,
369
+ "eval_macro_recall": 0.622573757353082,
370
+ "eval_micro_f1": 0.8029100529100529,
371
+ "eval_micro_precision": 0.8142186452045607,
372
+ "eval_micro_recall": 0.79191128506197,
373
+ "eval_runtime": 2.7378,
374
+ "eval_samples_per_second": 352.84,
375
+ "eval_steps_per_second": 11.323,
376
+ "step": 1775
377
+ },
378
+ {
379
+ "epoch": 26.0,
380
+ "eval_loss": 0.1635105311870575,
381
+ "eval_macro_f1": 0.6483066019452002,
382
+ "eval_macro_precision": 0.6824077806874724,
383
+ "eval_macro_recall": 0.6413983597695092,
384
+ "eval_micro_f1": 0.7965879265091863,
385
+ "eval_micro_precision": 0.8013201320132013,
386
+ "eval_micro_recall": 0.79191128506197,
387
+ "eval_runtime": 2.7006,
388
+ "eval_samples_per_second": 357.694,
389
+ "eval_steps_per_second": 11.479,
390
+ "step": 1846
391
+ },
392
+ {
393
+ "epoch": 27.0,
394
+ "eval_loss": 0.16744764149188995,
395
+ "eval_macro_f1": 0.6758901394131414,
396
+ "eval_macro_precision": 0.7249401901181491,
397
+ "eval_macro_recall": 0.6772836080088236,
398
+ "eval_micro_f1": 0.789776357827476,
399
+ "eval_micro_precision": 0.7739511584220413,
400
+ "eval_micro_recall": 0.8062622309197651,
401
+ "eval_runtime": 2.7031,
402
+ "eval_samples_per_second": 357.373,
403
+ "eval_steps_per_second": 11.468,
404
+ "step": 1917
405
+ },
406
+ {
407
+ "epoch": 28.0,
408
+ "eval_loss": 0.17063026130199432,
409
+ "eval_macro_f1": 0.6882911714431834,
410
+ "eval_macro_precision": 0.7320265820262954,
411
+ "eval_macro_recall": 0.6788510173157892,
412
+ "eval_micro_f1": 0.7945659377070908,
413
+ "eval_micro_precision": 0.8074074074074075,
414
+ "eval_micro_recall": 0.7821265492498369,
415
+ "eval_runtime": 2.7065,
416
+ "eval_samples_per_second": 356.912,
417
+ "eval_steps_per_second": 11.454,
418
+ "step": 1988
419
  },
420
  {
421
  "epoch": 28.17,
422
  "learning_rate": 3.5915492957746486e-05,
423
+ "loss": 0.0088,
424
  "step": 2000
425
  },
426
  {
427
+ "epoch": 29.0,
428
+ "eval_loss": 0.17283257842063904,
429
+ "eval_macro_f1": 0.6771985368510574,
430
+ "eval_macro_precision": 0.7332109505236407,
431
+ "eval_macro_recall": 0.663425919860526,
432
+ "eval_micro_f1": 0.7905294311081881,
433
+ "eval_micro_precision": 0.7970822281167109,
434
+ "eval_micro_recall": 0.7840834964122635,
435
+ "eval_runtime": 2.7052,
436
+ "eval_samples_per_second": 357.094,
437
+ "eval_steps_per_second": 11.46,
438
+ "step": 2059
439
+ },
440
+ {
441
+ "epoch": 30.0,
442
+ "eval_loss": 0.18444736301898956,
443
+ "eval_macro_f1": 0.6746367426025414,
444
+ "eval_macro_precision": 0.7201523247516791,
445
+ "eval_macro_recall": 0.6728013388910854,
446
+ "eval_micro_f1": 0.7788242936018187,
447
+ "eval_micro_precision": 0.7755498059508409,
448
+ "eval_micro_recall": 0.7821265492498369,
449
+ "eval_runtime": 2.704,
450
+ "eval_samples_per_second": 357.254,
451
+ "eval_steps_per_second": 11.465,
452
+ "step": 2130
453
+ },
454
+ {
455
+ "epoch": 31.0,
456
+ "eval_loss": 0.1695714294910431,
457
+ "eval_macro_f1": 0.6879346286499437,
458
+ "eval_macro_precision": 0.7516673286655658,
459
+ "eval_macro_recall": 0.6669919763081706,
460
+ "eval_micro_f1": 0.802747791952895,
461
+ "eval_micro_precision": 0.8051181102362205,
462
+ "eval_micro_recall": 0.8003913894324853,
463
+ "eval_runtime": 2.7049,
464
+ "eval_samples_per_second": 357.124,
465
+ "eval_steps_per_second": 11.46,
466
+ "step": 2201
467
+ },
468
+ {
469
+ "epoch": 32.0,
470
+ "eval_loss": 0.16994765400886536,
471
+ "eval_macro_f1": 0.7053100252741045,
472
+ "eval_macro_precision": 0.805278845304122,
473
+ "eval_macro_recall": 0.6824655471979887,
474
+ "eval_micro_f1": 0.8006472491909384,
475
+ "eval_micro_precision": 0.7944765574823378,
476
+ "eval_micro_recall": 0.806914546640574,
477
+ "eval_runtime": 2.7034,
478
+ "eval_samples_per_second": 357.325,
479
+ "eval_steps_per_second": 11.467,
480
+ "step": 2272
481
+ },
482
+ {
483
+ "epoch": 33.0,
484
+ "eval_loss": 0.17551660537719727,
485
+ "eval_macro_f1": 0.7235647899419184,
486
+ "eval_macro_precision": 0.7984745552617538,
487
+ "eval_macro_recall": 0.6960462941676209,
488
+ "eval_micro_f1": 0.8010352636687157,
489
+ "eval_micro_precision": 0.7946084724005135,
490
+ "eval_micro_recall": 0.8075668623613829,
491
+ "eval_runtime": 2.7047,
492
+ "eval_samples_per_second": 357.151,
493
+ "eval_steps_per_second": 11.461,
494
+ "step": 2343
495
+ },
496
+ {
497
+ "epoch": 34.0,
498
+ "eval_loss": 0.17244330048561096,
499
+ "eval_macro_f1": 0.6948832290211537,
500
+ "eval_macro_precision": 0.7553961335643664,
501
+ "eval_macro_recall": 0.6730022583492952,
502
+ "eval_micro_f1": 0.8063989552726085,
503
+ "eval_micro_precision": 0.8071895424836601,
504
+ "eval_micro_recall": 0.8056099151989563,
505
+ "eval_runtime": 2.7035,
506
+ "eval_samples_per_second": 357.318,
507
+ "eval_steps_per_second": 11.467,
508
+ "step": 2414
509
+ },
510
+ {
511
+ "epoch": 35.0,
512
+ "eval_loss": 0.17627869546413422,
513
+ "eval_macro_f1": 0.7275662557839568,
514
+ "eval_macro_precision": 0.7750438946995515,
515
+ "eval_macro_recall": 0.717891607952716,
516
+ "eval_micro_f1": 0.8019261637239165,
517
+ "eval_micro_precision": 0.7895069532237674,
518
+ "eval_micro_recall": 0.8147423352902805,
519
+ "eval_runtime": 2.7043,
520
+ "eval_samples_per_second": 357.211,
521
+ "eval_steps_per_second": 11.463,
522
+ "step": 2485
523
  },
524
  {
525
  "epoch": 35.21,
526
  "learning_rate": 3.23943661971831e-05,
527
+ "loss": 0.0051,
528
  "step": 2500
529
  },
530
  {
531
+ "epoch": 36.0,
532
+ "eval_loss": 0.1767842173576355,
533
+ "eval_macro_f1": 0.6877210668749649,
534
+ "eval_macro_precision": 0.7524115221848844,
535
+ "eval_macro_recall": 0.6652074916473143,
536
+ "eval_micro_f1": 0.7996077149395227,
537
+ "eval_micro_precision": 0.8014416775884666,
538
+ "eval_micro_recall": 0.7977821265492498,
539
+ "eval_runtime": 2.7004,
540
+ "eval_samples_per_second": 357.724,
541
+ "eval_steps_per_second": 11.48,
542
+ "step": 2556
543
+ },
544
+ {
545
+ "epoch": 37.0,
546
+ "eval_loss": 0.17860282957553864,
547
+ "eval_macro_f1": 0.7341275703601238,
548
+ "eval_macro_precision": 0.8208336935763849,
549
+ "eval_macro_recall": 0.6963532745066767,
550
+ "eval_micro_f1": 0.8019512195121952,
551
+ "eval_micro_precision": 0.7996108949416343,
552
+ "eval_micro_recall": 0.8043052837573386,
553
+ "eval_runtime": 2.7018,
554
+ "eval_samples_per_second": 357.539,
555
+ "eval_steps_per_second": 11.474,
556
+ "step": 2627
557
  },
558
  {
559
+ "epoch": 38.0,
560
+ "eval_loss": 0.18057870864868164,
561
+ "eval_macro_f1": 0.7414142113821449,
562
+ "eval_macro_precision": 0.8229012131281167,
563
+ "eval_macro_recall": 0.7070558641306933,
564
+ "eval_micro_f1": 0.8049575994781474,
565
+ "eval_micro_precision": 0.8049575994781474,
566
+ "eval_micro_recall": 0.8049575994781474,
567
+ "eval_runtime": 2.703,
568
+ "eval_samples_per_second": 357.385,
569
+ "eval_steps_per_second": 11.469,
570
+ "step": 2698
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  }
572
  ],
573
  "max_steps": 7100,
574
  "num_train_epochs": 100,
575
+ "total_flos": 2.2147253694876576e+16,
576
  "trial_name": null,
577
  "trial_params": null
578
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95ef9e04b1c54450ebb0e5d368ad5f45d662a1cb072d8e0e194111a56c16c7d5
3
- size 3247
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3415febb816dc8caa278fa1560a2ebbb18bac1da83bdbbd581a6a1409f9c4b4d
3
+ size 3183