RaphaelMourad commited on
Commit
d94d2d0
1 Parent(s): d70b156

Upload 9 files

Browse files
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "data/models/Mixtral-8x7B-v0.2-chem",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 256,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 256,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "mixtral",
15
+ "num_attention_heads": 8,
16
+ "num_experts_per_tok": 1,
17
+ "num_hidden_layers": 8,
18
+ "num_key_value_heads": 8,
19
+ "num_local_experts": 8,
20
+ "output_router_logits": false,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_theta": 1000000.0,
23
+ "router_aux_loss_coef": 0.02,
24
+ "sliding_window": null,
25
+ "tie_word_embeddings": false,
26
+ "torch_dtype": "bfloat16",
27
+ "transformers_version": "4.37.2",
28
+ "use_cache": true,
29
+ "vocab_size": 1024
30
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.37.2"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da40836107d5fbf232e420c1110e75a9f1a3a5512ebe0be476f4835bc379c8d9
3
+ size 30480368
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd30b553c2f7a2dd6e36b36ea310da8ceae30fdebe847fa55b5ca0773b89b89
3
+ size 14308
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00b8f5743bdd6dd1a4f2d5fa1dc81a8e54fe4e36a0ce77c1f3e366e30290c19
3
+ size 1064
tokenizer.json ADDED
@@ -0,0 +1,2080 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[CLS]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SEP]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": {
54
+ "type": "Whitespace"
55
+ },
56
+ "post_processor": null,
57
+ "decoder": null,
58
+ "model": {
59
+ "type": "BPE",
60
+ "dropout": null,
61
+ "unk_token": "[UNK]",
62
+ "continuing_subword_prefix": null,
63
+ "end_of_word_suffix": null,
64
+ "fuse_unk": false,
65
+ "byte_fallback": false,
66
+ "vocab": {
67
+ "[UNK]": 0,
68
+ "[CLS]": 1,
69
+ "[SEP]": 2,
70
+ "[PAD]": 3,
71
+ "[MASK]": 4,
72
+ "#": 5,
73
+ "(": 6,
74
+ ")": 7,
75
+ "+": 8,
76
+ "-": 9,
77
+ "/": 10,
78
+ "1": 11,
79
+ "2": 12,
80
+ "3": 13,
81
+ "4": 14,
82
+ "5": 15,
83
+ "6": 16,
84
+ "7": 17,
85
+ "8": 18,
86
+ "=": 19,
87
+ "@": 20,
88
+ "B": 21,
89
+ "C": 22,
90
+ "F": 23,
91
+ "H": 24,
92
+ "I": 25,
93
+ "N": 26,
94
+ "O": 27,
95
+ "P": 28,
96
+ "S": 29,
97
+ "[": 30,
98
+ "\\": 31,
99
+ "]": 32,
100
+ "c": 33,
101
+ "l": 34,
102
+ "n": 35,
103
+ "o": 36,
104
+ "r": 37,
105
+ "s": 38,
106
+ "cc": 39,
107
+ "CC": 40,
108
+ "c1": 41,
109
+ "(=": 42,
110
+ "c2": 43,
111
+ "c1cc": 44,
112
+ "](": 45,
113
+ "@@": 46,
114
+ "NC": 47,
115
+ "c2cc": 48,
116
+ "c3": 49,
117
+ "CCC": 50,
118
+ "c1ccc": 51,
119
+ ")[": 52,
120
+ "cc1": 53,
121
+ "NH": 54,
122
+ "+]": 55,
123
+ "c1cccc": 56,
124
+ "CO": 57,
125
+ "1CC": 58,
126
+ "c2cccc": 59,
127
+ "n1": 60,
128
+ "Cl": 61,
129
+ "nc": 62,
130
+ "C1": 63,
131
+ "c3cc": 64,
132
+ "2CC": 65,
133
+ "OC": 66,
134
+ "CC1": 67,
135
+ "c2ccc": 68,
136
+ "c2ccccc2": 69,
137
+ "c1ccccc1": 70,
138
+ ")(": 71,
139
+ "(-": 72,
140
+ "CN": 73,
141
+ "([": 74,
142
+ "cc2": 75,
143
+ "n2": 76,
144
+ "-]": 77,
145
+ ")(=": 78,
146
+ "1C": 79,
147
+ "nH": 80,
148
+ "C2": 81,
149
+ "CC2": 82,
150
+ "c1n": 83,
151
+ "1CCC": 84,
152
+ "c3cccc": 85,
153
+ "-])": 86,
154
+ "c1c": 87,
155
+ "c2c": 88,
156
+ "Cc1ccc": 89,
157
+ "+](": 90,
158
+ "NH2": 91,
159
+ "CCO": 92,
160
+ ")=": 93,
161
+ "c3ccccc3": 94,
162
+ "2C": 95,
163
+ "CCN": 96,
164
+ "c4": 97,
165
+ "Cc1cc": 98,
166
+ "N1CC": 99,
167
+ "nn": 100,
168
+ "COc1ccc": 101,
169
+ "2CCC": 102,
170
+ "s1": 103,
171
+ "Br": 104,
172
+ "CNC": 105,
173
+ "CS": 106,
174
+ "c3ccc": 107,
175
+ "N2CC": 108,
176
+ "c2n": 109,
177
+ "ccc1": 110,
178
+ "+](=": 111,
179
+ "c1nc": 112,
180
+ "COC": 113,
181
+ "o1": 114,
182
+ "cc3": 115,
183
+ "3CC": 116,
184
+ "c1C": 117,
185
+ "nc2": 118,
186
+ "cccc": 119,
187
+ "NCC": 120,
188
+ "C1CC": 121,
189
+ "nc1": 122,
190
+ "n3": 123,
191
+ "c3c": 124,
192
+ "+][": 125,
193
+ "Nc1ccc": 126,
194
+ "ccc": 127,
195
+ "Cc1n": 128,
196
+ "CC3": 129,
197
+ "]([": 130,
198
+ "NH3": 131,
199
+ "s2": 132,
200
+ "Cc1c": 133,
201
+ "c2nc": 134,
202
+ "Cn1": 135,
203
+ "ccc2": 136,
204
+ "Cc1cccc": 137,
205
+ "sc": 138,
206
+ "C2CC": 139,
207
+ "N1CCC": 140,
208
+ "C3": 141,
209
+ "OCC": 142,
210
+ "CCC1": 143,
211
+ "1CCN": 144,
212
+ "o2": 145,
213
+ "COc1cc": 146,
214
+ "(/": 147,
215
+ "Cc1ccccc1": 148,
216
+ "CCC2": 149,
217
+ "@]": 150,
218
+ "CCOC": 151,
219
+ "@@]": 152,
220
+ "oc": 153,
221
+ "CCNC": 154,
222
+ "O1": 155,
223
+ "N1C": 156,
224
+ "cc1C": 157,
225
+ "CCn1": 158,
226
+ "Nc1cc": 159,
227
+ "+])": 160,
228
+ "N1CCN": 161,
229
+ "N2CCC": 162,
230
+ "c2ccccc21": 163,
231
+ ")/": 164,
232
+ "c4cccc": 165,
233
+ "N2C": 166,
234
+ "Nc2ccc": 167,
235
+ "N3": 168,
236
+ "Nc1cccc": 169,
237
+ "CCCC": 170,
238
+ "SC": 171,
239
+ "COc1cccc": 172,
240
+ "Cc1nc": 173,
241
+ "ncc": 174,
242
+ "n2c": 175,
243
+ "1CN": 176,
244
+ "c2c1": 177,
245
+ "N1": 178,
246
+ "SCC": 179,
247
+ "C2CC2": 180,
248
+ "cc1F": 181,
249
+ "nc3": 182,
250
+ "COc1ccccc1": 183,
251
+ "c4ccccc4": 184,
252
+ "Cc2ccc": 185,
253
+ "cn1": 186,
254
+ "c12": 187,
255
+ "cc1Cl": 188,
256
+ "Cc2ccccc2": 189,
257
+ "CCS": 190,
258
+ "c1nnc": 191,
259
+ "3C": 192,
260
+ "CCCO": 193,
261
+ "1CCCC": 194,
262
+ "C1CC1": 195,
263
+ "c1ccc2c": 196,
264
+ "nn1": 197,
265
+ "1CCCN": 198,
266
+ "N2CCN": 199,
267
+ "c1ncc": 200,
268
+ "CCC3": 201,
269
+ "c2C": 202,
270
+ "NNC": 203,
271
+ "c2ncc": 204,
272
+ "Nc1ccccc1": 205,
273
+ "CCOc1ccc": 206,
274
+ "OCO": 207,
275
+ "CCN1C": 208,
276
+ "@](": 209,
277
+ "cc1OC": 210,
278
+ "no1": 211,
279
+ "OCC2": 212,
280
+ "OCC1": 213,
281
+ "NS": 214,
282
+ "N2": 215,
283
+ "Nc2cc": 216,
284
+ "nn2": 217,
285
+ "O2": 218,
286
+ "c3nc": 219,
287
+ "Cn1c": 220,
288
+ "CN1C": 221,
289
+ "@@](": 222,
290
+ "cc1NC": 223,
291
+ "OCCO": 224,
292
+ "=[": 225,
293
+ "N3CC": 226,
294
+ "c2nnc": 227,
295
+ "2CCCC": 228,
296
+ "CCc1ccc": 229,
297
+ "cs1": 230,
298
+ "CN2CC": 231,
299
+ "NCc1ccc": 232,
300
+ "n2cc": 233,
301
+ "c4ccc": 234,
302
+ "s3": 235,
303
+ "C3CC": 236,
304
+ "c3ccccc32": 237,
305
+ "ncc2": 238,
306
+ "NCCC": 239,
307
+ "c23": 240,
308
+ "Cn1cc": 241,
309
+ "c1nn": 242,
310
+ "c2cccs2": 243,
311
+ "CCn1c": 244,
312
+ "Nc2cccc": 245,
313
+ "c1cccs1": 246,
314
+ "ccc3": 247,
315
+ "Nc2ccccc2": 248,
316
+ "o3": 249,
317
+ "Cc1nn": 250,
318
+ "1CCO": 251,
319
+ "c2ccccc12": 252,
320
+ "Cc1": 253,
321
+ "c2ccc3c": 254,
322
+ "sc1": 255,
323
+ "c3n": 256,
324
+ "c1cc2": 257,
325
+ "n1C": 258,
326
+ "CCCC2": 259,
327
+ "ncc1": 260,
328
+ "1CC1": 261,
329
+ "1CCCO1": 262,
330
+ "n1c": 263,
331
+ "1NC": 264,
332
+ "c2cccnc2": 265,
333
+ "+]([": 266,
334
+ "Cc2c": 267,
335
+ "c2ccco2": 268,
336
+ "N2CCOCC2": 269,
337
+ "1CCCC1": 270,
338
+ "n2C": 271,
339
+ "NN": 272,
340
+ "c1ccco1": 273,
341
+ "3CCC": 274,
342
+ "CCO1": 275,
343
+ "c2nn": 276,
344
+ "c4c": 277,
345
+ "cccc2": 278,
346
+ "Cc3ccccc3": 279,
347
+ "ccc21": 280,
348
+ "C1CCN": 281,
349
+ "N1CCOCC1": 282,
350
+ "Nc1c": 283,
351
+ "cc4": 284,
352
+ "CCCNC": 285,
353
+ "N2CCCC2": 286,
354
+ "Cc2cc": 287,
355
+ "+]=": 288,
356
+ "c1nc2ccccc2": 289,
357
+ "2CCCO2": 290,
358
+ "1CCS": 291,
359
+ "1CO": 292,
360
+ "2CCN": 293,
361
+ "C3CC3": 294,
362
+ "1CCCCC1": 295,
363
+ "cc2C": 296,
364
+ "-])[": 297,
365
+ "Cc2cccc": 298,
366
+ "C4": 299,
367
+ "NC1": 300,
368
+ "Cc1noc": 301,
369
+ "OC2": 302,
370
+ "Oc1ccc": 303,
371
+ "c2ccccc2C": 304,
372
+ "c2ccccc2F": 305,
373
+ "CCCC1": 306,
374
+ "Cc1csc": 307,
375
+ "c1ccccc1F": 308,
376
+ "2CCCC2": 309,
377
+ "CCCN": 310,
378
+ "c2ccccc2c1": 311,
379
+ "1O": 312,
380
+ "NC2": 313,
381
+ "c1ccccc1C": 314,
382
+ "ccc1C": 315,
383
+ "1c1ccc": 316,
384
+ "c1ccccn1": 317,
385
+ "ccc12": 318,
386
+ "ccc1F": 319,
387
+ "cn2": 320,
388
+ "n4": 321,
389
+ "c2nc3ccccc3": 322,
390
+ "cccn2": 323,
391
+ "c1OC": 324,
392
+ "c1cn": 325,
393
+ "nc12": 326,
394
+ "CCOCC3": 327,
395
+ "c1cccc2": 328,
396
+ "c2cc3": 329,
397
+ "no2": 330,
398
+ "c1ccccc1Cl": 331,
399
+ "c1ccncc1": 332,
400
+ "CC4": 333,
401
+ "2CCCN": 334,
402
+ "nc1C": 335,
403
+ "OCO2": 336,
404
+ "sc2": 337,
405
+ "c1ccccc1NC": 338,
406
+ "n1cc": 339,
407
+ "OCCO2": 340,
408
+ "CN2C": 341,
409
+ "c1cccnc1": 342,
410
+ "n2nc": 343,
411
+ "Cc3ccc": 344,
412
+ "2CCCCC2": 345,
413
+ "c2ccccc2Cl": 346,
414
+ "c5": 347,
415
+ "S1": 348,
416
+ "c2ccccn2": 349,
417
+ "c2cnn": 350,
418
+ "C1C": 351,
419
+ "nnc1": 352,
420
+ "CCCC3": 353,
421
+ "c1NC": 354,
422
+ "sc1C": 355,
423
+ "CCn1cc": 356,
424
+ "nc4": 357,
425
+ "Nc3ccc": 358,
426
+ "2CC2": 359,
427
+ "cc12": 360,
428
+ "c2s": 361,
429
+ "c2ccncc2": 362,
430
+ "Nc1ccc2c": 363,
431
+ "Nc1nc": 364,
432
+ "(\\": 365,
433
+ "cc2Cl": 366,
434
+ "COCC": 367,
435
+ "cc21": 368,
436
+ "cc2F": 369,
437
+ "c3ccccc23": 370,
438
+ "nc2c1": 371,
439
+ "on1": 372,
440
+ "c2cn": 373,
441
+ "c1csc": 374,
442
+ "c1cnc": 375,
443
+ "2C1": 376,
444
+ "N1CCCC1": 377,
445
+ "cs2": 378,
446
+ "ccc1Cl": 379,
447
+ "ccc1NC": 380,
448
+ "c1noc": 381,
449
+ "N3CCC": 382,
450
+ "CN1CC": 383,
451
+ "N4": 384,
452
+ "ccc1OC": 385,
453
+ "Cc1ncc": 386,
454
+ "NCc2ccc": 387,
455
+ ")([": 388,
456
+ "c3cccs3": 389,
457
+ "C1CCCC1": 390,
458
+ "c1F": 391,
459
+ "21": 392,
460
+ "sc2c1": 393,
461
+ "OCO3": 394,
462
+ "CCc1nn": 395,
463
+ "n3c": 396,
464
+ "32": 397,
465
+ "C1CCC": 398,
466
+ "C1CCCCC1": 399,
467
+ "CNS": 400,
468
+ "ncc3": 401,
469
+ "CN1CCN": 402,
470
+ "2c": 403,
471
+ "CCCO3": 404,
472
+ "nn1C": 405,
473
+ "@@](=": 406,
474
+ "c2ccccc2C1": 407,
475
+ "2CO": 408,
476
+ "CCCCC": 409,
477
+ "c3ccc4c": 410,
478
+ "c1cnn": 411,
479
+ "Cc1ccc2c": 412,
480
+ "CCO2": 413,
481
+ "c1O": 414,
482
+ "c3c2": 415,
483
+ "N3CCCC3": 416,
484
+ "CCOc1cc": 417,
485
+ "CSC": 418,
486
+ "CCc1cc": 419,
487
+ "cccc3": 420,
488
+ "c2csc": 421,
489
+ "c2noc": 422,
490
+ "Cn2c": 423,
491
+ "cc1S": 424,
492
+ "c3ccccc3C": 425,
493
+ "ccccc4": 426,
494
+ "c3cccnc3": 427,
495
+ "COc1ccc2c": 428,
496
+ "Cc1nnc": 429,
497
+ "c1Cl": 430,
498
+ "N2CCCCC2": 431,
499
+ "ccccc2": 432,
500
+ "Oc2ccc": 433,
501
+ "2CN": 434,
502
+ "c1s": 435,
503
+ "-])=": 436,
504
+ "C2CCN": 437,
505
+ "c3ncc": 438,
506
+ "c3C": 439,
507
+ "c3ccco3": 440,
508
+ "@](=": 441,
509
+ "Nc2nc": 442,
510
+ "ncn2": 443,
511
+ "N3CCOCC3": 444,
512
+ "c3nc4": 445,
513
+ "1S": 446,
514
+ "COc2ccc": 447,
515
+ "nc2ccccc21": 448,
516
+ "n2cccn2": 449,
517
+ "ccc2C": 450,
518
+ "c1cc2c": 451,
519
+ "no": 452,
520
+ "3CCCC": 453,
521
+ "cc2c1": 454,
522
+ "n12": 455,
523
+ "1c1ccccc1": 456,
524
+ "ccc1O": 457,
525
+ "Nc1n": 458,
526
+ "cc1Br": 459,
527
+ "C2C": 460,
528
+ "/[": 461,
529
+ "N3C": 462,
530
+ "c3ccccn3": 463,
531
+ "c1N": 464,
532
+ "CSc1ccc": 465,
533
+ "c1cncc": 466,
534
+ "OCCO3": 467,
535
+ "ccccc12": 468,
536
+ "CCCCC2": 469,
537
+ "2CCS": 470,
538
+ "c3ccccc3c2": 471,
539
+ "C2CCCC2": 472,
540
+ "3CCCO3": 473,
541
+ "c2sc": 474,
542
+ "CCCCC1": 475,
543
+ "nnn1": 476,
544
+ "ccccc3": 477,
545
+ "C2CCCCC2": 478,
546
+ "cccn1": 479,
547
+ "N3CCN": 480,
548
+ "3C2": 481,
549
+ "c3ccccc3F": 482,
550
+ "CN2CCN": 483,
551
+ "2CCO": 484,
552
+ "cccn3": 485,
553
+ "CSCC": 486,
554
+ "c2cnc": 487,
555
+ "CCn1nc": 488,
556
+ "OC3": 489,
557
+ "2O": 490,
558
+ "NCC1": 491,
559
+ "Cc1cnc": 492,
560
+ "c3nnc": 493,
561
+ "N1CCCCC1": 494,
562
+ "CCN2C": 495,
563
+ "CCc1nc": 496,
564
+ "N1CCO": 497,
565
+ "n1CC": 498,
566
+ "nn3": 499,
567
+ "Cn1nc": 500,
568
+ "S2": 501,
569
+ "n3cc": 502,
570
+ "cc1O": 503,
571
+ "c1nccs1": 504,
572
+ "c2cccc3": 505,
573
+ "CCOc1ccccc1": 506,
574
+ "oc1": 507,
575
+ "CCc1ccccc1": 508,
576
+ "ncn1": 509,
577
+ "c4cc": 510,
578
+ "CSc1nnc": 511,
579
+ "CCc1cccc": 512,
580
+ "Cc1cccs1": 513,
581
+ "12": 514,
582
+ "NCc1cc": 515,
583
+ "nc2C": 516,
584
+ "1CCCN1C": 517,
585
+ "3CCCC3": 518,
586
+ "n3C": 519,
587
+ "CCOc1cccc": 520,
588
+ "Nc2c": 521,
589
+ "2CCCN2C": 522,
590
+ "sc3": 523,
591
+ "+])[": 524,
592
+ "CCCN1C": 525,
593
+ "Oc1cccc": 526,
594
+ "1CS": 527,
595
+ "1Cc2ccccc2": 528,
596
+ "c1ccn": 529,
597
+ "NCc1ccccc1": 530,
598
+ "NCCN": 531,
599
+ "c3ccncc3": 532,
600
+ "Nc1nnc": 533,
601
+ "nn2C": 534,
602
+ "CCCS": 535,
603
+ "CC1CC": 536,
604
+ "NC1CC1": 537,
605
+ "c2nc3c": 538,
606
+ "cccc1Cl": 539,
607
+ "c1sc": 540,
608
+ "c1ccsc1": 541,
609
+ "N1CCCC": 542,
610
+ "CCCC4": 543,
611
+ "1c1cccc": 544,
612
+ "CCl": 545,
613
+ "ccn1": 546,
614
+ "OC1": 547,
615
+ "cc2C1": 548,
616
+ "Cc1cc2": 549,
617
+ "nc2n1": 550,
618
+ "c2cc3c": 551,
619
+ "1OCC": 552,
620
+ "on2": 553,
621
+ "Nc2ccc3c": 554,
622
+ "oc2": 555,
623
+ "1CCCCN1C": 556,
624
+ "ccc2F": 557,
625
+ "nc2c": 558,
626
+ "oc2c1": 559,
627
+ "NC2CC2": 560,
628
+ "nnc": 561,
629
+ "CCc1n": 562,
630
+ "NC3": 563,
631
+ "cccc1F": 564,
632
+ "CSc1n": 565,
633
+ "1CCOC1": 566,
634
+ "2NC": 567,
635
+ "nc21": 568,
636
+ "4CC": 569,
637
+ "c2ccsc2": 570,
638
+ "CC1CCN": 571,
639
+ "Nc1ccccc1C": 572,
640
+ "c2ncccn2": 573,
641
+ "c2nccs2": 574,
642
+ "Cc1ccsc1": 575,
643
+ "c3ccccc3C2": 576,
644
+ "c2n1": 577,
645
+ "2CS": 578,
646
+ "Nc3ccccc3": 579,
647
+ "1N": 580,
648
+ "Cn2cc": 581,
649
+ "CCNS": 582,
650
+ "Cn2": 583,
651
+ "CCCO2": 584,
652
+ "Cc1ccco1": 585,
653
+ "cccc1": 586,
654
+ "3CC3": 587,
655
+ "4C": 588,
656
+ "NCCNC": 589,
657
+ "Cc1ccccc1C": 590,
658
+ "c2cncc": 591,
659
+ "3CCCCC3": 592,
660
+ "Nc3cccc": 593,
661
+ "NCc1cccc": 594,
662
+ "OCO4": 595,
663
+ "Cc2nc": 596,
664
+ "nc2cc": 597,
665
+ "Cc1cnn": 598,
666
+ "CCN1CC": 599,
667
+ "CCc1c": 600,
668
+ "c1cc2ccccc2": 601,
669
+ "c32": 602,
670
+ "c1o": 603,
671
+ "c2ccccc2n1": 604,
672
+ "nc23": 605,
673
+ "CCOCC1": 606,
674
+ "Cc1cccc2": 607,
675
+ "N3CCCCC3": 608,
676
+ "CSc2nnc": 609,
677
+ "2c2ccc": 610,
678
+ "Cc3c": 611,
679
+ "1CCOC2": 612,
680
+ "CCCn1c": 613,
681
+ ")=[": 614,
682
+ "N2CCCC": 615,
683
+ "ccc2Cl": 616,
684
+ "Nc2ccccc2C": 617,
685
+ "c4ccccc43": 618,
686
+ "OCCC": 619,
687
+ "c3nc4ccccc4": 620,
688
+ "Oc1ccccc1": 621,
689
+ "c2nc3ccccc3s2": 622,
690
+ "2N": 623,
691
+ "cc32": 624,
692
+ "ccccc23": 625,
693
+ "c2oc": 626,
694
+ "Cc1cccnc1": 627,
695
+ "c1nc2ccccc2s1": 628,
696
+ "NC1CCCCC1": 629,
697
+ "c21": 630,
698
+ "ccc1Br": 631,
699
+ "Sc1nnc": 632,
700
+ "Oc2ccccc2": 633,
701
+ "1OC": 634,
702
+ "c3ccccc3Cl": 635,
703
+ "c3s": 636,
704
+ "SC1": 637,
705
+ "NCc2cc": 638,
706
+ "c2o": 639,
707
+ "CO2": 640,
708
+ "COCCN": 641,
709
+ "Nc3cc": 642,
710
+ "ccccc5": 643,
711
+ "COc1c": 644,
712
+ "C1N": 645,
713
+ "COc1ccccc1C": 646,
714
+ "COc1ccccc1NC": 647,
715
+ "Cc1o": 648,
716
+ "CSc1nc": 649,
717
+ "nc2s": 650,
718
+ "n2cncn2": 651,
719
+ "c2ccccc2O1": 652,
720
+ "n2cnn": 653,
721
+ "N2CCO": 654,
722
+ "ccc32": 655,
723
+ "c34": 656,
724
+ "Cc1ccccc1NC": 657,
725
+ "c3nn": 658,
726
+ "c2c1C": 659,
727
+ "no3": 660,
728
+ "1CCCCC": 661,
729
+ "ccc2c1": 662,
730
+ "3CCN": 663,
731
+ "n1nc": 664,
732
+ "oc1C": 665,
733
+ "Cc1ccccn1": 666,
734
+ "CCc2ccccc2": 667,
735
+ "n3nc": 668,
736
+ "Fc1ccc": 669,
737
+ "O3": 670,
738
+ "c1ccc2ccccc2c1": 671,
739
+ "n2CC": 672,
740
+ "CCCCNC": 673,
741
+ "c2Cl": 674,
742
+ "CCCc1cc": 675,
743
+ "c1CC": 676,
744
+ "Cc1nc2ccccc2": 677,
745
+ "Cc2ccco2": 678,
746
+ "CN1CCC": 679,
747
+ "CN2CCC": 680,
748
+ "2CCOC2": 681,
749
+ "c1cccc2ccccc12": 682,
750
+ "FC": 683,
751
+ "n2ccnc2": 684,
752
+ "CCOCC": 685,
753
+ "CCC4": 686,
754
+ "2C3": 687,
755
+ "c1ccnc": 688,
756
+ "2c2ccccc2": 689,
757
+ "(\\[": 690,
758
+ "Cc3cccc": 691,
759
+ "cc3C": 692,
760
+ "Cc3cc": 693,
761
+ "n2n1": 694,
762
+ "cc1N": 695,
763
+ "cccc2Cl": 696,
764
+ "cn": 697,
765
+ "NCc2ccccc2": 698,
766
+ "1CCc2c": 699,
767
+ "n1cccn1": 700,
768
+ "2CCOCC2": 701,
769
+ "NCC2": 702,
770
+ "cn3": 703,
771
+ "CCCCN": 704,
772
+ "NC1CC": 705,
773
+ "COCCNC": 706,
774
+ "Cc4ccccc4": 707,
775
+ "CCCOc1ccc": 708,
776
+ "COc1cccc2": 709,
777
+ "NC1CCCC1": 710,
778
+ "2CC3CC": 711,
779
+ "cc23": 712,
780
+ "C4CC4": 713,
781
+ "CN2CCOCC2": 714,
782
+ "CCc1noc": 715,
783
+ "c1nc2c": 716,
784
+ "c1CCC": 717,
785
+ "c3cnn": 718,
786
+ "cnn1C": 719,
787
+ "sc2nc": 720,
788
+ "Cc1cc2c": 721,
789
+ "ccc4": 722,
790
+ "c2C1": 723,
791
+ "nc2ccccc12": 724,
792
+ "1COc2ccccc2O1": 725,
793
+ "CCCO1": 726,
794
+ "n3cccn3": 727,
795
+ "Cc1cn": 728,
796
+ "cccc2F": 729,
797
+ "NCc2cccc": 730,
798
+ "Cc1ccc2nc": 731,
799
+ "N2CCCCCC2": 732,
800
+ "CCCn1cc": 733,
801
+ "c1ccccc1OC": 734,
802
+ "n23": 735,
803
+ "SCC1": 736,
804
+ "cc1CNC": 737,
805
+ "Nc1cccc2": 738,
806
+ "CN1CCO": 739,
807
+ "Cc2cccs2": 740,
808
+ "n2cccc2": 741,
809
+ "NCc1ccco1": 742,
810
+ "Nc2nnc": 743,
811
+ "c1ncccc1C": 744,
812
+ "CCCCC4": 745,
813
+ "s4": 746,
814
+ "CCN1CCN": 747,
815
+ "c3ncccn3": 748,
816
+ "cnn1": 749,
817
+ "1C2": 750,
818
+ "c2F": 751,
819
+ "CCCCC3": 752,
820
+ "1CCOCC1": 753,
821
+ "Nc1cccnc1": 754,
822
+ "N1CCC2": 755,
823
+ "n2nn": 756,
824
+ "Cc2csc": 757,
825
+ "c5ccccc5": 758,
826
+ "Cc1ccnc": 759,
827
+ "cc2s1": 760,
828
+ "C1NC": 761,
829
+ "COc1cc2c": 762,
830
+ "c2ccccn12": 763,
831
+ "3CCOCC3": 764,
832
+ "n1nn": 765,
833
+ "C1CCOCC1": 766,
834
+ "ncn3": 767,
835
+ "c3cc4": 768,
836
+ "n2c1": 769,
837
+ "ccc23": 770,
838
+ "CCc1nnc": 771,
839
+ "c2N": 772,
840
+ "CCN1": 773,
841
+ "C1CCC1": 774,
842
+ "CCOCC4": 775,
843
+ "c1ccccc1Br": 776,
844
+ "nn3C": 777,
845
+ "nn12": 778,
846
+ "nc3ccccc32": 779,
847
+ "CC1CCC": 780,
848
+ "Nc1cnn": 781,
849
+ "nc3c": 782,
850
+ "n1C1CC1": 783,
851
+ "c4ccccc34": 784,
852
+ "1Cc1ccccc1": 785,
853
+ "NCCO": 786,
854
+ "CNc1ccc": 787,
855
+ "N1CCc2cc": 788,
856
+ "nc3c2": 789,
857
+ "cccc1NC": 790,
858
+ "1CCCCCC1": 791,
859
+ "c2sccc2": 792,
860
+ "CC1CC1": 793,
861
+ "OCCCO2": 794,
862
+ "C1CN": 795,
863
+ "CCOCC2": 796,
864
+ "c2nnnn2": 797,
865
+ "CNc2ccc": 798,
866
+ "c2ncn": 799,
867
+ "C3CCCCC3": 800,
868
+ "c2cccnc21": 801,
869
+ "CN1": 802,
870
+ "N1CCc2ccccc21": 803,
871
+ "Cc1oc": 804,
872
+ "Oc2cc": 805,
873
+ "1CCCCO1": 806,
874
+ "Cn1ccc": 807,
875
+ "Cc1ccccc1F": 808,
876
+ "c2cc3ccccc3": 809,
877
+ "N1c1ccc": 810,
878
+ "o4": 811,
879
+ "Oc2cccc": 812,
880
+ "cc1OCC": 813,
881
+ "C3CCCC3": 814,
882
+ "c1ccc2": 815,
883
+ "ccc1N": 816,
884
+ "NC1CCN": 817,
885
+ "c2nc3ccc": 818,
886
+ "NCCc1ccc": 819,
887
+ "NC2CCCCC2": 820,
888
+ "+])(": 821,
889
+ "c1ncnc": 822,
890
+ "Cn2nc": 823,
891
+ "c2ccccc2OC": 824,
892
+ "Cc1ccn": 825,
893
+ "N1CCCN": 826,
894
+ "Cc2ccccn2": 827,
895
+ "12CC": 828,
896
+ "CCO3": 829,
897
+ "SCC2": 830,
898
+ "cccc1C": 831,
899
+ "1CCCc2ccccc21": 832,
900
+ "Cc2cccnc2": 833,
901
+ "Nc3nc": 834,
902
+ "c1CNC": 835,
903
+ "c2nccc": 836,
904
+ "N1CCOC": 837,
905
+ "nc2c1c": 838,
906
+ "SC2": 839,
907
+ "c3ccccc3n2": 840,
908
+ "OCCO4": 841,
909
+ "cc3F": 842,
910
+ "Cn1ncc2": 843,
911
+ "Cc1sc": 844,
912
+ "OCc2ccccc2": 845,
913
+ "Nc1nc2c": 846,
914
+ "23": 847,
915
+ "OCCN": 848,
916
+ "c2ccccc2N": 849,
917
+ "N1CCCCCC1": 850,
918
+ "c4nc": 851,
919
+ "cc3C2": 852,
920
+ "c1ccccc1O": 853,
921
+ "oc2cc": 854,
922
+ "2CO1": 855,
923
+ "Sc2ccc": 856,
924
+ "c1ccccc12": 857,
925
+ "c2nc3cc": 858,
926
+ "nc2ccccc2c1": 859,
927
+ "Nc1ncc": 860,
928
+ "2CCCCCC2": 861,
929
+ "ccc1OCC": 862,
930
+ "ccc2OC": 863,
931
+ "ccc2o1": 864,
932
+ "NC2CC": 865,
933
+ "c2ccnc": 866,
934
+ "CO1": 867,
935
+ "1CNC": 868,
936
+ "Nc1ccccc1F": 869,
937
+ "CN2CCO": 870,
938
+ "c2sc3c": 871,
939
+ "NCc3ccc": 872,
940
+ "NC3CC3": 873,
941
+ "Clc1ccc": 874,
942
+ "nc2s1": 875,
943
+ "c2cnccn2": 876,
944
+ "CCc1ccccc1NC": 877,
945
+ "NCCS": 878,
946
+ "NCc1cccnc1": 879,
947
+ "NCCCC": 880,
948
+ "cccnc12": 881,
949
+ "n3ccnc3": 882,
950
+ "oc12": 883,
951
+ "c2ccccc2O": 884,
952
+ "n2cnnn2": 885,
953
+ "2S": 886,
954
+ "c2n1CC": 887,
955
+ "C2CCOCC2": 888,
956
+ "noc2C": 889,
957
+ "cc3Cl": 890,
958
+ "CN1CCOCC1": 891,
959
+ "Cc1ccccc1Cl": 892,
960
+ "c1ccoc1": 893,
961
+ "c4ccco4": 894,
962
+ "cccc21": 895,
963
+ "c2ccoc2": 896,
964
+ "Cn1ccnc1": 897,
965
+ "c1cc2ccccc2o1": 898,
966
+ "Oc1cc": 899,
967
+ "Cc1ccncc1": 900,
968
+ "c3cncc": 901,
969
+ "c2cc1C": 902,
970
+ "c2ncnc": 903,
971
+ "CNc1cc": 904,
972
+ "c2ncnc3": 905,
973
+ "Cn1nn": 906,
974
+ "12C": 907,
975
+ "NC1CCC": 908,
976
+ "c2ccn": 909,
977
+ "c2ccc3ccccc3c2": 910,
978
+ "CCCOC": 911,
979
+ "Cc2ccccc2F": 912,
980
+ "CSc1cccc": 913,
981
+ "Cc1nc2c": 914,
982
+ "Cc1cncc": 915,
983
+ "Cc2ccccc2Cl": 916,
984
+ "CCc3ccccc3": 917,
985
+ "COc2ccccc2": 918,
986
+ "n3n2": 919,
987
+ "sc2n1": 920,
988
+ "CC2CC2": 921,
989
+ "COC2": 922,
990
+ "Nc1nccs1": 923,
991
+ "Sc1n": 924,
992
+ "oc2ccccc12": 925,
993
+ "c3ccsc3": 926,
994
+ "OCc2ccc": 927,
995
+ "CCOc1c": 928,
996
+ "ccc2n1": 929,
997
+ "5c": 930,
998
+ "NCc1nc": 931,
999
+ "c3n2": 932,
1000
+ "c2cccc3ccccc23": 933,
1001
+ "3c": 934,
1002
+ "Nc4ccc": 935,
1003
+ "CCc2ccc": 936,
1004
+ "COCCN1C": 937,
1005
+ "c2cs": 938,
1006
+ "cc3c": 939,
1007
+ "n3cncn3": 940,
1008
+ "OCc1ccccc1": 941,
1009
+ "C2CCC": 942,
1010
+ "1c1cc": 943,
1011
+ "Sc1ccc": 944,
1012
+ "nc2nc": 945,
1013
+ "NCCc1c": 946,
1014
+ "Cc1nc2cc": 947,
1015
+ "2c2cccc": 948,
1016
+ "3CS": 949,
1017
+ "nnc3": 950,
1018
+ "c2O": 951,
1019
+ "3CCCN": 952,
1020
+ "N4CCOCC4": 953,
1021
+ "2CCOC3": 954,
1022
+ "nc32": 955,
1023
+ "Cc2nnc": 956,
1024
+ "c2c3c": 957,
1025
+ "NCc1ccc2c": 958,
1026
+ "COCc1cc": 959,
1027
+ "cc2c": 960,
1028
+ "(=[": 961,
1029
+ "NC2CCCC2": 962,
1030
+ "1CCCCN": 963,
1031
+ "cc3s2": 964,
1032
+ "oc3": 965,
1033
+ "NCc1c": 966,
1034
+ "NCc2ccco2": 967,
1035
+ "n1n": 968,
1036
+ "c3cccc4": 969,
1037
+ "Cc1cs": 970,
1038
+ "c1oc": 971,
1039
+ "c2nnn": 972,
1040
+ "c1S": 973,
1041
+ "COc2cccc": 974,
1042
+ "2CCCCC": 975,
1043
+ "c4cccs4": 976,
1044
+ "c1cnccn1": 977,
1045
+ "Cc2ccc3c": 978,
1046
+ "nc2sc": 979,
1047
+ "Cn1n": 980,
1048
+ "ccc2O": 981,
1049
+ "c3sc": 982,
1050
+ "CCOc1ccccc1NC": 983,
1051
+ "c1nnc2ccccn12": 984,
1052
+ "c1nc2ccccc2c": 985,
1053
+ "CCSC": 986,
1054
+ "Nc2cnn": 987,
1055
+ "n1cccc1": 988,
1056
+ "c1nc2cc": 989,
1057
+ "c3ccccc3O2": 990,
1058
+ "c1Br": 991,
1059
+ "Nc1ccccc1Cl": 992,
1060
+ "nc2ccc": 993,
1061
+ "2CCC2": 994,
1062
+ "CS2": 995,
1063
+ "oc2C": 996,
1064
+ "n1N": 997,
1065
+ "N4CCCC4": 998,
1066
+ "c3nccs3": 999,
1067
+ "N1CCc2c": 1000,
1068
+ "N2CCCN": 1001,
1069
+ "cn12": 1002,
1070
+ "c2ncccc2C": 1003,
1071
+ "1c1cccs1": 1004,
1072
+ "cnc12": 1005,
1073
+ "OC2CCCC2": 1006,
1074
+ "nnc2": 1007,
1075
+ "3CCS": 1008,
1076
+ "CCCO4": 1009,
1077
+ "C2CCC2": 1010,
1078
+ "C3C": 1011,
1079
+ "c4ccccc4c3": 1012,
1080
+ "c1cs": 1013,
1081
+ "c1C1CC1": 1014,
1082
+ "n1cncn1": 1015,
1083
+ "CN3C": 1016,
1084
+ "c3noc": 1017,
1085
+ "NCCN1C": 1018,
1086
+ "CCc1ncc": 1019,
1087
+ "c1nc2ccc": 1020,
1088
+ "CCOC2": 1021,
1089
+ "N1N": 1022,
1090
+ "nc3C": 1023
1091
+ },
1092
+ "merges": [
1093
+ "c c",
1094
+ "C C",
1095
+ "c 1",
1096
+ "( =",
1097
+ "c 2",
1098
+ "c1 cc",
1099
+ "] (",
1100
+ "@ @",
1101
+ "N C",
1102
+ "c2 cc",
1103
+ "c 3",
1104
+ "CC C",
1105
+ "c1cc c",
1106
+ ") [",
1107
+ "cc 1",
1108
+ "N H",
1109
+ "+ ]",
1110
+ "c1cc cc",
1111
+ "C O",
1112
+ "1 CC",
1113
+ "c2cc cc",
1114
+ "n 1",
1115
+ "C l",
1116
+ "n c",
1117
+ "C 1",
1118
+ "c3 cc",
1119
+ "2 CC",
1120
+ "O C",
1121
+ "CC 1",
1122
+ "c2cc c",
1123
+ "c2cccc c2",
1124
+ "c1cccc c1",
1125
+ ") (",
1126
+ "( -",
1127
+ "C N",
1128
+ "( [",
1129
+ "cc 2",
1130
+ "n 2",
1131
+ "- ]",
1132
+ ") (=",
1133
+ "1 C",
1134
+ "n H",
1135
+ "C 2",
1136
+ "CC 2",
1137
+ "c1 n",
1138
+ "1 CCC",
1139
+ "c3cc cc",
1140
+ "-] )",
1141
+ "c1 c",
1142
+ "c2 c",
1143
+ "C c1ccc",
1144
+ "+ ](",
1145
+ "NH 2",
1146
+ "CC O",
1147
+ ") =",
1148
+ "c3cccc c3",
1149
+ "2 C",
1150
+ "CC N",
1151
+ "c 4",
1152
+ "C c1cc",
1153
+ "N 1CC",
1154
+ "n n",
1155
+ "CO c1ccc",
1156
+ "2 CCC",
1157
+ "s 1",
1158
+ "B r",
1159
+ "C NC",
1160
+ "C S",
1161
+ "c3cc c",
1162
+ "N 2CC",
1163
+ "c2 n",
1164
+ "cc c1",
1165
+ "+] (=",
1166
+ "c1 nc",
1167
+ "CO C",
1168
+ "o 1",
1169
+ "cc 3",
1170
+ "3 CC",
1171
+ "c1 C",
1172
+ "n c2",
1173
+ "cc cc",
1174
+ "N CC",
1175
+ "C 1CC",
1176
+ "n c1",
1177
+ "n 3",
1178
+ "c3 c",
1179
+ "+] [",
1180
+ "N c1ccc",
1181
+ "cc c",
1182
+ "C c1n",
1183
+ "CC 3",
1184
+ "]( [",
1185
+ "NH 3",
1186
+ "s 2",
1187
+ "C c1c",
1188
+ "c2 nc",
1189
+ "C n1",
1190
+ "cc c2",
1191
+ "C c1cccc",
1192
+ "s c",
1193
+ "C 2CC",
1194
+ "N 1CCC",
1195
+ "C 3",
1196
+ "O CC",
1197
+ "CCC 1",
1198
+ "1CC N",
1199
+ "o 2",
1200
+ "CO c1cc",
1201
+ "( /",
1202
+ "C c1ccccc1",
1203
+ "CCC 2",
1204
+ "@ ]",
1205
+ "CC OC",
1206
+ "@@ ]",
1207
+ "o c",
1208
+ "CC NC",
1209
+ "O 1",
1210
+ "N 1C",
1211
+ "cc1 C",
1212
+ "CC n1",
1213
+ "N c1cc",
1214
+ "+] )",
1215
+ "N1CC N",
1216
+ "N 2CCC",
1217
+ "c2ccccc2 1",
1218
+ ") /",
1219
+ "c4 cccc",
1220
+ "N 2C",
1221
+ "N c2ccc",
1222
+ "N 3",
1223
+ "N c1cccc",
1224
+ "CC CC",
1225
+ "S C",
1226
+ "CO c1cccc",
1227
+ "C c1nc",
1228
+ "n cc",
1229
+ "n2 c",
1230
+ "1 CN",
1231
+ "c2 c1",
1232
+ "N 1",
1233
+ "S CC",
1234
+ "C2CC 2",
1235
+ "cc1 F",
1236
+ "n c3",
1237
+ "CO c1ccccc1",
1238
+ "c4cccc c4",
1239
+ "C c2ccc",
1240
+ "c n1",
1241
+ "c1 2",
1242
+ "cc1 Cl",
1243
+ "C c2ccccc2",
1244
+ "CC S",
1245
+ "c1n nc",
1246
+ "3 C",
1247
+ "CCC O",
1248
+ "1CC CC",
1249
+ "C1CC 1",
1250
+ "c1cc c2c",
1251
+ "n n1",
1252
+ "1CCC N",
1253
+ "N2CC N",
1254
+ "c1n cc",
1255
+ "CCC 3",
1256
+ "c2 C",
1257
+ "N NC",
1258
+ "c2n cc",
1259
+ "N c1ccccc1",
1260
+ "CCO c1ccc",
1261
+ "O CO",
1262
+ "CCN 1C",
1263
+ "@ ](",
1264
+ "cc1 OC",
1265
+ "n o1",
1266
+ "O CC2",
1267
+ "O CC1",
1268
+ "N S",
1269
+ "N 2",
1270
+ "N c2cc",
1271
+ "n n2",
1272
+ "O 2",
1273
+ "c3 nc",
1274
+ "Cn1 c",
1275
+ "CN 1C",
1276
+ "@@ ](",
1277
+ "cc1 NC",
1278
+ "O CCO",
1279
+ "= [",
1280
+ "N 3CC",
1281
+ "c2n nc",
1282
+ "2CC CC",
1283
+ "CC c1ccc",
1284
+ "c s1",
1285
+ "CN 2CC",
1286
+ "NC c1ccc",
1287
+ "n2 cc",
1288
+ "c4 ccc",
1289
+ "s 3",
1290
+ "C 3CC",
1291
+ "c3ccccc3 2",
1292
+ "n cc2",
1293
+ "N CCC",
1294
+ "c2 3",
1295
+ "Cn1 cc",
1296
+ "c1n n",
1297
+ "c2ccc s2",
1298
+ "CCn1 c",
1299
+ "N c2cccc",
1300
+ "c1ccc s1",
1301
+ "cc c3",
1302
+ "N c2ccccc2",
1303
+ "o 3",
1304
+ "Cc1n n",
1305
+ "1CC O",
1306
+ "c2cccc c12",
1307
+ "C c1",
1308
+ "c2cc c3c",
1309
+ "s c1",
1310
+ "c3 n",
1311
+ "c1cc 2",
1312
+ "n1 C",
1313
+ "CC CC2",
1314
+ "n cc1",
1315
+ "1CC 1",
1316
+ "1CCC O1",
1317
+ "n1 c",
1318
+ "1 NC",
1319
+ "c2ccc nc2",
1320
+ "+]( [",
1321
+ "C c2c",
1322
+ "c2ccc o2",
1323
+ "N2CC OCC2",
1324
+ "1CC CC1",
1325
+ "n2 C",
1326
+ "N N",
1327
+ "c1ccc o1",
1328
+ "3 CCC",
1329
+ "CCO 1",
1330
+ "c2 nn",
1331
+ "c4 c",
1332
+ "cc cc2",
1333
+ "C c3ccccc3",
1334
+ "ccc2 1",
1335
+ "C1CC N",
1336
+ "N1CC OCC1",
1337
+ "N c1c",
1338
+ "cc 4",
1339
+ "CCC NC",
1340
+ "N2CC CC2",
1341
+ "C c2cc",
1342
+ "+] =",
1343
+ "c1n c2ccccc2",
1344
+ "2CCC O2",
1345
+ "1CC S",
1346
+ "1 CO",
1347
+ "2CC N",
1348
+ "C3CC 3",
1349
+ "1CC CCC1",
1350
+ "cc2 C",
1351
+ "-] )[",
1352
+ "C c2cccc",
1353
+ "C 4",
1354
+ "NC 1",
1355
+ "Cc1n oc",
1356
+ "OC 2",
1357
+ "O c1ccc",
1358
+ "c2ccccc2 C",
1359
+ "c2ccccc2 F",
1360
+ "CC CC1",
1361
+ "Cc1c sc",
1362
+ "c1ccccc1 F",
1363
+ "2CC CC2",
1364
+ "CCC N",
1365
+ "c2ccccc2 c1",
1366
+ "1 O",
1367
+ "NC 2",
1368
+ "c1ccccc1 C",
1369
+ "ccc1 C",
1370
+ "1 c1ccc",
1371
+ "c1cccc n1",
1372
+ "ccc1 2",
1373
+ "ccc1 F",
1374
+ "c n2",
1375
+ "n 4",
1376
+ "c2n c3ccccc3",
1377
+ "ccc n2",
1378
+ "c1 OC",
1379
+ "c1c n",
1380
+ "nc1 2",
1381
+ "CCO CC3",
1382
+ "c1cccc 2",
1383
+ "c2cc 3",
1384
+ "n o2",
1385
+ "c1ccccc1 Cl",
1386
+ "c1cc ncc1",
1387
+ "CC 4",
1388
+ "2CCC N",
1389
+ "n c1C",
1390
+ "OCO 2",
1391
+ "s c2",
1392
+ "c1ccccc1 NC",
1393
+ "n1 cc",
1394
+ "OCCO 2",
1395
+ "CN 2C",
1396
+ "c1ccc nc1",
1397
+ "n2 nc",
1398
+ "C c3ccc",
1399
+ "2CC CCC2",
1400
+ "c2ccccc2 Cl",
1401
+ "c 5",
1402
+ "S 1",
1403
+ "c2cccc n2",
1404
+ "c2c nn",
1405
+ "C1 C",
1406
+ "nn c1",
1407
+ "CC CC3",
1408
+ "c1 NC",
1409
+ "s c1C",
1410
+ "CCn1 cc",
1411
+ "nc 4",
1412
+ "N c3ccc",
1413
+ "2CC 2",
1414
+ "cc1 2",
1415
+ "c2 s",
1416
+ "c2cc ncc2",
1417
+ "Nc1cc c2c",
1418
+ "N c1nc",
1419
+ "( \\",
1420
+ "cc2 Cl",
1421
+ "CO CC",
1422
+ "cc2 1",
1423
+ "cc2 F",
1424
+ "c3cccc c23",
1425
+ "nc2 c1",
1426
+ "o n1",
1427
+ "c2c n",
1428
+ "c1c sc",
1429
+ "c1c nc",
1430
+ "2 C1",
1431
+ "N1CC CC1",
1432
+ "c s2",
1433
+ "ccc1 Cl",
1434
+ "ccc1 NC",
1435
+ "c1n oc",
1436
+ "N3 CCC",
1437
+ "CN 1CC",
1438
+ "N 4",
1439
+ "ccc1 OC",
1440
+ "Cc1n cc",
1441
+ "NC c2ccc",
1442
+ ")( [",
1443
+ "c3ccc s3",
1444
+ "C1CC CC1",
1445
+ "c1 F",
1446
+ "2 1",
1447
+ "s c2c1",
1448
+ "OCO 3",
1449
+ "CC c1nn",
1450
+ "n3 c",
1451
+ "3 2",
1452
+ "C1 CCC",
1453
+ "C1CC CCC1",
1454
+ "CN S",
1455
+ "n cc3",
1456
+ "CN 1CCN",
1457
+ "2 c",
1458
+ "CCCO 3",
1459
+ "nn1 C",
1460
+ "@@] (=",
1461
+ "c2ccccc2 C1",
1462
+ "2 CO",
1463
+ "CC CCC",
1464
+ "c3cc c4c",
1465
+ "c1c nn",
1466
+ "Cc1cc c2c",
1467
+ "CCO 2",
1468
+ "c1 O",
1469
+ "c3 c2",
1470
+ "N3CC CC3",
1471
+ "CCO c1cc",
1472
+ "CS C",
1473
+ "CC c1cc",
1474
+ "cc cc3",
1475
+ "c2c sc",
1476
+ "c2n oc",
1477
+ "C n2c",
1478
+ "cc1 S",
1479
+ "c3ccccc3 C",
1480
+ "cccc c4",
1481
+ "c3ccc nc3",
1482
+ "COc1cc c2c",
1483
+ "Cc1n nc",
1484
+ "c1 Cl",
1485
+ "N2CC CCC2",
1486
+ "cccc c2",
1487
+ "O c2ccc",
1488
+ "2 CN",
1489
+ "c1 s",
1490
+ "-]) =",
1491
+ "C2CC N",
1492
+ "c3 ncc",
1493
+ "c3 C",
1494
+ "c3ccc o3",
1495
+ "@] (=",
1496
+ "N c2nc",
1497
+ "nc n2",
1498
+ "N3 CCOCC3",
1499
+ "c3nc 4",
1500
+ "1 S",
1501
+ "CO c2ccc",
1502
+ "n c2ccccc21",
1503
+ "n2 cccn2",
1504
+ "ccc2 C",
1505
+ "c1cc2 c",
1506
+ "n o",
1507
+ "3CC CC",
1508
+ "cc2 c1",
1509
+ "n1 2",
1510
+ "1 c1ccccc1",
1511
+ "ccc1 O",
1512
+ "N c1n",
1513
+ "cc1 Br",
1514
+ "C2 C",
1515
+ "/ [",
1516
+ "N3 C",
1517
+ "c3cccc n3",
1518
+ "c1 N",
1519
+ "CS c1ccc",
1520
+ "c1c ncc",
1521
+ "OCCO 3",
1522
+ "cc ccc12",
1523
+ "CC CCC2",
1524
+ "2CC S",
1525
+ "c3ccccc3 c2",
1526
+ "C2CC CC2",
1527
+ "3 CCCO3",
1528
+ "c2 sc",
1529
+ "CC CCC1",
1530
+ "nn n1",
1531
+ "cccc c3",
1532
+ "C2CC CCC2",
1533
+ "ccc n1",
1534
+ "N3 CCN",
1535
+ "3 C2",
1536
+ "c3ccccc3 F",
1537
+ "CN2CC N",
1538
+ "2CC O",
1539
+ "ccc n3",
1540
+ "CS CC",
1541
+ "c2c nc",
1542
+ "CCn1 nc",
1543
+ "OC 3",
1544
+ "2 O",
1545
+ "N CC1",
1546
+ "Cc1c nc",
1547
+ "c3n nc",
1548
+ "N1CC CCC1",
1549
+ "CCN 2C",
1550
+ "CC c1nc",
1551
+ "N1CC O",
1552
+ "n 1CC",
1553
+ "nn 3",
1554
+ "Cn1 nc",
1555
+ "S 2",
1556
+ "n3 cc",
1557
+ "cc1 O",
1558
+ "c1ncc s1",
1559
+ "c2cccc 3",
1560
+ "CCO c1ccccc1",
1561
+ "o c1",
1562
+ "CC c1ccccc1",
1563
+ "nc n1",
1564
+ "c4 cc",
1565
+ "CS c1nnc",
1566
+ "CC c1cccc",
1567
+ "Cc1ccc s1",
1568
+ "1 2",
1569
+ "NC c1cc",
1570
+ "nc2 C",
1571
+ "1CCC N1C",
1572
+ "3CC CC3",
1573
+ "n3 C",
1574
+ "CCO c1cccc",
1575
+ "N c2c",
1576
+ "2CCC N2C",
1577
+ "s c3",
1578
+ "+] )[",
1579
+ "CCC N1C",
1580
+ "O c1cccc",
1581
+ "1C S",
1582
+ "1C c2ccccc2",
1583
+ "c1cc n",
1584
+ "NC c1ccccc1",
1585
+ "N CCN",
1586
+ "c3cc ncc3",
1587
+ "N c1nnc",
1588
+ "nn2 C",
1589
+ "CCC S",
1590
+ "CC 1CC",
1591
+ "NC 1CC1",
1592
+ "c2n c3c",
1593
+ "cc cc1Cl",
1594
+ "c1 sc",
1595
+ "c1cc sc1",
1596
+ "N1CC CC",
1597
+ "CCCC 4",
1598
+ "1 c1cccc",
1599
+ "CC l",
1600
+ "cc n1",
1601
+ "O C1",
1602
+ "cc2 C1",
1603
+ "Cc1cc 2",
1604
+ "nc2 n1",
1605
+ "c2cc3 c",
1606
+ "1 OCC",
1607
+ "o n2",
1608
+ "Nc2cc c3c",
1609
+ "o c2",
1610
+ "1CC CCN1C",
1611
+ "ccc2 F",
1612
+ "n c2c",
1613
+ "o c2c1",
1614
+ "NC 2CC2",
1615
+ "n nc",
1616
+ "CC c1n",
1617
+ "NC 3",
1618
+ "cc cc1F",
1619
+ "CS c1n",
1620
+ "1CCO C1",
1621
+ "2 NC",
1622
+ "nc2 1",
1623
+ "4 CC",
1624
+ "c2cc sc2",
1625
+ "CC 1CCN",
1626
+ "Nc1ccccc1 C",
1627
+ "c2n cccn2",
1628
+ "c2ncc s2",
1629
+ "Cc1cc sc1",
1630
+ "c3ccccc3 C2",
1631
+ "c2 n1",
1632
+ "2C S",
1633
+ "N c3ccccc3",
1634
+ "1 N",
1635
+ "C n2cc",
1636
+ "CCN S",
1637
+ "C n2",
1638
+ "CCCO 2",
1639
+ "Cc1ccc o1",
1640
+ "cc cc1",
1641
+ "3CC 3",
1642
+ "4 C",
1643
+ "NCC NC",
1644
+ "Cc1ccccc1 C",
1645
+ "c2c ncc",
1646
+ "3CC CCC3",
1647
+ "N c3cccc",
1648
+ "NC c1cccc",
1649
+ "OCO 4",
1650
+ "C c2nc",
1651
+ "n c2cc",
1652
+ "Cc1c nn",
1653
+ "CCN 1CC",
1654
+ "CC c1c",
1655
+ "c1cc2 ccccc2",
1656
+ "c3 2",
1657
+ "c1 o",
1658
+ "c2ccccc2 n1",
1659
+ "nc2 3",
1660
+ "CCO CC1",
1661
+ "Cc1cccc 2",
1662
+ "N3CC CCC3",
1663
+ "CS c2nnc",
1664
+ "2 c2ccc",
1665
+ "C c3c",
1666
+ "1CC OC2",
1667
+ "CCC n1c",
1668
+ ")= [",
1669
+ "N2CC CC",
1670
+ "ccc2 Cl",
1671
+ "Nc2ccccc2 C",
1672
+ "c4ccccc4 3",
1673
+ "O CCC",
1674
+ "c3nc4 ccccc4",
1675
+ "O c1ccccc1",
1676
+ "c2nc3ccccc3 s2",
1677
+ "2 N",
1678
+ "cc3 2",
1679
+ "cccc c23",
1680
+ "c2 oc",
1681
+ "Cc1ccc nc1",
1682
+ "c1nc2ccccc2 s1",
1683
+ "NC 1CCCCC1",
1684
+ "c2 1",
1685
+ "ccc1 Br",
1686
+ "S c1nnc",
1687
+ "O c2ccccc2",
1688
+ "1 OC",
1689
+ "c3ccccc3 Cl",
1690
+ "c3 s",
1691
+ "S C1",
1692
+ "NC c2cc",
1693
+ "c2 o",
1694
+ "CO 2",
1695
+ "CO CCN",
1696
+ "N c3cc",
1697
+ "cccc c5",
1698
+ "CO c1c",
1699
+ "C1 N",
1700
+ "COc1ccccc1 C",
1701
+ "COc1ccccc1 NC",
1702
+ "Cc1 o",
1703
+ "CS c1nc",
1704
+ "nc2 s",
1705
+ "n2c ncn2",
1706
+ "c2ccccc2 O1",
1707
+ "n2c nn",
1708
+ "N2CC O",
1709
+ "ccc3 2",
1710
+ "c3 4",
1711
+ "Cc1ccccc1 NC",
1712
+ "c3 nn",
1713
+ "c2 c1C",
1714
+ "n o3",
1715
+ "1CC CCC",
1716
+ "ccc2 c1",
1717
+ "3 CCN",
1718
+ "n1 nc",
1719
+ "o c1C",
1720
+ "Cc1cccc n1",
1721
+ "CC c2ccccc2",
1722
+ "n3 nc",
1723
+ "F c1ccc",
1724
+ "O 3",
1725
+ "c1cc c2ccccc2c1",
1726
+ "n 2CC",
1727
+ "CC CCNC",
1728
+ "c2 Cl",
1729
+ "CCC c1cc",
1730
+ "c1 CC",
1731
+ "Cc1n c2ccccc2",
1732
+ "Cc2ccc o2",
1733
+ "CN 1CCC",
1734
+ "CN 2CCC",
1735
+ "2CC OC2",
1736
+ "c1cccc2 ccccc12",
1737
+ "F C",
1738
+ "n2cc nc2",
1739
+ "CCO CC",
1740
+ "CCC 4",
1741
+ "2C 3",
1742
+ "c1cc nc",
1743
+ "2 c2ccccc2",
1744
+ "(\\ [",
1745
+ "C c3cccc",
1746
+ "cc3 C",
1747
+ "C c3cc",
1748
+ "n2 n1",
1749
+ "cc1 N",
1750
+ "cccc2 Cl",
1751
+ "c n",
1752
+ "NC c2ccccc2",
1753
+ "1CC c2c",
1754
+ "n1 cccn1",
1755
+ "2CC OCC2",
1756
+ "N CC2",
1757
+ "c n3",
1758
+ "CC CCN",
1759
+ "NC 1CC",
1760
+ "CO CCNC",
1761
+ "C c4ccccc4",
1762
+ "CCCO c1ccc",
1763
+ "COc1cccc 2",
1764
+ "NC 1CCCC1",
1765
+ "2CC 3CC",
1766
+ "cc2 3",
1767
+ "C4 CC4",
1768
+ "CN2CC OCC2",
1769
+ "CC c1noc",
1770
+ "c1n c2c",
1771
+ "c1 CCC",
1772
+ "c3c nn",
1773
+ "c nn1C",
1774
+ "s c2nc",
1775
+ "Cc1cc 2c",
1776
+ "cc c4",
1777
+ "c2 C1",
1778
+ "n c2ccccc12",
1779
+ "1CO c2ccccc2O1",
1780
+ "CCC O1",
1781
+ "n3 cccn3",
1782
+ "Cc1c n",
1783
+ "cccc2 F",
1784
+ "NC c2cccc",
1785
+ "Cc1cc c2nc",
1786
+ "N2CC CCCC2",
1787
+ "CCC n1cc",
1788
+ "c1ccccc1 OC",
1789
+ "n2 3",
1790
+ "S CC1",
1791
+ "cc1 CNC",
1792
+ "Nc1cccc 2",
1793
+ "CN 1CCO",
1794
+ "Cc2ccc s2",
1795
+ "n2cc cc2",
1796
+ "NCc1ccc o1",
1797
+ "N c2nnc",
1798
+ "c1ncc cc1C",
1799
+ "CCCCC 4",
1800
+ "s 4",
1801
+ "CCN 1CCN",
1802
+ "c3n cccn3",
1803
+ "c nn1",
1804
+ "1C 2",
1805
+ "c2 F",
1806
+ "CC CCC3",
1807
+ "1CC OCC1",
1808
+ "Nc1ccc nc1",
1809
+ "N1CCC 2",
1810
+ "n2 nn",
1811
+ "Cc2c sc",
1812
+ "c5 ccccc5",
1813
+ "Cc1cc nc",
1814
+ "cc2 s1",
1815
+ "C1 NC",
1816
+ "COc1cc 2c",
1817
+ "c2cccc n12",
1818
+ "3 CCOCC3",
1819
+ "n1 nn",
1820
+ "C1CC OCC1",
1821
+ "nc n3",
1822
+ "c3cc 4",
1823
+ "n2 c1",
1824
+ "ccc2 3",
1825
+ "CC c1nnc",
1826
+ "c2 N",
1827
+ "CCN 1",
1828
+ "C1 CCC1",
1829
+ "CCO CC4",
1830
+ "c1ccccc1 Br",
1831
+ "nn 3C",
1832
+ "nn1 2",
1833
+ "n c3ccccc32",
1834
+ "CC1 CCC",
1835
+ "Nc1c nn",
1836
+ "n c3c",
1837
+ "n1 C1CC1",
1838
+ "c4cccc c34",
1839
+ "1C c1ccccc1",
1840
+ "N CCO",
1841
+ "CN c1ccc",
1842
+ "N1CC c2cc",
1843
+ "nc3 c2",
1844
+ "cc cc1NC",
1845
+ "1CCCC CC1",
1846
+ "c2s ccc2",
1847
+ "CC 1CC1",
1848
+ "O CCCO2",
1849
+ "C1 CN",
1850
+ "CCO CC2",
1851
+ "c2nn nn2",
1852
+ "CN c2ccc",
1853
+ "c2nc n",
1854
+ "C3CC CCC3",
1855
+ "c2cccnc2 1",
1856
+ "CN 1",
1857
+ "N1CC c2ccccc21",
1858
+ "Cc1 oc",
1859
+ "O c2cc",
1860
+ "1CC CCO1",
1861
+ "Cn1 ccc",
1862
+ "Cc1ccccc1 F",
1863
+ "c2cc3 ccccc3",
1864
+ "N1 c1ccc",
1865
+ "o 4",
1866
+ "O c2cccc",
1867
+ "cc1 OCC",
1868
+ "C3CC CC3",
1869
+ "c1cc c2",
1870
+ "ccc1 N",
1871
+ "NC 1CCN",
1872
+ "c2n c3ccc",
1873
+ "NCC c1ccc",
1874
+ "NC 2CCCCC2",
1875
+ "+] )(",
1876
+ "c1nc nc",
1877
+ "C n2nc",
1878
+ "c2ccccc2 OC",
1879
+ "Cc1cc n",
1880
+ "N1CCC N",
1881
+ "Cc2cccc n2",
1882
+ "1 2CC",
1883
+ "CCO 3",
1884
+ "S CC2",
1885
+ "cc cc1C",
1886
+ "1CCC c2ccccc21",
1887
+ "Cc2ccc nc2",
1888
+ "N c3nc",
1889
+ "c1 CNC",
1890
+ "c2n ccc",
1891
+ "N1CC OC",
1892
+ "nc2 c1c",
1893
+ "S C2",
1894
+ "c3ccccc3 n2",
1895
+ "OCCO 4",
1896
+ "cc3 F",
1897
+ "Cn1 ncc2",
1898
+ "Cc1 sc",
1899
+ "OC c2ccccc2",
1900
+ "Nc1n c2c",
1901
+ "2 3",
1902
+ "O CCN",
1903
+ "c2ccccc2 N",
1904
+ "N1CC CCCC1",
1905
+ "c4 nc",
1906
+ "cc3 C2",
1907
+ "c1ccccc1 O",
1908
+ "o c2cc",
1909
+ "2CO 1",
1910
+ "S c2ccc",
1911
+ "c1ccccc1 2",
1912
+ "c2n c3cc",
1913
+ "n c2ccccc2c1",
1914
+ "N c1ncc",
1915
+ "2CCCC CC2",
1916
+ "ccc1 OCC",
1917
+ "ccc2 OC",
1918
+ "ccc2 o1",
1919
+ "NC 2CC",
1920
+ "c2cc nc",
1921
+ "CO 1",
1922
+ "1C NC",
1923
+ "Nc1ccccc1 F",
1924
+ "CN2CC O",
1925
+ "c2s c3c",
1926
+ "NC c3ccc",
1927
+ "NC 3CC3",
1928
+ "Cl c1ccc",
1929
+ "nc2 s1",
1930
+ "c2cncc n2",
1931
+ "CC c1ccccc1NC",
1932
+ "NCC S",
1933
+ "NCc1ccc nc1",
1934
+ "NCC CC",
1935
+ "ccc nc12",
1936
+ "n3cc nc3",
1937
+ "o c12",
1938
+ "c2ccccc2 O",
1939
+ "n2cnn n2",
1940
+ "2 S",
1941
+ "c2n 1CC",
1942
+ "C2CC OCC2",
1943
+ "no c2C",
1944
+ "cc3 Cl",
1945
+ "CN1CC OCC1",
1946
+ "Cc1ccccc1 Cl",
1947
+ "c1cc oc1",
1948
+ "c4ccc o4",
1949
+ "cccc2 1",
1950
+ "c2cc oc2",
1951
+ "Cn1cc nc1",
1952
+ "c1cc2ccccc2 o1",
1953
+ "O c1cc",
1954
+ "Cc1cc ncc1",
1955
+ "c3c ncc",
1956
+ "c2cc 1C",
1957
+ "c2nc nc",
1958
+ "CN c1cc",
1959
+ "c2nc nc3",
1960
+ "Cn1 nn",
1961
+ "1 2C",
1962
+ "NC 1CCC",
1963
+ "c2cc n",
1964
+ "c2cc c3ccccc3c2",
1965
+ "CCC OC",
1966
+ "Cc2ccccc2 F",
1967
+ "CS c1cccc",
1968
+ "Cc1n c2c",
1969
+ "Cc1c ncc",
1970
+ "Cc2ccccc2 Cl",
1971
+ "CC c3ccccc3",
1972
+ "CO c2ccccc2",
1973
+ "n3 n2",
1974
+ "sc2 n1",
1975
+ "CC 2CC2",
1976
+ "CO C2",
1977
+ "N c1nccs1",
1978
+ "S c1n",
1979
+ "o c2ccccc12",
1980
+ "c3cc sc3",
1981
+ "OC c2ccc",
1982
+ "CCO c1c",
1983
+ "ccc2 n1",
1984
+ "5 c",
1985
+ "NC c1nc",
1986
+ "c3 n2",
1987
+ "c2cccc3 ccccc23",
1988
+ "3 c",
1989
+ "N c4ccc",
1990
+ "CC c2ccc",
1991
+ "CO CCN1C",
1992
+ "c2c s",
1993
+ "cc3 c",
1994
+ "n3c ncn3",
1995
+ "OC c1ccccc1",
1996
+ "C2 CCC",
1997
+ "1 c1cc",
1998
+ "S c1ccc",
1999
+ "nc2 nc",
2000
+ "NCC c1c",
2001
+ "Cc1n c2cc",
2002
+ "2 c2cccc",
2003
+ "3 CS",
2004
+ "nn c3",
2005
+ "c2 O",
2006
+ "3CCC N",
2007
+ "N4 CCOCC4",
2008
+ "2CC OC3",
2009
+ "nc3 2",
2010
+ "C c2nnc",
2011
+ "c2 c3c",
2012
+ "NC c1ccc2c",
2013
+ "CO Cc1cc",
2014
+ "cc2 c",
2015
+ "(= [",
2016
+ "NC 2CCCC2",
2017
+ "1CC CCN",
2018
+ "cc3 s2",
2019
+ "o c3",
2020
+ "NC c1c",
2021
+ "NC c2ccco2",
2022
+ "n1 n",
2023
+ "c3cccc 4",
2024
+ "Cc1c s",
2025
+ "c1 oc",
2026
+ "c2nn n",
2027
+ "c1 S",
2028
+ "CO c2cccc",
2029
+ "2CC CCC",
2030
+ "c4ccc s4",
2031
+ "c1cncc n1",
2032
+ "C c2ccc3c",
2033
+ "nc2 sc",
2034
+ "Cn1 n",
2035
+ "ccc2 O",
2036
+ "c3 sc",
2037
+ "CCO c1ccccc1NC",
2038
+ "c1nn c2ccccn12",
2039
+ "c1nc2ccccc2 c",
2040
+ "CC SC",
2041
+ "N c2cnn",
2042
+ "n1cc cc1",
2043
+ "c1n c2cc",
2044
+ "c3ccccc3 O2",
2045
+ "c1 Br",
2046
+ "Nc1ccccc1 Cl",
2047
+ "n c2ccc",
2048
+ "2CCC 2",
2049
+ "CS 2",
2050
+ "o c2C",
2051
+ "n1 N",
2052
+ "N4 CCCC4",
2053
+ "c3ncc s3",
2054
+ "N1CC c2c",
2055
+ "N2CCC N",
2056
+ "cn1 2",
2057
+ "c2ncc cc2C",
2058
+ "1 c1cccs1",
2059
+ "c nc12",
2060
+ "OC 2CCCC2",
2061
+ "nn c2",
2062
+ "3CC S",
2063
+ "CCCO 4",
2064
+ "C2 CCC2",
2065
+ "C3 C",
2066
+ "c4ccccc4 c3",
2067
+ "c1c s",
2068
+ "c1C 1CC1",
2069
+ "n1c ncn1",
2070
+ "CN 3C",
2071
+ "c3n oc",
2072
+ "N CCN1C",
2073
+ "CC c1ncc",
2074
+ "c1n c2ccc",
2075
+ "CCOC 2",
2076
+ "N1 N",
2077
+ "nc3 C"
2078
+ ]
2079
+ }
2080
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "mask_token": "[MASK]",
47
+ "model_max_length": 1000000000000000019884624838656,
48
+ "pad_token": "[PAD]",
49
+ "sep_token": "[SEP]",
50
+ "tokenizer_class": "PreTrainedTokenizerFast",
51
+ "unk_token": "[UNK]"
52
+ }
trainer_state.json ADDED
@@ -0,0 +1,3839 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.1968414783477783,
3
+ "best_model_checkpoint": "./results/models/checkpoint-307584",
4
+ "epoch": 16.0,
5
+ "eval_steps": 500,
6
+ "global_step": 307584,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 0.0019989596337910945,
14
+ "loss": 2.0581,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.05,
19
+ "learning_rate": 0.001997919267582189,
20
+ "loss": 1.5829,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.08,
25
+ "learning_rate": 0.0019968789013732834,
26
+ "loss": 1.5037,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.1,
31
+ "learning_rate": 0.001995838535164378,
32
+ "loss": 1.4638,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.13,
37
+ "learning_rate": 0.0019947981689554723,
38
+ "loss": 1.4347,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.16,
43
+ "learning_rate": 0.001993757802746567,
44
+ "loss": 1.4135,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.18,
49
+ "learning_rate": 0.0019927174365376612,
50
+ "loss": 1.3959,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.21,
55
+ "learning_rate": 0.0019916770703287557,
56
+ "loss": 1.3816,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.23,
61
+ "learning_rate": 0.00199063670411985,
62
+ "loss": 1.3715,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.26,
67
+ "learning_rate": 0.0019895963379109446,
68
+ "loss": 1.3634,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.29,
73
+ "learning_rate": 0.001988555971702039,
74
+ "loss": 1.354,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.31,
79
+ "learning_rate": 0.0019875156054931335,
80
+ "loss": 1.3475,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.34,
85
+ "learning_rate": 0.001986475239284228,
86
+ "loss": 1.3392,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.36,
91
+ "learning_rate": 0.0019854348730753224,
92
+ "loss": 1.3336,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 0.39,
97
+ "learning_rate": 0.001984394506866417,
98
+ "loss": 1.3283,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 0.42,
103
+ "learning_rate": 0.0019833541406575114,
104
+ "loss": 1.3234,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.44,
109
+ "learning_rate": 0.001982313774448606,
110
+ "loss": 1.321,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.47,
115
+ "learning_rate": 0.0019812734082397003,
116
+ "loss": 1.3154,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.49,
121
+ "learning_rate": 0.0019802330420307947,
122
+ "loss": 1.3126,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.52,
127
+ "learning_rate": 0.0019791926758218896,
128
+ "loss": 1.3087,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 0.55,
133
+ "learning_rate": 0.001978152309612984,
134
+ "loss": 1.3058,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 0.57,
139
+ "learning_rate": 0.001977111943404078,
140
+ "loss": 1.3022,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 0.6,
145
+ "learning_rate": 0.0019760715771951726,
146
+ "loss": 1.3002,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 0.62,
151
+ "learning_rate": 0.001975031210986267,
152
+ "loss": 1.2965,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 0.65,
157
+ "learning_rate": 0.0019739908447773615,
158
+ "loss": 1.2948,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 0.68,
163
+ "learning_rate": 0.0019729504785684564,
164
+ "loss": 1.2932,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 0.7,
169
+ "learning_rate": 0.001971910112359551,
170
+ "loss": 1.289,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 0.73,
175
+ "learning_rate": 0.0019708697461506453,
176
+ "loss": 1.2894,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 0.75,
181
+ "learning_rate": 0.0019698293799417393,
182
+ "loss": 1.2862,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 0.78,
187
+ "learning_rate": 0.0019687890137328337,
188
+ "loss": 1.2827,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 0.81,
193
+ "learning_rate": 0.0019677486475239286,
194
+ "loss": 1.2818,
195
+ "step": 15500
196
+ },
197
+ {
198
+ "epoch": 0.83,
199
+ "learning_rate": 0.001966708281315023,
200
+ "loss": 1.2813,
201
+ "step": 16000
202
+ },
203
+ {
204
+ "epoch": 0.86,
205
+ "learning_rate": 0.0019656679151061176,
206
+ "loss": 1.279,
207
+ "step": 16500
208
+ },
209
+ {
210
+ "epoch": 0.88,
211
+ "learning_rate": 0.001964627548897212,
212
+ "loss": 1.276,
213
+ "step": 17000
214
+ },
215
+ {
216
+ "epoch": 0.91,
217
+ "learning_rate": 0.0019635871826883065,
218
+ "loss": 1.2761,
219
+ "step": 17500
220
+ },
221
+ {
222
+ "epoch": 0.94,
223
+ "learning_rate": 0.0019625468164794005,
224
+ "loss": 1.2737,
225
+ "step": 18000
226
+ },
227
+ {
228
+ "epoch": 0.96,
229
+ "learning_rate": 0.0019615064502704954,
230
+ "loss": 1.2729,
231
+ "step": 18500
232
+ },
233
+ {
234
+ "epoch": 0.99,
235
+ "learning_rate": 0.00196046608406159,
236
+ "loss": 1.2708,
237
+ "step": 19000
238
+ },
239
+ {
240
+ "epoch": 1.0,
241
+ "eval_loss": 1.28058660030365,
242
+ "eval_runtime": 0.6137,
243
+ "eval_samples_per_second": 1629.563,
244
+ "eval_steps_per_second": 3.259,
245
+ "step": 19224
246
+ },
247
+ {
248
+ "epoch": 1.01,
249
+ "learning_rate": 0.0019594257178526843,
250
+ "loss": 1.2682,
251
+ "step": 19500
252
+ },
253
+ {
254
+ "epoch": 1.04,
255
+ "learning_rate": 0.0019583853516437788,
256
+ "loss": 1.2641,
257
+ "step": 20000
258
+ },
259
+ {
260
+ "epoch": 1.07,
261
+ "learning_rate": 0.001957344985434873,
262
+ "loss": 1.2635,
263
+ "step": 20500
264
+ },
265
+ {
266
+ "epoch": 1.09,
267
+ "learning_rate": 0.0019563046192259677,
268
+ "loss": 1.265,
269
+ "step": 21000
270
+ },
271
+ {
272
+ "epoch": 1.12,
273
+ "learning_rate": 0.001955264253017062,
274
+ "loss": 1.2636,
275
+ "step": 21500
276
+ },
277
+ {
278
+ "epoch": 1.14,
279
+ "learning_rate": 0.0019542238868081566,
280
+ "loss": 1.2617,
281
+ "step": 22000
282
+ },
283
+ {
284
+ "epoch": 1.17,
285
+ "learning_rate": 0.001953183520599251,
286
+ "loss": 1.2605,
287
+ "step": 22500
288
+ },
289
+ {
290
+ "epoch": 1.2,
291
+ "learning_rate": 0.0019521431543903455,
292
+ "loss": 1.2585,
293
+ "step": 23000
294
+ },
295
+ {
296
+ "epoch": 1.22,
297
+ "learning_rate": 0.0019511027881814397,
298
+ "loss": 1.2583,
299
+ "step": 23500
300
+ },
301
+ {
302
+ "epoch": 1.25,
303
+ "learning_rate": 0.0019500624219725344,
304
+ "loss": 1.2565,
305
+ "step": 24000
306
+ },
307
+ {
308
+ "epoch": 1.27,
309
+ "learning_rate": 0.0019490220557636289,
310
+ "loss": 1.2558,
311
+ "step": 24500
312
+ },
313
+ {
314
+ "epoch": 1.3,
315
+ "learning_rate": 0.0019479816895547233,
316
+ "loss": 1.2523,
317
+ "step": 25000
318
+ },
319
+ {
320
+ "epoch": 1.33,
321
+ "learning_rate": 0.0019469413233458178,
322
+ "loss": 1.2553,
323
+ "step": 25500
324
+ },
325
+ {
326
+ "epoch": 1.35,
327
+ "learning_rate": 0.0019459009571369122,
328
+ "loss": 1.2539,
329
+ "step": 26000
330
+ },
331
+ {
332
+ "epoch": 1.38,
333
+ "learning_rate": 0.0019448605909280067,
334
+ "loss": 1.2521,
335
+ "step": 26500
336
+ },
337
+ {
338
+ "epoch": 1.4,
339
+ "learning_rate": 0.0019438202247191011,
340
+ "loss": 1.2511,
341
+ "step": 27000
342
+ },
343
+ {
344
+ "epoch": 1.43,
345
+ "learning_rate": 0.0019427798585101956,
346
+ "loss": 1.2517,
347
+ "step": 27500
348
+ },
349
+ {
350
+ "epoch": 1.46,
351
+ "learning_rate": 0.00194173949230129,
352
+ "loss": 1.2497,
353
+ "step": 28000
354
+ },
355
+ {
356
+ "epoch": 1.48,
357
+ "learning_rate": 0.0019406991260923845,
358
+ "loss": 1.2495,
359
+ "step": 28500
360
+ },
361
+ {
362
+ "epoch": 1.51,
363
+ "learning_rate": 0.001939658759883479,
364
+ "loss": 1.2488,
365
+ "step": 29000
366
+ },
367
+ {
368
+ "epoch": 1.53,
369
+ "learning_rate": 0.0019386183936745737,
370
+ "loss": 1.248,
371
+ "step": 29500
372
+ },
373
+ {
374
+ "epoch": 1.56,
375
+ "learning_rate": 0.001937578027465668,
376
+ "loss": 1.2471,
377
+ "step": 30000
378
+ },
379
+ {
380
+ "epoch": 1.59,
381
+ "learning_rate": 0.0019365376612567623,
382
+ "loss": 1.248,
383
+ "step": 30500
384
+ },
385
+ {
386
+ "epoch": 1.61,
387
+ "learning_rate": 0.0019354972950478568,
388
+ "loss": 1.2446,
389
+ "step": 31000
390
+ },
391
+ {
392
+ "epoch": 1.64,
393
+ "learning_rate": 0.0019344569288389513,
394
+ "loss": 1.2447,
395
+ "step": 31500
396
+ },
397
+ {
398
+ "epoch": 1.66,
399
+ "learning_rate": 0.0019334165626300457,
400
+ "loss": 1.2439,
401
+ "step": 32000
402
+ },
403
+ {
404
+ "epoch": 1.69,
405
+ "learning_rate": 0.0019323761964211404,
406
+ "loss": 1.2442,
407
+ "step": 32500
408
+ },
409
+ {
410
+ "epoch": 1.72,
411
+ "learning_rate": 0.0019313358302122348,
412
+ "loss": 1.2444,
413
+ "step": 33000
414
+ },
415
+ {
416
+ "epoch": 1.74,
417
+ "learning_rate": 0.0019302954640033293,
418
+ "loss": 1.2417,
419
+ "step": 33500
420
+ },
421
+ {
422
+ "epoch": 1.77,
423
+ "learning_rate": 0.0019292550977944235,
424
+ "loss": 1.2429,
425
+ "step": 34000
426
+ },
427
+ {
428
+ "epoch": 1.79,
429
+ "learning_rate": 0.001928214731585518,
430
+ "loss": 1.2398,
431
+ "step": 34500
432
+ },
433
+ {
434
+ "epoch": 1.82,
435
+ "learning_rate": 0.0019271743653766125,
436
+ "loss": 1.2419,
437
+ "step": 35000
438
+ },
439
+ {
440
+ "epoch": 1.85,
441
+ "learning_rate": 0.0019261339991677071,
442
+ "loss": 1.2393,
443
+ "step": 35500
444
+ },
445
+ {
446
+ "epoch": 1.87,
447
+ "learning_rate": 0.0019250936329588016,
448
+ "loss": 1.2396,
449
+ "step": 36000
450
+ },
451
+ {
452
+ "epoch": 1.9,
453
+ "learning_rate": 0.001924053266749896,
454
+ "loss": 1.2374,
455
+ "step": 36500
456
+ },
457
+ {
458
+ "epoch": 1.92,
459
+ "learning_rate": 0.0019230129005409905,
460
+ "loss": 1.24,
461
+ "step": 37000
462
+ },
463
+ {
464
+ "epoch": 1.95,
465
+ "learning_rate": 0.001921972534332085,
466
+ "loss": 1.2377,
467
+ "step": 37500
468
+ },
469
+ {
470
+ "epoch": 1.98,
471
+ "learning_rate": 0.0019209321681231794,
472
+ "loss": 1.2387,
473
+ "step": 38000
474
+ },
475
+ {
476
+ "epoch": 2.0,
477
+ "eval_loss": 1.2544126510620117,
478
+ "eval_runtime": 0.5912,
479
+ "eval_samples_per_second": 1691.485,
480
+ "eval_steps_per_second": 3.383,
481
+ "step": 38448
482
+ },
483
+ {
484
+ "epoch": 2.0,
485
+ "learning_rate": 0.0019198918019142739,
486
+ "loss": 1.2349,
487
+ "step": 38500
488
+ },
489
+ {
490
+ "epoch": 2.03,
491
+ "learning_rate": 0.0019188514357053683,
492
+ "loss": 1.2314,
493
+ "step": 39000
494
+ },
495
+ {
496
+ "epoch": 2.05,
497
+ "learning_rate": 0.0019178110694964628,
498
+ "loss": 1.2342,
499
+ "step": 39500
500
+ },
501
+ {
502
+ "epoch": 2.08,
503
+ "learning_rate": 0.0019167707032875572,
504
+ "loss": 1.2318,
505
+ "step": 40000
506
+ },
507
+ {
508
+ "epoch": 2.11,
509
+ "learning_rate": 0.0019157303370786517,
510
+ "loss": 1.231,
511
+ "step": 40500
512
+ },
513
+ {
514
+ "epoch": 2.13,
515
+ "learning_rate": 0.0019146899708697464,
516
+ "loss": 1.2321,
517
+ "step": 41000
518
+ },
519
+ {
520
+ "epoch": 2.16,
521
+ "learning_rate": 0.0019136496046608406,
522
+ "loss": 1.2315,
523
+ "step": 41500
524
+ },
525
+ {
526
+ "epoch": 2.18,
527
+ "learning_rate": 0.001912609238451935,
528
+ "loss": 1.2308,
529
+ "step": 42000
530
+ },
531
+ {
532
+ "epoch": 2.21,
533
+ "learning_rate": 0.0019115688722430295,
534
+ "loss": 1.2303,
535
+ "step": 42500
536
+ },
537
+ {
538
+ "epoch": 2.24,
539
+ "learning_rate": 0.001910528506034124,
540
+ "loss": 1.2297,
541
+ "step": 43000
542
+ },
543
+ {
544
+ "epoch": 2.26,
545
+ "learning_rate": 0.0019094881398252184,
546
+ "loss": 1.2295,
547
+ "step": 43500
548
+ },
549
+ {
550
+ "epoch": 2.29,
551
+ "learning_rate": 0.0019084477736163131,
552
+ "loss": 1.2271,
553
+ "step": 44000
554
+ },
555
+ {
556
+ "epoch": 2.31,
557
+ "learning_rate": 0.0019074074074074076,
558
+ "loss": 1.2275,
559
+ "step": 44500
560
+ },
561
+ {
562
+ "epoch": 2.34,
563
+ "learning_rate": 0.0019063670411985018,
564
+ "loss": 1.2284,
565
+ "step": 45000
566
+ },
567
+ {
568
+ "epoch": 2.37,
569
+ "learning_rate": 0.0019053266749895963,
570
+ "loss": 1.2327,
571
+ "step": 45500
572
+ },
573
+ {
574
+ "epoch": 2.39,
575
+ "learning_rate": 0.0019042863087806907,
576
+ "loss": 1.2286,
577
+ "step": 46000
578
+ },
579
+ {
580
+ "epoch": 2.42,
581
+ "learning_rate": 0.0019032459425717854,
582
+ "loss": 1.2293,
583
+ "step": 46500
584
+ },
585
+ {
586
+ "epoch": 2.44,
587
+ "learning_rate": 0.0019022055763628799,
588
+ "loss": 1.2263,
589
+ "step": 47000
590
+ },
591
+ {
592
+ "epoch": 2.47,
593
+ "learning_rate": 0.0019011652101539743,
594
+ "loss": 1.2297,
595
+ "step": 47500
596
+ },
597
+ {
598
+ "epoch": 2.5,
599
+ "learning_rate": 0.0019001248439450688,
600
+ "loss": 1.227,
601
+ "step": 48000
602
+ },
603
+ {
604
+ "epoch": 2.52,
605
+ "learning_rate": 0.001899084477736163,
606
+ "loss": 1.2263,
607
+ "step": 48500
608
+ },
609
+ {
610
+ "epoch": 2.55,
611
+ "learning_rate": 0.0018980441115272575,
612
+ "loss": 1.2274,
613
+ "step": 49000
614
+ },
615
+ {
616
+ "epoch": 2.57,
617
+ "learning_rate": 0.0018970037453183521,
618
+ "loss": 1.2271,
619
+ "step": 49500
620
+ },
621
+ {
622
+ "epoch": 2.6,
623
+ "learning_rate": 0.0018959633791094466,
624
+ "loss": 1.2262,
625
+ "step": 50000
626
+ },
627
+ {
628
+ "epoch": 2.63,
629
+ "learning_rate": 0.001894923012900541,
630
+ "loss": 1.2256,
631
+ "step": 50500
632
+ },
633
+ {
634
+ "epoch": 2.65,
635
+ "learning_rate": 0.0018938826466916355,
636
+ "loss": 1.2256,
637
+ "step": 51000
638
+ },
639
+ {
640
+ "epoch": 2.68,
641
+ "learning_rate": 0.00189284228048273,
642
+ "loss": 1.225,
643
+ "step": 51500
644
+ },
645
+ {
646
+ "epoch": 2.7,
647
+ "learning_rate": 0.0018918019142738244,
648
+ "loss": 1.2254,
649
+ "step": 52000
650
+ },
651
+ {
652
+ "epoch": 2.73,
653
+ "learning_rate": 0.0018907615480649189,
654
+ "loss": 1.2249,
655
+ "step": 52500
656
+ },
657
+ {
658
+ "epoch": 2.76,
659
+ "learning_rate": 0.0018897211818560133,
660
+ "loss": 1.2247,
661
+ "step": 53000
662
+ },
663
+ {
664
+ "epoch": 2.78,
665
+ "learning_rate": 0.0018886808156471078,
666
+ "loss": 1.2231,
667
+ "step": 53500
668
+ },
669
+ {
670
+ "epoch": 2.81,
671
+ "learning_rate": 0.0018876404494382023,
672
+ "loss": 1.2226,
673
+ "step": 54000
674
+ },
675
+ {
676
+ "epoch": 2.83,
677
+ "learning_rate": 0.0018866000832292967,
678
+ "loss": 1.2239,
679
+ "step": 54500
680
+ },
681
+ {
682
+ "epoch": 2.86,
683
+ "learning_rate": 0.0018855597170203914,
684
+ "loss": 1.2226,
685
+ "step": 55000
686
+ },
687
+ {
688
+ "epoch": 2.89,
689
+ "learning_rate": 0.0018845193508114856,
690
+ "loss": 1.2234,
691
+ "step": 55500
692
+ },
693
+ {
694
+ "epoch": 2.91,
695
+ "learning_rate": 0.00188347898460258,
696
+ "loss": 1.221,
697
+ "step": 56000
698
+ },
699
+ {
700
+ "epoch": 2.94,
701
+ "learning_rate": 0.0018824386183936745,
702
+ "loss": 1.2227,
703
+ "step": 56500
704
+ },
705
+ {
706
+ "epoch": 2.97,
707
+ "learning_rate": 0.001881398252184769,
708
+ "loss": 1.2199,
709
+ "step": 57000
710
+ },
711
+ {
712
+ "epoch": 2.99,
713
+ "learning_rate": 0.0018803578859758635,
714
+ "loss": 1.2195,
715
+ "step": 57500
716
+ },
717
+ {
718
+ "epoch": 3.0,
719
+ "eval_loss": 1.233520746231079,
720
+ "eval_runtime": 0.6051,
721
+ "eval_samples_per_second": 1652.629,
722
+ "eval_steps_per_second": 3.305,
723
+ "step": 57672
724
+ },
725
+ {
726
+ "epoch": 3.02,
727
+ "learning_rate": 0.0018793175197669581,
728
+ "loss": 1.2162,
729
+ "step": 58000
730
+ },
731
+ {
732
+ "epoch": 3.04,
733
+ "learning_rate": 0.0018782771535580526,
734
+ "loss": 1.2161,
735
+ "step": 58500
736
+ },
737
+ {
738
+ "epoch": 3.07,
739
+ "learning_rate": 0.001877236787349147,
740
+ "loss": 1.2146,
741
+ "step": 59000
742
+ },
743
+ {
744
+ "epoch": 3.1,
745
+ "learning_rate": 0.0018761964211402413,
746
+ "loss": 1.2163,
747
+ "step": 59500
748
+ },
749
+ {
750
+ "epoch": 3.12,
751
+ "learning_rate": 0.0018751560549313357,
752
+ "loss": 1.2163,
753
+ "step": 60000
754
+ },
755
+ {
756
+ "epoch": 3.15,
757
+ "learning_rate": 0.0018741156887224304,
758
+ "loss": 1.2152,
759
+ "step": 60500
760
+ },
761
+ {
762
+ "epoch": 3.17,
763
+ "learning_rate": 0.0018730753225135249,
764
+ "loss": 1.2164,
765
+ "step": 61000
766
+ },
767
+ {
768
+ "epoch": 3.2,
769
+ "learning_rate": 0.0018720349563046193,
770
+ "loss": 1.2157,
771
+ "step": 61500
772
+ },
773
+ {
774
+ "epoch": 3.23,
775
+ "learning_rate": 0.0018709945900957138,
776
+ "loss": 1.2176,
777
+ "step": 62000
778
+ },
779
+ {
780
+ "epoch": 3.25,
781
+ "learning_rate": 0.0018699542238868082,
782
+ "loss": 1.2168,
783
+ "step": 62500
784
+ },
785
+ {
786
+ "epoch": 3.28,
787
+ "learning_rate": 0.0018689138576779025,
788
+ "loss": 1.2159,
789
+ "step": 63000
790
+ },
791
+ {
792
+ "epoch": 3.3,
793
+ "learning_rate": 0.0018678734914689972,
794
+ "loss": 1.2145,
795
+ "step": 63500
796
+ },
797
+ {
798
+ "epoch": 3.33,
799
+ "learning_rate": 0.0018668331252600916,
800
+ "loss": 1.2148,
801
+ "step": 64000
802
+ },
803
+ {
804
+ "epoch": 3.36,
805
+ "learning_rate": 0.001865792759051186,
806
+ "loss": 1.2151,
807
+ "step": 64500
808
+ },
809
+ {
810
+ "epoch": 3.38,
811
+ "learning_rate": 0.0018647523928422805,
812
+ "loss": 1.2145,
813
+ "step": 65000
814
+ },
815
+ {
816
+ "epoch": 3.41,
817
+ "learning_rate": 0.001863712026633375,
818
+ "loss": 1.216,
819
+ "step": 65500
820
+ },
821
+ {
822
+ "epoch": 3.43,
823
+ "learning_rate": 0.0018626716604244697,
824
+ "loss": 1.2152,
825
+ "step": 66000
826
+ },
827
+ {
828
+ "epoch": 3.46,
829
+ "learning_rate": 0.001861631294215564,
830
+ "loss": 1.2132,
831
+ "step": 66500
832
+ },
833
+ {
834
+ "epoch": 3.49,
835
+ "learning_rate": 0.0018605909280066584,
836
+ "loss": 1.2137,
837
+ "step": 67000
838
+ },
839
+ {
840
+ "epoch": 3.51,
841
+ "learning_rate": 0.0018595505617977528,
842
+ "loss": 1.2145,
843
+ "step": 67500
844
+ },
845
+ {
846
+ "epoch": 3.54,
847
+ "learning_rate": 0.0018585101955888473,
848
+ "loss": 1.2141,
849
+ "step": 68000
850
+ },
851
+ {
852
+ "epoch": 3.56,
853
+ "learning_rate": 0.0018574698293799417,
854
+ "loss": 1.2148,
855
+ "step": 68500
856
+ },
857
+ {
858
+ "epoch": 3.59,
859
+ "learning_rate": 0.0018564294631710364,
860
+ "loss": 1.2125,
861
+ "step": 69000
862
+ },
863
+ {
864
+ "epoch": 3.62,
865
+ "learning_rate": 0.0018553890969621309,
866
+ "loss": 1.2132,
867
+ "step": 69500
868
+ },
869
+ {
870
+ "epoch": 3.64,
871
+ "learning_rate": 0.001854348730753225,
872
+ "loss": 1.2124,
873
+ "step": 70000
874
+ },
875
+ {
876
+ "epoch": 3.67,
877
+ "learning_rate": 0.0018533083645443196,
878
+ "loss": 1.2148,
879
+ "step": 70500
880
+ },
881
+ {
882
+ "epoch": 3.69,
883
+ "learning_rate": 0.001852267998335414,
884
+ "loss": 1.2135,
885
+ "step": 71000
886
+ },
887
+ {
888
+ "epoch": 3.72,
889
+ "learning_rate": 0.0018512276321265085,
890
+ "loss": 1.2132,
891
+ "step": 71500
892
+ },
893
+ {
894
+ "epoch": 3.75,
895
+ "learning_rate": 0.0018501872659176031,
896
+ "loss": 1.2133,
897
+ "step": 72000
898
+ },
899
+ {
900
+ "epoch": 3.77,
901
+ "learning_rate": 0.0018491468997086976,
902
+ "loss": 1.2144,
903
+ "step": 72500
904
+ },
905
+ {
906
+ "epoch": 3.8,
907
+ "learning_rate": 0.001848106533499792,
908
+ "loss": 1.2101,
909
+ "step": 73000
910
+ },
911
+ {
912
+ "epoch": 3.82,
913
+ "learning_rate": 0.0018470661672908863,
914
+ "loss": 1.212,
915
+ "step": 73500
916
+ },
917
+ {
918
+ "epoch": 3.85,
919
+ "learning_rate": 0.0018460258010819808,
920
+ "loss": 1.2119,
921
+ "step": 74000
922
+ },
923
+ {
924
+ "epoch": 3.88,
925
+ "learning_rate": 0.0018449854348730754,
926
+ "loss": 1.2117,
927
+ "step": 74500
928
+ },
929
+ {
930
+ "epoch": 3.9,
931
+ "learning_rate": 0.0018439450686641699,
932
+ "loss": 1.2106,
933
+ "step": 75000
934
+ },
935
+ {
936
+ "epoch": 3.93,
937
+ "learning_rate": 0.0018429047024552643,
938
+ "loss": 1.2125,
939
+ "step": 75500
940
+ },
941
+ {
942
+ "epoch": 3.95,
943
+ "learning_rate": 0.0018418643362463588,
944
+ "loss": 1.2106,
945
+ "step": 76000
946
+ },
947
+ {
948
+ "epoch": 3.98,
949
+ "learning_rate": 0.0018408239700374533,
950
+ "loss": 1.2108,
951
+ "step": 76500
952
+ },
953
+ {
954
+ "epoch": 4.0,
955
+ "eval_loss": 1.2261288166046143,
956
+ "eval_runtime": 0.8323,
957
+ "eval_samples_per_second": 1201.497,
958
+ "eval_steps_per_second": 2.403,
959
+ "step": 76896
960
+ },
961
+ {
962
+ "epoch": 4.01,
963
+ "learning_rate": 0.0018397836038285475,
964
+ "loss": 1.2091,
965
+ "step": 77000
966
+ },
967
+ {
968
+ "epoch": 4.03,
969
+ "learning_rate": 0.0018387432376196422,
970
+ "loss": 1.2055,
971
+ "step": 77500
972
+ },
973
+ {
974
+ "epoch": 4.06,
975
+ "learning_rate": 0.0018377028714107366,
976
+ "loss": 1.2061,
977
+ "step": 78000
978
+ },
979
+ {
980
+ "epoch": 4.08,
981
+ "learning_rate": 0.001836662505201831,
982
+ "loss": 1.2047,
983
+ "step": 78500
984
+ },
985
+ {
986
+ "epoch": 4.11,
987
+ "learning_rate": 0.0018356221389929255,
988
+ "loss": 1.2077,
989
+ "step": 79000
990
+ },
991
+ {
992
+ "epoch": 4.14,
993
+ "learning_rate": 0.00183458177278402,
994
+ "loss": 1.2069,
995
+ "step": 79500
996
+ },
997
+ {
998
+ "epoch": 4.16,
999
+ "learning_rate": 0.0018335414065751145,
1000
+ "loss": 1.2078,
1001
+ "step": 80000
1002
+ },
1003
+ {
1004
+ "epoch": 4.19,
1005
+ "learning_rate": 0.001832501040366209,
1006
+ "loss": 1.2058,
1007
+ "step": 80500
1008
+ },
1009
+ {
1010
+ "epoch": 4.21,
1011
+ "learning_rate": 0.0018314606741573034,
1012
+ "loss": 1.2075,
1013
+ "step": 81000
1014
+ },
1015
+ {
1016
+ "epoch": 4.24,
1017
+ "learning_rate": 0.0018304203079483978,
1018
+ "loss": 1.2064,
1019
+ "step": 81500
1020
+ },
1021
+ {
1022
+ "epoch": 4.27,
1023
+ "learning_rate": 0.0018293799417394923,
1024
+ "loss": 1.2059,
1025
+ "step": 82000
1026
+ },
1027
+ {
1028
+ "epoch": 4.29,
1029
+ "learning_rate": 0.0018283395755305867,
1030
+ "loss": 1.2075,
1031
+ "step": 82500
1032
+ },
1033
+ {
1034
+ "epoch": 4.32,
1035
+ "learning_rate": 0.0018272992093216814,
1036
+ "loss": 1.2042,
1037
+ "step": 83000
1038
+ },
1039
+ {
1040
+ "epoch": 4.34,
1041
+ "learning_rate": 0.0018262588431127759,
1042
+ "loss": 1.2051,
1043
+ "step": 83500
1044
+ },
1045
+ {
1046
+ "epoch": 4.37,
1047
+ "learning_rate": 0.0018252184769038703,
1048
+ "loss": 1.2066,
1049
+ "step": 84000
1050
+ },
1051
+ {
1052
+ "epoch": 4.4,
1053
+ "learning_rate": 0.0018241781106949646,
1054
+ "loss": 1.2081,
1055
+ "step": 84500
1056
+ },
1057
+ {
1058
+ "epoch": 4.42,
1059
+ "learning_rate": 0.001823137744486059,
1060
+ "loss": 1.2072,
1061
+ "step": 85000
1062
+ },
1063
+ {
1064
+ "epoch": 4.45,
1065
+ "learning_rate": 0.0018220973782771535,
1066
+ "loss": 1.2052,
1067
+ "step": 85500
1068
+ },
1069
+ {
1070
+ "epoch": 4.47,
1071
+ "learning_rate": 0.0018210570120682482,
1072
+ "loss": 1.2042,
1073
+ "step": 86000
1074
+ },
1075
+ {
1076
+ "epoch": 4.5,
1077
+ "learning_rate": 0.0018200166458593426,
1078
+ "loss": 1.2058,
1079
+ "step": 86500
1080
+ },
1081
+ {
1082
+ "epoch": 4.53,
1083
+ "learning_rate": 0.001818976279650437,
1084
+ "loss": 1.2064,
1085
+ "step": 87000
1086
+ },
1087
+ {
1088
+ "epoch": 4.55,
1089
+ "learning_rate": 0.0018179359134415315,
1090
+ "loss": 1.2032,
1091
+ "step": 87500
1092
+ },
1093
+ {
1094
+ "epoch": 4.58,
1095
+ "learning_rate": 0.0018168955472326258,
1096
+ "loss": 1.2059,
1097
+ "step": 88000
1098
+ },
1099
+ {
1100
+ "epoch": 4.6,
1101
+ "learning_rate": 0.0018158551810237204,
1102
+ "loss": 1.2058,
1103
+ "step": 88500
1104
+ },
1105
+ {
1106
+ "epoch": 4.63,
1107
+ "learning_rate": 0.001814814814814815,
1108
+ "loss": 1.2041,
1109
+ "step": 89000
1110
+ },
1111
+ {
1112
+ "epoch": 4.66,
1113
+ "learning_rate": 0.0018137744486059093,
1114
+ "loss": 1.2057,
1115
+ "step": 89500
1116
+ },
1117
+ {
1118
+ "epoch": 4.68,
1119
+ "learning_rate": 0.0018127340823970038,
1120
+ "loss": 1.2031,
1121
+ "step": 90000
1122
+ },
1123
+ {
1124
+ "epoch": 4.71,
1125
+ "learning_rate": 0.0018116937161880983,
1126
+ "loss": 1.2062,
1127
+ "step": 90500
1128
+ },
1129
+ {
1130
+ "epoch": 4.73,
1131
+ "learning_rate": 0.0018106533499791927,
1132
+ "loss": 1.2051,
1133
+ "step": 91000
1134
+ },
1135
+ {
1136
+ "epoch": 4.76,
1137
+ "learning_rate": 0.0018096129837702872,
1138
+ "loss": 1.2037,
1139
+ "step": 91500
1140
+ },
1141
+ {
1142
+ "epoch": 4.79,
1143
+ "learning_rate": 0.0018085726175613816,
1144
+ "loss": 1.2053,
1145
+ "step": 92000
1146
+ },
1147
+ {
1148
+ "epoch": 4.81,
1149
+ "learning_rate": 0.001807532251352476,
1150
+ "loss": 1.2046,
1151
+ "step": 92500
1152
+ },
1153
+ {
1154
+ "epoch": 4.84,
1155
+ "learning_rate": 0.0018064918851435705,
1156
+ "loss": 1.2023,
1157
+ "step": 93000
1158
+ },
1159
+ {
1160
+ "epoch": 4.86,
1161
+ "learning_rate": 0.001805451518934665,
1162
+ "loss": 1.2045,
1163
+ "step": 93500
1164
+ },
1165
+ {
1166
+ "epoch": 4.89,
1167
+ "learning_rate": 0.0018044111527257595,
1168
+ "loss": 1.204,
1169
+ "step": 94000
1170
+ },
1171
+ {
1172
+ "epoch": 4.92,
1173
+ "learning_rate": 0.0018033707865168541,
1174
+ "loss": 1.2037,
1175
+ "step": 94500
1176
+ },
1177
+ {
1178
+ "epoch": 4.94,
1179
+ "learning_rate": 0.0018023304203079484,
1180
+ "loss": 1.204,
1181
+ "step": 95000
1182
+ },
1183
+ {
1184
+ "epoch": 4.97,
1185
+ "learning_rate": 0.0018012900540990428,
1186
+ "loss": 1.2044,
1187
+ "step": 95500
1188
+ },
1189
+ {
1190
+ "epoch": 4.99,
1191
+ "learning_rate": 0.0018002496878901373,
1192
+ "loss": 1.2022,
1193
+ "step": 96000
1194
+ },
1195
+ {
1196
+ "epoch": 5.0,
1197
+ "eval_loss": 1.2207547426223755,
1198
+ "eval_runtime": 0.6112,
1199
+ "eval_samples_per_second": 1636.066,
1200
+ "eval_steps_per_second": 3.272,
1201
+ "step": 96120
1202
+ },
1203
+ {
1204
+ "epoch": 5.02,
1205
+ "learning_rate": 0.0017992093216812317,
1206
+ "loss": 1.2001,
1207
+ "step": 96500
1208
+ },
1209
+ {
1210
+ "epoch": 5.05,
1211
+ "learning_rate": 0.0017981689554723264,
1212
+ "loss": 1.1996,
1213
+ "step": 97000
1214
+ },
1215
+ {
1216
+ "epoch": 5.07,
1217
+ "learning_rate": 0.0017971285892634209,
1218
+ "loss": 1.1989,
1219
+ "step": 97500
1220
+ },
1221
+ {
1222
+ "epoch": 5.1,
1223
+ "learning_rate": 0.0017960882230545153,
1224
+ "loss": 1.1998,
1225
+ "step": 98000
1226
+ },
1227
+ {
1228
+ "epoch": 5.12,
1229
+ "learning_rate": 0.0017950478568456096,
1230
+ "loss": 1.1989,
1231
+ "step": 98500
1232
+ },
1233
+ {
1234
+ "epoch": 5.15,
1235
+ "learning_rate": 0.001794007490636704,
1236
+ "loss": 1.1984,
1237
+ "step": 99000
1238
+ },
1239
+ {
1240
+ "epoch": 5.18,
1241
+ "learning_rate": 0.0017929671244277985,
1242
+ "loss": 1.1991,
1243
+ "step": 99500
1244
+ },
1245
+ {
1246
+ "epoch": 5.2,
1247
+ "learning_rate": 0.0017919267582188932,
1248
+ "loss": 1.1993,
1249
+ "step": 100000
1250
+ },
1251
+ {
1252
+ "epoch": 5.23,
1253
+ "learning_rate": 0.0017908863920099876,
1254
+ "loss": 1.1996,
1255
+ "step": 100500
1256
+ },
1257
+ {
1258
+ "epoch": 5.25,
1259
+ "learning_rate": 0.001789846025801082,
1260
+ "loss": 1.1995,
1261
+ "step": 101000
1262
+ },
1263
+ {
1264
+ "epoch": 5.28,
1265
+ "learning_rate": 0.0017888056595921765,
1266
+ "loss": 1.1987,
1267
+ "step": 101500
1268
+ },
1269
+ {
1270
+ "epoch": 5.31,
1271
+ "learning_rate": 0.0017877652933832708,
1272
+ "loss": 1.1971,
1273
+ "step": 102000
1274
+ },
1275
+ {
1276
+ "epoch": 5.33,
1277
+ "learning_rate": 0.0017867249271743654,
1278
+ "loss": 1.1984,
1279
+ "step": 102500
1280
+ },
1281
+ {
1282
+ "epoch": 5.36,
1283
+ "learning_rate": 0.00178568456096546,
1284
+ "loss": 1.2003,
1285
+ "step": 103000
1286
+ },
1287
+ {
1288
+ "epoch": 5.38,
1289
+ "learning_rate": 0.0017846441947565544,
1290
+ "loss": 1.2011,
1291
+ "step": 103500
1292
+ },
1293
+ {
1294
+ "epoch": 5.41,
1295
+ "learning_rate": 0.0017836038285476488,
1296
+ "loss": 1.1994,
1297
+ "step": 104000
1298
+ },
1299
+ {
1300
+ "epoch": 5.44,
1301
+ "learning_rate": 0.0017825634623387433,
1302
+ "loss": 1.1989,
1303
+ "step": 104500
1304
+ },
1305
+ {
1306
+ "epoch": 5.46,
1307
+ "learning_rate": 0.0017815230961298377,
1308
+ "loss": 1.1996,
1309
+ "step": 105000
1310
+ },
1311
+ {
1312
+ "epoch": 5.49,
1313
+ "learning_rate": 0.0017804827299209324,
1314
+ "loss": 1.1982,
1315
+ "step": 105500
1316
+ },
1317
+ {
1318
+ "epoch": 5.51,
1319
+ "learning_rate": 0.0017794423637120266,
1320
+ "loss": 1.1971,
1321
+ "step": 106000
1322
+ },
1323
+ {
1324
+ "epoch": 5.54,
1325
+ "learning_rate": 0.001778401997503121,
1326
+ "loss": 1.1988,
1327
+ "step": 106500
1328
+ },
1329
+ {
1330
+ "epoch": 5.57,
1331
+ "learning_rate": 0.0017773616312942156,
1332
+ "loss": 1.1996,
1333
+ "step": 107000
1334
+ },
1335
+ {
1336
+ "epoch": 5.59,
1337
+ "learning_rate": 0.00177632126508531,
1338
+ "loss": 1.1972,
1339
+ "step": 107500
1340
+ },
1341
+ {
1342
+ "epoch": 5.62,
1343
+ "learning_rate": 0.0017752808988764045,
1344
+ "loss": 1.1991,
1345
+ "step": 108000
1346
+ },
1347
+ {
1348
+ "epoch": 5.64,
1349
+ "learning_rate": 0.0017742405326674991,
1350
+ "loss": 1.1987,
1351
+ "step": 108500
1352
+ },
1353
+ {
1354
+ "epoch": 5.67,
1355
+ "learning_rate": 0.0017732001664585936,
1356
+ "loss": 1.1983,
1357
+ "step": 109000
1358
+ },
1359
+ {
1360
+ "epoch": 5.7,
1361
+ "learning_rate": 0.0017721598002496878,
1362
+ "loss": 1.1993,
1363
+ "step": 109500
1364
+ },
1365
+ {
1366
+ "epoch": 5.72,
1367
+ "learning_rate": 0.0017711194340407823,
1368
+ "loss": 1.1979,
1369
+ "step": 110000
1370
+ },
1371
+ {
1372
+ "epoch": 5.75,
1373
+ "learning_rate": 0.0017700790678318768,
1374
+ "loss": 1.1989,
1375
+ "step": 110500
1376
+ },
1377
+ {
1378
+ "epoch": 5.77,
1379
+ "learning_rate": 0.0017690387016229714,
1380
+ "loss": 1.2003,
1381
+ "step": 111000
1382
+ },
1383
+ {
1384
+ "epoch": 5.8,
1385
+ "learning_rate": 0.0017679983354140659,
1386
+ "loss": 1.1987,
1387
+ "step": 111500
1388
+ },
1389
+ {
1390
+ "epoch": 5.83,
1391
+ "learning_rate": 0.0017669579692051603,
1392
+ "loss": 1.1987,
1393
+ "step": 112000
1394
+ },
1395
+ {
1396
+ "epoch": 5.85,
1397
+ "learning_rate": 0.0017659176029962548,
1398
+ "loss": 1.1976,
1399
+ "step": 112500
1400
+ },
1401
+ {
1402
+ "epoch": 5.88,
1403
+ "learning_rate": 0.001764877236787349,
1404
+ "loss": 1.1983,
1405
+ "step": 113000
1406
+ },
1407
+ {
1408
+ "epoch": 5.9,
1409
+ "learning_rate": 0.0017638368705784435,
1410
+ "loss": 1.1981,
1411
+ "step": 113500
1412
+ },
1413
+ {
1414
+ "epoch": 5.93,
1415
+ "learning_rate": 0.0017627965043695382,
1416
+ "loss": 1.1989,
1417
+ "step": 114000
1418
+ },
1419
+ {
1420
+ "epoch": 5.96,
1421
+ "learning_rate": 0.0017617561381606326,
1422
+ "loss": 1.1963,
1423
+ "step": 114500
1424
+ },
1425
+ {
1426
+ "epoch": 5.98,
1427
+ "learning_rate": 0.001760715771951727,
1428
+ "loss": 1.1983,
1429
+ "step": 115000
1430
+ },
1431
+ {
1432
+ "epoch": 6.0,
1433
+ "eval_loss": 1.216284155845642,
1434
+ "eval_runtime": 0.6159,
1435
+ "eval_samples_per_second": 1623.524,
1436
+ "eval_steps_per_second": 3.247,
1437
+ "step": 115344
1438
+ },
1439
+ {
1440
+ "epoch": 6.01,
1441
+ "learning_rate": 0.0017596754057428215,
1442
+ "loss": 1.1979,
1443
+ "step": 115500
1444
+ },
1445
+ {
1446
+ "epoch": 6.03,
1447
+ "learning_rate": 0.001758635039533916,
1448
+ "loss": 1.1933,
1449
+ "step": 116000
1450
+ },
1451
+ {
1452
+ "epoch": 6.06,
1453
+ "learning_rate": 0.0017575946733250102,
1454
+ "loss": 1.1942,
1455
+ "step": 116500
1456
+ },
1457
+ {
1458
+ "epoch": 6.09,
1459
+ "learning_rate": 0.001756554307116105,
1460
+ "loss": 1.1937,
1461
+ "step": 117000
1462
+ },
1463
+ {
1464
+ "epoch": 6.11,
1465
+ "learning_rate": 0.0017555139409071994,
1466
+ "loss": 1.1937,
1467
+ "step": 117500
1468
+ },
1469
+ {
1470
+ "epoch": 6.14,
1471
+ "learning_rate": 0.0017544735746982938,
1472
+ "loss": 1.1958,
1473
+ "step": 118000
1474
+ },
1475
+ {
1476
+ "epoch": 6.16,
1477
+ "learning_rate": 0.0017534332084893883,
1478
+ "loss": 1.1936,
1479
+ "step": 118500
1480
+ },
1481
+ {
1482
+ "epoch": 6.19,
1483
+ "learning_rate": 0.0017523928422804827,
1484
+ "loss": 1.1952,
1485
+ "step": 119000
1486
+ },
1487
+ {
1488
+ "epoch": 6.22,
1489
+ "learning_rate": 0.0017513524760715774,
1490
+ "loss": 1.1959,
1491
+ "step": 119500
1492
+ },
1493
+ {
1494
+ "epoch": 6.24,
1495
+ "learning_rate": 0.0017503121098626717,
1496
+ "loss": 1.1943,
1497
+ "step": 120000
1498
+ },
1499
+ {
1500
+ "epoch": 6.27,
1501
+ "learning_rate": 0.0017492717436537661,
1502
+ "loss": 1.1945,
1503
+ "step": 120500
1504
+ },
1505
+ {
1506
+ "epoch": 6.29,
1507
+ "learning_rate": 0.0017482313774448606,
1508
+ "loss": 1.1952,
1509
+ "step": 121000
1510
+ },
1511
+ {
1512
+ "epoch": 6.32,
1513
+ "learning_rate": 0.001747191011235955,
1514
+ "loss": 1.1972,
1515
+ "step": 121500
1516
+ },
1517
+ {
1518
+ "epoch": 6.35,
1519
+ "learning_rate": 0.0017461506450270495,
1520
+ "loss": 1.1933,
1521
+ "step": 122000
1522
+ },
1523
+ {
1524
+ "epoch": 6.37,
1525
+ "learning_rate": 0.0017451102788181442,
1526
+ "loss": 1.1949,
1527
+ "step": 122500
1528
+ },
1529
+ {
1530
+ "epoch": 6.4,
1531
+ "learning_rate": 0.0017440699126092386,
1532
+ "loss": 1.1947,
1533
+ "step": 123000
1534
+ },
1535
+ {
1536
+ "epoch": 6.42,
1537
+ "learning_rate": 0.0017430295464003329,
1538
+ "loss": 1.1935,
1539
+ "step": 123500
1540
+ },
1541
+ {
1542
+ "epoch": 6.45,
1543
+ "learning_rate": 0.0017419891801914273,
1544
+ "loss": 1.1954,
1545
+ "step": 124000
1546
+ },
1547
+ {
1548
+ "epoch": 6.48,
1549
+ "learning_rate": 0.0017409488139825218,
1550
+ "loss": 1.193,
1551
+ "step": 124500
1552
+ },
1553
+ {
1554
+ "epoch": 6.5,
1555
+ "learning_rate": 0.0017399084477736164,
1556
+ "loss": 1.1938,
1557
+ "step": 125000
1558
+ },
1559
+ {
1560
+ "epoch": 6.53,
1561
+ "learning_rate": 0.001738868081564711,
1562
+ "loss": 1.1939,
1563
+ "step": 125500
1564
+ },
1565
+ {
1566
+ "epoch": 6.55,
1567
+ "learning_rate": 0.0017378277153558054,
1568
+ "loss": 1.1948,
1569
+ "step": 126000
1570
+ },
1571
+ {
1572
+ "epoch": 6.58,
1573
+ "learning_rate": 0.0017367873491468998,
1574
+ "loss": 1.1926,
1575
+ "step": 126500
1576
+ },
1577
+ {
1578
+ "epoch": 6.61,
1579
+ "learning_rate": 0.001735746982937994,
1580
+ "loss": 1.1936,
1581
+ "step": 127000
1582
+ },
1583
+ {
1584
+ "epoch": 6.63,
1585
+ "learning_rate": 0.0017347066167290885,
1586
+ "loss": 1.1933,
1587
+ "step": 127500
1588
+ },
1589
+ {
1590
+ "epoch": 6.66,
1591
+ "learning_rate": 0.0017336662505201832,
1592
+ "loss": 1.1947,
1593
+ "step": 128000
1594
+ },
1595
+ {
1596
+ "epoch": 6.68,
1597
+ "learning_rate": 0.0017326258843112776,
1598
+ "loss": 1.1931,
1599
+ "step": 128500
1600
+ },
1601
+ {
1602
+ "epoch": 6.71,
1603
+ "learning_rate": 0.001731585518102372,
1604
+ "loss": 1.1931,
1605
+ "step": 129000
1606
+ },
1607
+ {
1608
+ "epoch": 6.74,
1609
+ "learning_rate": 0.0017305451518934666,
1610
+ "loss": 1.1938,
1611
+ "step": 129500
1612
+ },
1613
+ {
1614
+ "epoch": 6.76,
1615
+ "learning_rate": 0.001729504785684561,
1616
+ "loss": 1.1939,
1617
+ "step": 130000
1618
+ },
1619
+ {
1620
+ "epoch": 6.79,
1621
+ "learning_rate": 0.0017284644194756553,
1622
+ "loss": 1.1923,
1623
+ "step": 130500
1624
+ },
1625
+ {
1626
+ "epoch": 6.81,
1627
+ "learning_rate": 0.00172742405326675,
1628
+ "loss": 1.1932,
1629
+ "step": 131000
1630
+ },
1631
+ {
1632
+ "epoch": 6.84,
1633
+ "learning_rate": 0.0017263836870578444,
1634
+ "loss": 1.1926,
1635
+ "step": 131500
1636
+ },
1637
+ {
1638
+ "epoch": 6.87,
1639
+ "learning_rate": 0.0017253433208489388,
1640
+ "loss": 1.1929,
1641
+ "step": 132000
1642
+ },
1643
+ {
1644
+ "epoch": 6.89,
1645
+ "learning_rate": 0.0017243029546400333,
1646
+ "loss": 1.1932,
1647
+ "step": 132500
1648
+ },
1649
+ {
1650
+ "epoch": 6.92,
1651
+ "learning_rate": 0.0017232625884311278,
1652
+ "loss": 1.1932,
1653
+ "step": 133000
1654
+ },
1655
+ {
1656
+ "epoch": 6.94,
1657
+ "learning_rate": 0.0017222222222222224,
1658
+ "loss": 1.1952,
1659
+ "step": 133500
1660
+ },
1661
+ {
1662
+ "epoch": 6.97,
1663
+ "learning_rate": 0.0017211818560133169,
1664
+ "loss": 1.1924,
1665
+ "step": 134000
1666
+ },
1667
+ {
1668
+ "epoch": 7.0,
1669
+ "learning_rate": 0.0017201414898044111,
1670
+ "loss": 1.1927,
1671
+ "step": 134500
1672
+ },
1673
+ {
1674
+ "epoch": 7.0,
1675
+ "eval_loss": 1.2103557586669922,
1676
+ "eval_runtime": 0.6147,
1677
+ "eval_samples_per_second": 1626.936,
1678
+ "eval_steps_per_second": 3.254,
1679
+ "step": 134568
1680
+ },
1681
+ {
1682
+ "epoch": 7.02,
1683
+ "learning_rate": 0.0017191011235955056,
1684
+ "loss": 1.1869,
1685
+ "step": 135000
1686
+ },
1687
+ {
1688
+ "epoch": 7.05,
1689
+ "learning_rate": 0.0017180607573866,
1690
+ "loss": 1.1872,
1691
+ "step": 135500
1692
+ },
1693
+ {
1694
+ "epoch": 7.07,
1695
+ "learning_rate": 0.0017170203911776945,
1696
+ "loss": 1.1898,
1697
+ "step": 136000
1698
+ },
1699
+ {
1700
+ "epoch": 7.1,
1701
+ "learning_rate": 0.0017159800249687892,
1702
+ "loss": 1.1895,
1703
+ "step": 136500
1704
+ },
1705
+ {
1706
+ "epoch": 7.13,
1707
+ "learning_rate": 0.0017149396587598836,
1708
+ "loss": 1.1902,
1709
+ "step": 137000
1710
+ },
1711
+ {
1712
+ "epoch": 7.15,
1713
+ "learning_rate": 0.001713899292550978,
1714
+ "loss": 1.1901,
1715
+ "step": 137500
1716
+ },
1717
+ {
1718
+ "epoch": 7.18,
1719
+ "learning_rate": 0.0017128589263420723,
1720
+ "loss": 1.1892,
1721
+ "step": 138000
1722
+ },
1723
+ {
1724
+ "epoch": 7.2,
1725
+ "learning_rate": 0.0017118185601331668,
1726
+ "loss": 1.1902,
1727
+ "step": 138500
1728
+ },
1729
+ {
1730
+ "epoch": 7.23,
1731
+ "learning_rate": 0.0017107781939242615,
1732
+ "loss": 1.1906,
1733
+ "step": 139000
1734
+ },
1735
+ {
1736
+ "epoch": 7.26,
1737
+ "learning_rate": 0.001709737827715356,
1738
+ "loss": 1.1904,
1739
+ "step": 139500
1740
+ },
1741
+ {
1742
+ "epoch": 7.28,
1743
+ "learning_rate": 0.0017086974615064504,
1744
+ "loss": 1.1898,
1745
+ "step": 140000
1746
+ },
1747
+ {
1748
+ "epoch": 7.31,
1749
+ "learning_rate": 0.0017076570952975448,
1750
+ "loss": 1.1917,
1751
+ "step": 140500
1752
+ },
1753
+ {
1754
+ "epoch": 7.33,
1755
+ "learning_rate": 0.0017066167290886393,
1756
+ "loss": 1.1914,
1757
+ "step": 141000
1758
+ },
1759
+ {
1760
+ "epoch": 7.36,
1761
+ "learning_rate": 0.0017055763628797335,
1762
+ "loss": 1.1905,
1763
+ "step": 141500
1764
+ },
1765
+ {
1766
+ "epoch": 7.39,
1767
+ "learning_rate": 0.0017045359966708282,
1768
+ "loss": 1.1921,
1769
+ "step": 142000
1770
+ },
1771
+ {
1772
+ "epoch": 7.41,
1773
+ "learning_rate": 0.0017034956304619227,
1774
+ "loss": 1.1899,
1775
+ "step": 142500
1776
+ },
1777
+ {
1778
+ "epoch": 7.44,
1779
+ "learning_rate": 0.001702455264253017,
1780
+ "loss": 1.19,
1781
+ "step": 143000
1782
+ },
1783
+ {
1784
+ "epoch": 7.46,
1785
+ "learning_rate": 0.0017014148980441116,
1786
+ "loss": 1.1883,
1787
+ "step": 143500
1788
+ },
1789
+ {
1790
+ "epoch": 7.49,
1791
+ "learning_rate": 0.001700374531835206,
1792
+ "loss": 1.191,
1793
+ "step": 144000
1794
+ },
1795
+ {
1796
+ "epoch": 7.52,
1797
+ "learning_rate": 0.0016993341656263005,
1798
+ "loss": 1.1896,
1799
+ "step": 144500
1800
+ },
1801
+ {
1802
+ "epoch": 7.54,
1803
+ "learning_rate": 0.001698293799417395,
1804
+ "loss": 1.1893,
1805
+ "step": 145000
1806
+ },
1807
+ {
1808
+ "epoch": 7.57,
1809
+ "learning_rate": 0.0016972534332084894,
1810
+ "loss": 1.1892,
1811
+ "step": 145500
1812
+ },
1813
+ {
1814
+ "epoch": 7.59,
1815
+ "learning_rate": 0.0016962130669995838,
1816
+ "loss": 1.1887,
1817
+ "step": 146000
1818
+ },
1819
+ {
1820
+ "epoch": 7.62,
1821
+ "learning_rate": 0.0016951727007906783,
1822
+ "loss": 1.1913,
1823
+ "step": 146500
1824
+ },
1825
+ {
1826
+ "epoch": 7.65,
1827
+ "learning_rate": 0.0016941323345817728,
1828
+ "loss": 1.1895,
1829
+ "step": 147000
1830
+ },
1831
+ {
1832
+ "epoch": 7.67,
1833
+ "learning_rate": 0.0016930919683728674,
1834
+ "loss": 1.1891,
1835
+ "step": 147500
1836
+ },
1837
+ {
1838
+ "epoch": 7.7,
1839
+ "learning_rate": 0.001692051602163962,
1840
+ "loss": 1.189,
1841
+ "step": 148000
1842
+ },
1843
+ {
1844
+ "epoch": 7.72,
1845
+ "learning_rate": 0.0016910112359550561,
1846
+ "loss": 1.1907,
1847
+ "step": 148500
1848
+ },
1849
+ {
1850
+ "epoch": 7.75,
1851
+ "learning_rate": 0.0016899708697461506,
1852
+ "loss": 1.1911,
1853
+ "step": 149000
1854
+ },
1855
+ {
1856
+ "epoch": 7.78,
1857
+ "learning_rate": 0.001688930503537245,
1858
+ "loss": 1.1881,
1859
+ "step": 149500
1860
+ },
1861
+ {
1862
+ "epoch": 7.8,
1863
+ "learning_rate": 0.0016878901373283395,
1864
+ "loss": 1.1893,
1865
+ "step": 150000
1866
+ },
1867
+ {
1868
+ "epoch": 7.83,
1869
+ "learning_rate": 0.0016868497711194342,
1870
+ "loss": 1.1902,
1871
+ "step": 150500
1872
+ },
1873
+ {
1874
+ "epoch": 7.85,
1875
+ "learning_rate": 0.0016858094049105286,
1876
+ "loss": 1.1912,
1877
+ "step": 151000
1878
+ },
1879
+ {
1880
+ "epoch": 7.88,
1881
+ "learning_rate": 0.001684769038701623,
1882
+ "loss": 1.1907,
1883
+ "step": 151500
1884
+ },
1885
+ {
1886
+ "epoch": 7.91,
1887
+ "learning_rate": 0.0016837286724927173,
1888
+ "loss": 1.1909,
1889
+ "step": 152000
1890
+ },
1891
+ {
1892
+ "epoch": 7.93,
1893
+ "learning_rate": 0.0016826883062838118,
1894
+ "loss": 1.1875,
1895
+ "step": 152500
1896
+ },
1897
+ {
1898
+ "epoch": 7.96,
1899
+ "learning_rate": 0.0016816479400749065,
1900
+ "loss": 1.19,
1901
+ "step": 153000
1902
+ },
1903
+ {
1904
+ "epoch": 7.98,
1905
+ "learning_rate": 0.001680607573866001,
1906
+ "loss": 1.1881,
1907
+ "step": 153500
1908
+ },
1909
+ {
1910
+ "epoch": 8.0,
1911
+ "eval_loss": 1.2096730470657349,
1912
+ "eval_runtime": 0.6211,
1913
+ "eval_samples_per_second": 1609.947,
1914
+ "eval_steps_per_second": 3.22,
1915
+ "step": 153792
1916
+ },
1917
+ {
1918
+ "epoch": 8.01,
1919
+ "learning_rate": 0.0016795672076570954,
1920
+ "loss": 1.1874,
1921
+ "step": 154000
1922
+ },
1923
+ {
1924
+ "epoch": 8.04,
1925
+ "learning_rate": 0.0016785268414481898,
1926
+ "loss": 1.1844,
1927
+ "step": 154500
1928
+ },
1929
+ {
1930
+ "epoch": 8.06,
1931
+ "learning_rate": 0.0016774864752392843,
1932
+ "loss": 1.1845,
1933
+ "step": 155000
1934
+ },
1935
+ {
1936
+ "epoch": 8.09,
1937
+ "learning_rate": 0.0016764461090303787,
1938
+ "loss": 1.186,
1939
+ "step": 155500
1940
+ },
1941
+ {
1942
+ "epoch": 8.11,
1943
+ "learning_rate": 0.0016754057428214732,
1944
+ "loss": 1.1851,
1945
+ "step": 156000
1946
+ },
1947
+ {
1948
+ "epoch": 8.14,
1949
+ "learning_rate": 0.0016743653766125677,
1950
+ "loss": 1.1872,
1951
+ "step": 156500
1952
+ },
1953
+ {
1954
+ "epoch": 8.17,
1955
+ "learning_rate": 0.0016733250104036621,
1956
+ "loss": 1.1872,
1957
+ "step": 157000
1958
+ },
1959
+ {
1960
+ "epoch": 8.19,
1961
+ "learning_rate": 0.0016722846441947566,
1962
+ "loss": 1.1862,
1963
+ "step": 157500
1964
+ },
1965
+ {
1966
+ "epoch": 8.22,
1967
+ "learning_rate": 0.001671244277985851,
1968
+ "loss": 1.1867,
1969
+ "step": 158000
1970
+ },
1971
+ {
1972
+ "epoch": 8.24,
1973
+ "learning_rate": 0.0016702039117769455,
1974
+ "loss": 1.186,
1975
+ "step": 158500
1976
+ },
1977
+ {
1978
+ "epoch": 8.27,
1979
+ "learning_rate": 0.0016691635455680402,
1980
+ "loss": 1.1858,
1981
+ "step": 159000
1982
+ },
1983
+ {
1984
+ "epoch": 8.3,
1985
+ "learning_rate": 0.0016681231793591344,
1986
+ "loss": 1.1843,
1987
+ "step": 159500
1988
+ },
1989
+ {
1990
+ "epoch": 8.32,
1991
+ "learning_rate": 0.0016670828131502289,
1992
+ "loss": 1.1857,
1993
+ "step": 160000
1994
+ },
1995
+ {
1996
+ "epoch": 8.35,
1997
+ "learning_rate": 0.0016660424469413233,
1998
+ "loss": 1.1872,
1999
+ "step": 160500
2000
+ },
2001
+ {
2002
+ "epoch": 8.37,
2003
+ "learning_rate": 0.0016650020807324178,
2004
+ "loss": 1.1859,
2005
+ "step": 161000
2006
+ },
2007
+ {
2008
+ "epoch": 8.4,
2009
+ "learning_rate": 0.0016639617145235124,
2010
+ "loss": 1.1865,
2011
+ "step": 161500
2012
+ },
2013
+ {
2014
+ "epoch": 8.43,
2015
+ "learning_rate": 0.001662921348314607,
2016
+ "loss": 1.1871,
2017
+ "step": 162000
2018
+ },
2019
+ {
2020
+ "epoch": 8.45,
2021
+ "learning_rate": 0.0016618809821057014,
2022
+ "loss": 1.1871,
2023
+ "step": 162500
2024
+ },
2025
+ {
2026
+ "epoch": 8.48,
2027
+ "learning_rate": 0.0016608406158967956,
2028
+ "loss": 1.1876,
2029
+ "step": 163000
2030
+ },
2031
+ {
2032
+ "epoch": 8.5,
2033
+ "learning_rate": 0.00165980024968789,
2034
+ "loss": 1.1876,
2035
+ "step": 163500
2036
+ },
2037
+ {
2038
+ "epoch": 8.53,
2039
+ "learning_rate": 0.0016587598834789845,
2040
+ "loss": 1.1874,
2041
+ "step": 164000
2042
+ },
2043
+ {
2044
+ "epoch": 8.56,
2045
+ "learning_rate": 0.0016577195172700792,
2046
+ "loss": 1.186,
2047
+ "step": 164500
2048
+ },
2049
+ {
2050
+ "epoch": 8.58,
2051
+ "learning_rate": 0.0016566791510611736,
2052
+ "loss": 1.1869,
2053
+ "step": 165000
2054
+ },
2055
+ {
2056
+ "epoch": 8.61,
2057
+ "learning_rate": 0.001655638784852268,
2058
+ "loss": 1.1865,
2059
+ "step": 165500
2060
+ },
2061
+ {
2062
+ "epoch": 8.64,
2063
+ "learning_rate": 0.0016545984186433626,
2064
+ "loss": 1.1862,
2065
+ "step": 166000
2066
+ },
2067
+ {
2068
+ "epoch": 8.66,
2069
+ "learning_rate": 0.0016535580524344568,
2070
+ "loss": 1.1881,
2071
+ "step": 166500
2072
+ },
2073
+ {
2074
+ "epoch": 8.69,
2075
+ "learning_rate": 0.0016525176862255513,
2076
+ "loss": 1.1875,
2077
+ "step": 167000
2078
+ },
2079
+ {
2080
+ "epoch": 8.71,
2081
+ "learning_rate": 0.001651477320016646,
2082
+ "loss": 1.1866,
2083
+ "step": 167500
2084
+ },
2085
+ {
2086
+ "epoch": 8.74,
2087
+ "learning_rate": 0.0016504369538077404,
2088
+ "loss": 1.1868,
2089
+ "step": 168000
2090
+ },
2091
+ {
2092
+ "epoch": 8.77,
2093
+ "learning_rate": 0.0016493965875988348,
2094
+ "loss": 1.1867,
2095
+ "step": 168500
2096
+ },
2097
+ {
2098
+ "epoch": 8.79,
2099
+ "learning_rate": 0.0016483562213899293,
2100
+ "loss": 1.1858,
2101
+ "step": 169000
2102
+ },
2103
+ {
2104
+ "epoch": 8.82,
2105
+ "learning_rate": 0.0016473158551810238,
2106
+ "loss": 1.1869,
2107
+ "step": 169500
2108
+ },
2109
+ {
2110
+ "epoch": 8.84,
2111
+ "learning_rate": 0.0016462754889721182,
2112
+ "loss": 1.1861,
2113
+ "step": 170000
2114
+ },
2115
+ {
2116
+ "epoch": 8.87,
2117
+ "learning_rate": 0.0016452351227632127,
2118
+ "loss": 1.1849,
2119
+ "step": 170500
2120
+ },
2121
+ {
2122
+ "epoch": 8.9,
2123
+ "learning_rate": 0.0016441947565543071,
2124
+ "loss": 1.1857,
2125
+ "step": 171000
2126
+ },
2127
+ {
2128
+ "epoch": 8.92,
2129
+ "learning_rate": 0.0016431543903454016,
2130
+ "loss": 1.1883,
2131
+ "step": 171500
2132
+ },
2133
+ {
2134
+ "epoch": 8.95,
2135
+ "learning_rate": 0.001642114024136496,
2136
+ "loss": 1.1862,
2137
+ "step": 172000
2138
+ },
2139
+ {
2140
+ "epoch": 8.97,
2141
+ "learning_rate": 0.0016410736579275905,
2142
+ "loss": 1.1859,
2143
+ "step": 172500
2144
+ },
2145
+ {
2146
+ "epoch": 9.0,
2147
+ "learning_rate": 0.0016400332917186852,
2148
+ "loss": 1.1865,
2149
+ "step": 173000
2150
+ },
2151
+ {
2152
+ "epoch": 9.0,
2153
+ "eval_loss": 1.2050005197525024,
2154
+ "eval_runtime": 0.6154,
2155
+ "eval_samples_per_second": 1624.869,
2156
+ "eval_steps_per_second": 3.25,
2157
+ "step": 173016
2158
+ },
2159
+ {
2160
+ "epoch": 9.03,
2161
+ "learning_rate": 0.0016389929255097794,
2162
+ "loss": 1.181,
2163
+ "step": 173500
2164
+ },
2165
+ {
2166
+ "epoch": 9.05,
2167
+ "learning_rate": 0.0016379525593008739,
2168
+ "loss": 1.1819,
2169
+ "step": 174000
2170
+ },
2171
+ {
2172
+ "epoch": 9.08,
2173
+ "learning_rate": 0.0016369121930919683,
2174
+ "loss": 1.1832,
2175
+ "step": 174500
2176
+ },
2177
+ {
2178
+ "epoch": 9.1,
2179
+ "learning_rate": 0.0016358718268830628,
2180
+ "loss": 1.1833,
2181
+ "step": 175000
2182
+ },
2183
+ {
2184
+ "epoch": 9.13,
2185
+ "learning_rate": 0.0016348314606741575,
2186
+ "loss": 1.1814,
2187
+ "step": 175500
2188
+ },
2189
+ {
2190
+ "epoch": 9.16,
2191
+ "learning_rate": 0.001633791094465252,
2192
+ "loss": 1.1824,
2193
+ "step": 176000
2194
+ },
2195
+ {
2196
+ "epoch": 9.18,
2197
+ "learning_rate": 0.0016327507282563464,
2198
+ "loss": 1.1836,
2199
+ "step": 176500
2200
+ },
2201
+ {
2202
+ "epoch": 9.21,
2203
+ "learning_rate": 0.0016317103620474408,
2204
+ "loss": 1.1824,
2205
+ "step": 177000
2206
+ },
2207
+ {
2208
+ "epoch": 9.23,
2209
+ "learning_rate": 0.001630669995838535,
2210
+ "loss": 1.1837,
2211
+ "step": 177500
2212
+ },
2213
+ {
2214
+ "epoch": 9.26,
2215
+ "learning_rate": 0.0016296296296296295,
2216
+ "loss": 1.1859,
2217
+ "step": 178000
2218
+ },
2219
+ {
2220
+ "epoch": 9.29,
2221
+ "learning_rate": 0.0016285892634207242,
2222
+ "loss": 1.1834,
2223
+ "step": 178500
2224
+ },
2225
+ {
2226
+ "epoch": 9.31,
2227
+ "learning_rate": 0.0016275488972118187,
2228
+ "loss": 1.1829,
2229
+ "step": 179000
2230
+ },
2231
+ {
2232
+ "epoch": 9.34,
2233
+ "learning_rate": 0.0016265085310029131,
2234
+ "loss": 1.1827,
2235
+ "step": 179500
2236
+ },
2237
+ {
2238
+ "epoch": 9.36,
2239
+ "learning_rate": 0.0016254681647940076,
2240
+ "loss": 1.1845,
2241
+ "step": 180000
2242
+ },
2243
+ {
2244
+ "epoch": 9.39,
2245
+ "learning_rate": 0.001624427798585102,
2246
+ "loss": 1.1836,
2247
+ "step": 180500
2248
+ },
2249
+ {
2250
+ "epoch": 9.42,
2251
+ "learning_rate": 0.0016233874323761963,
2252
+ "loss": 1.1821,
2253
+ "step": 181000
2254
+ },
2255
+ {
2256
+ "epoch": 9.44,
2257
+ "learning_rate": 0.001622347066167291,
2258
+ "loss": 1.1833,
2259
+ "step": 181500
2260
+ },
2261
+ {
2262
+ "epoch": 9.47,
2263
+ "learning_rate": 0.0016213066999583854,
2264
+ "loss": 1.1843,
2265
+ "step": 182000
2266
+ },
2267
+ {
2268
+ "epoch": 9.49,
2269
+ "learning_rate": 0.0016202663337494799,
2270
+ "loss": 1.1845,
2271
+ "step": 182500
2272
+ },
2273
+ {
2274
+ "epoch": 9.52,
2275
+ "learning_rate": 0.0016192259675405743,
2276
+ "loss": 1.1837,
2277
+ "step": 183000
2278
+ },
2279
+ {
2280
+ "epoch": 9.55,
2281
+ "learning_rate": 0.0016181856013316688,
2282
+ "loss": 1.1837,
2283
+ "step": 183500
2284
+ },
2285
+ {
2286
+ "epoch": 9.57,
2287
+ "learning_rate": 0.0016171452351227634,
2288
+ "loss": 1.184,
2289
+ "step": 184000
2290
+ },
2291
+ {
2292
+ "epoch": 9.6,
2293
+ "learning_rate": 0.0016161048689138577,
2294
+ "loss": 1.1835,
2295
+ "step": 184500
2296
+ },
2297
+ {
2298
+ "epoch": 9.62,
2299
+ "learning_rate": 0.0016150645027049521,
2300
+ "loss": 1.1826,
2301
+ "step": 185000
2302
+ },
2303
+ {
2304
+ "epoch": 9.65,
2305
+ "learning_rate": 0.0016140241364960466,
2306
+ "loss": 1.1846,
2307
+ "step": 185500
2308
+ },
2309
+ {
2310
+ "epoch": 9.68,
2311
+ "learning_rate": 0.001612983770287141,
2312
+ "loss": 1.1833,
2313
+ "step": 186000
2314
+ },
2315
+ {
2316
+ "epoch": 9.7,
2317
+ "learning_rate": 0.0016119434040782355,
2318
+ "loss": 1.1829,
2319
+ "step": 186500
2320
+ },
2321
+ {
2322
+ "epoch": 9.73,
2323
+ "learning_rate": 0.0016109030378693302,
2324
+ "loss": 1.183,
2325
+ "step": 187000
2326
+ },
2327
+ {
2328
+ "epoch": 9.75,
2329
+ "learning_rate": 0.0016098626716604246,
2330
+ "loss": 1.184,
2331
+ "step": 187500
2332
+ },
2333
+ {
2334
+ "epoch": 9.78,
2335
+ "learning_rate": 0.0016088223054515189,
2336
+ "loss": 1.1831,
2337
+ "step": 188000
2338
+ },
2339
+ {
2340
+ "epoch": 9.81,
2341
+ "learning_rate": 0.0016077819392426133,
2342
+ "loss": 1.185,
2343
+ "step": 188500
2344
+ },
2345
+ {
2346
+ "epoch": 9.83,
2347
+ "learning_rate": 0.0016067415730337078,
2348
+ "loss": 1.183,
2349
+ "step": 189000
2350
+ },
2351
+ {
2352
+ "epoch": 9.86,
2353
+ "learning_rate": 0.0016057012068248025,
2354
+ "loss": 1.183,
2355
+ "step": 189500
2356
+ },
2357
+ {
2358
+ "epoch": 9.88,
2359
+ "learning_rate": 0.001604660840615897,
2360
+ "loss": 1.1837,
2361
+ "step": 190000
2362
+ },
2363
+ {
2364
+ "epoch": 9.91,
2365
+ "learning_rate": 0.0016036204744069914,
2366
+ "loss": 1.1809,
2367
+ "step": 190500
2368
+ },
2369
+ {
2370
+ "epoch": 9.94,
2371
+ "learning_rate": 0.0016025801081980858,
2372
+ "loss": 1.1838,
2373
+ "step": 191000
2374
+ },
2375
+ {
2376
+ "epoch": 9.96,
2377
+ "learning_rate": 0.00160153974198918,
2378
+ "loss": 1.1831,
2379
+ "step": 191500
2380
+ },
2381
+ {
2382
+ "epoch": 9.99,
2383
+ "learning_rate": 0.0016004993757802745,
2384
+ "loss": 1.1846,
2385
+ "step": 192000
2386
+ },
2387
+ {
2388
+ "epoch": 10.0,
2389
+ "eval_loss": 1.2037365436553955,
2390
+ "eval_runtime": 0.6098,
2391
+ "eval_samples_per_second": 1639.906,
2392
+ "eval_steps_per_second": 3.28,
2393
+ "step": 192240
2394
+ },
2395
+ {
2396
+ "epoch": 10.01,
2397
+ "learning_rate": 0.0015994590095713692,
2398
+ "loss": 1.181,
2399
+ "step": 192500
2400
+ },
2401
+ {
2402
+ "epoch": 10.04,
2403
+ "learning_rate": 0.0015984186433624637,
2404
+ "loss": 1.1792,
2405
+ "step": 193000
2406
+ },
2407
+ {
2408
+ "epoch": 10.07,
2409
+ "learning_rate": 0.0015973782771535581,
2410
+ "loss": 1.1796,
2411
+ "step": 193500
2412
+ },
2413
+ {
2414
+ "epoch": 10.09,
2415
+ "learning_rate": 0.0015963379109446526,
2416
+ "loss": 1.1806,
2417
+ "step": 194000
2418
+ },
2419
+ {
2420
+ "epoch": 10.12,
2421
+ "learning_rate": 0.001595297544735747,
2422
+ "loss": 1.1803,
2423
+ "step": 194500
2424
+ },
2425
+ {
2426
+ "epoch": 10.14,
2427
+ "learning_rate": 0.0015942571785268413,
2428
+ "loss": 1.1801,
2429
+ "step": 195000
2430
+ },
2431
+ {
2432
+ "epoch": 10.17,
2433
+ "learning_rate": 0.001593216812317936,
2434
+ "loss": 1.1799,
2435
+ "step": 195500
2436
+ },
2437
+ {
2438
+ "epoch": 10.2,
2439
+ "learning_rate": 0.0015921764461090304,
2440
+ "loss": 1.1812,
2441
+ "step": 196000
2442
+ },
2443
+ {
2444
+ "epoch": 10.22,
2445
+ "learning_rate": 0.0015911360799001249,
2446
+ "loss": 1.1795,
2447
+ "step": 196500
2448
+ },
2449
+ {
2450
+ "epoch": 10.25,
2451
+ "learning_rate": 0.0015900957136912193,
2452
+ "loss": 1.1812,
2453
+ "step": 197000
2454
+ },
2455
+ {
2456
+ "epoch": 10.27,
2457
+ "learning_rate": 0.0015890553474823138,
2458
+ "loss": 1.1803,
2459
+ "step": 197500
2460
+ },
2461
+ {
2462
+ "epoch": 10.3,
2463
+ "learning_rate": 0.0015880149812734085,
2464
+ "loss": 1.1818,
2465
+ "step": 198000
2466
+ },
2467
+ {
2468
+ "epoch": 10.33,
2469
+ "learning_rate": 0.0015869746150645027,
2470
+ "loss": 1.1802,
2471
+ "step": 198500
2472
+ },
2473
+ {
2474
+ "epoch": 10.35,
2475
+ "learning_rate": 0.0015859342488555972,
2476
+ "loss": 1.1805,
2477
+ "step": 199000
2478
+ },
2479
+ {
2480
+ "epoch": 10.38,
2481
+ "learning_rate": 0.0015848938826466916,
2482
+ "loss": 1.1802,
2483
+ "step": 199500
2484
+ },
2485
+ {
2486
+ "epoch": 10.4,
2487
+ "learning_rate": 0.001583853516437786,
2488
+ "loss": 1.1812,
2489
+ "step": 200000
2490
+ },
2491
+ {
2492
+ "epoch": 10.43,
2493
+ "learning_rate": 0.0015828131502288805,
2494
+ "loss": 1.1817,
2495
+ "step": 200500
2496
+ },
2497
+ {
2498
+ "epoch": 10.46,
2499
+ "learning_rate": 0.0015817727840199752,
2500
+ "loss": 1.1828,
2501
+ "step": 201000
2502
+ },
2503
+ {
2504
+ "epoch": 10.48,
2505
+ "learning_rate": 0.0015807324178110697,
2506
+ "loss": 1.1798,
2507
+ "step": 201500
2508
+ },
2509
+ {
2510
+ "epoch": 10.51,
2511
+ "learning_rate": 0.0015796920516021641,
2512
+ "loss": 1.1817,
2513
+ "step": 202000
2514
+ },
2515
+ {
2516
+ "epoch": 10.53,
2517
+ "learning_rate": 0.0015786516853932584,
2518
+ "loss": 1.181,
2519
+ "step": 202500
2520
+ },
2521
+ {
2522
+ "epoch": 10.56,
2523
+ "learning_rate": 0.0015776113191843528,
2524
+ "loss": 1.1814,
2525
+ "step": 203000
2526
+ },
2527
+ {
2528
+ "epoch": 10.59,
2529
+ "learning_rate": 0.0015765709529754473,
2530
+ "loss": 1.1798,
2531
+ "step": 203500
2532
+ },
2533
+ {
2534
+ "epoch": 10.61,
2535
+ "learning_rate": 0.001575530586766542,
2536
+ "loss": 1.1819,
2537
+ "step": 204000
2538
+ },
2539
+ {
2540
+ "epoch": 10.64,
2541
+ "learning_rate": 0.0015744902205576364,
2542
+ "loss": 1.1818,
2543
+ "step": 204500
2544
+ },
2545
+ {
2546
+ "epoch": 10.66,
2547
+ "learning_rate": 0.0015734498543487309,
2548
+ "loss": 1.182,
2549
+ "step": 205000
2550
+ },
2551
+ {
2552
+ "epoch": 10.69,
2553
+ "learning_rate": 0.0015724094881398253,
2554
+ "loss": 1.1821,
2555
+ "step": 205500
2556
+ },
2557
+ {
2558
+ "epoch": 10.72,
2559
+ "learning_rate": 0.0015713691219309195,
2560
+ "loss": 1.1819,
2561
+ "step": 206000
2562
+ },
2563
+ {
2564
+ "epoch": 10.74,
2565
+ "learning_rate": 0.0015703287557220142,
2566
+ "loss": 1.1809,
2567
+ "step": 206500
2568
+ },
2569
+ {
2570
+ "epoch": 10.77,
2571
+ "learning_rate": 0.0015692883895131087,
2572
+ "loss": 1.1806,
2573
+ "step": 207000
2574
+ },
2575
+ {
2576
+ "epoch": 10.79,
2577
+ "learning_rate": 0.0015682480233042031,
2578
+ "loss": 1.1814,
2579
+ "step": 207500
2580
+ },
2581
+ {
2582
+ "epoch": 10.82,
2583
+ "learning_rate": 0.0015672076570952976,
2584
+ "loss": 1.181,
2585
+ "step": 208000
2586
+ },
2587
+ {
2588
+ "epoch": 10.85,
2589
+ "learning_rate": 0.001566167290886392,
2590
+ "loss": 1.183,
2591
+ "step": 208500
2592
+ },
2593
+ {
2594
+ "epoch": 10.87,
2595
+ "learning_rate": 0.0015651269246774865,
2596
+ "loss": 1.1812,
2597
+ "step": 209000
2598
+ },
2599
+ {
2600
+ "epoch": 10.9,
2601
+ "learning_rate": 0.001564086558468581,
2602
+ "loss": 1.1803,
2603
+ "step": 209500
2604
+ },
2605
+ {
2606
+ "epoch": 10.92,
2607
+ "learning_rate": 0.0015630461922596754,
2608
+ "loss": 1.1817,
2609
+ "step": 210000
2610
+ },
2611
+ {
2612
+ "epoch": 10.95,
2613
+ "learning_rate": 0.0015620058260507699,
2614
+ "loss": 1.1781,
2615
+ "step": 210500
2616
+ },
2617
+ {
2618
+ "epoch": 10.98,
2619
+ "learning_rate": 0.0015609654598418643,
2620
+ "loss": 1.1806,
2621
+ "step": 211000
2622
+ },
2623
+ {
2624
+ "epoch": 11.0,
2625
+ "eval_loss": 1.2047163248062134,
2626
+ "eval_runtime": 0.6153,
2627
+ "eval_samples_per_second": 1625.195,
2628
+ "eval_steps_per_second": 3.25,
2629
+ "step": 211464
2630
+ },
2631
+ {
2632
+ "epoch": 11.0,
2633
+ "learning_rate": 0.0015599250936329588,
2634
+ "loss": 1.1819,
2635
+ "step": 211500
2636
+ },
2637
+ {
2638
+ "epoch": 11.03,
2639
+ "learning_rate": 0.0015588847274240535,
2640
+ "loss": 1.1753,
2641
+ "step": 212000
2642
+ },
2643
+ {
2644
+ "epoch": 11.05,
2645
+ "learning_rate": 0.001557844361215148,
2646
+ "loss": 1.1781,
2647
+ "step": 212500
2648
+ },
2649
+ {
2650
+ "epoch": 11.08,
2651
+ "learning_rate": 0.0015568039950062422,
2652
+ "loss": 1.1788,
2653
+ "step": 213000
2654
+ },
2655
+ {
2656
+ "epoch": 11.11,
2657
+ "learning_rate": 0.0015557636287973366,
2658
+ "loss": 1.1768,
2659
+ "step": 213500
2660
+ },
2661
+ {
2662
+ "epoch": 11.13,
2663
+ "learning_rate": 0.001554723262588431,
2664
+ "loss": 1.1775,
2665
+ "step": 214000
2666
+ },
2667
+ {
2668
+ "epoch": 11.16,
2669
+ "learning_rate": 0.0015536828963795255,
2670
+ "loss": 1.1782,
2671
+ "step": 214500
2672
+ },
2673
+ {
2674
+ "epoch": 11.18,
2675
+ "learning_rate": 0.0015526425301706202,
2676
+ "loss": 1.1771,
2677
+ "step": 215000
2678
+ },
2679
+ {
2680
+ "epoch": 11.21,
2681
+ "learning_rate": 0.0015516021639617147,
2682
+ "loss": 1.1778,
2683
+ "step": 215500
2684
+ },
2685
+ {
2686
+ "epoch": 11.24,
2687
+ "learning_rate": 0.0015505617977528091,
2688
+ "loss": 1.1767,
2689
+ "step": 216000
2690
+ },
2691
+ {
2692
+ "epoch": 11.26,
2693
+ "learning_rate": 0.0015495214315439034,
2694
+ "loss": 1.1781,
2695
+ "step": 216500
2696
+ },
2697
+ {
2698
+ "epoch": 11.29,
2699
+ "learning_rate": 0.0015484810653349978,
2700
+ "loss": 1.1781,
2701
+ "step": 217000
2702
+ },
2703
+ {
2704
+ "epoch": 11.31,
2705
+ "learning_rate": 0.0015474406991260923,
2706
+ "loss": 1.179,
2707
+ "step": 217500
2708
+ },
2709
+ {
2710
+ "epoch": 11.34,
2711
+ "learning_rate": 0.001546400332917187,
2712
+ "loss": 1.1775,
2713
+ "step": 218000
2714
+ },
2715
+ {
2716
+ "epoch": 11.37,
2717
+ "learning_rate": 0.0015453599667082814,
2718
+ "loss": 1.1799,
2719
+ "step": 218500
2720
+ },
2721
+ {
2722
+ "epoch": 11.39,
2723
+ "learning_rate": 0.0015443196004993759,
2724
+ "loss": 1.1773,
2725
+ "step": 219000
2726
+ },
2727
+ {
2728
+ "epoch": 11.42,
2729
+ "learning_rate": 0.0015432792342904703,
2730
+ "loss": 1.1786,
2731
+ "step": 219500
2732
+ },
2733
+ {
2734
+ "epoch": 11.44,
2735
+ "learning_rate": 0.0015422388680815646,
2736
+ "loss": 1.1766,
2737
+ "step": 220000
2738
+ },
2739
+ {
2740
+ "epoch": 11.47,
2741
+ "learning_rate": 0.0015411985018726592,
2742
+ "loss": 1.1793,
2743
+ "step": 220500
2744
+ },
2745
+ {
2746
+ "epoch": 11.5,
2747
+ "learning_rate": 0.0015401581356637537,
2748
+ "loss": 1.1785,
2749
+ "step": 221000
2750
+ },
2751
+ {
2752
+ "epoch": 11.52,
2753
+ "learning_rate": 0.0015391177694548481,
2754
+ "loss": 1.1787,
2755
+ "step": 221500
2756
+ },
2757
+ {
2758
+ "epoch": 11.55,
2759
+ "learning_rate": 0.0015380774032459426,
2760
+ "loss": 1.1786,
2761
+ "step": 222000
2762
+ },
2763
+ {
2764
+ "epoch": 11.57,
2765
+ "learning_rate": 0.001537037037037037,
2766
+ "loss": 1.1796,
2767
+ "step": 222500
2768
+ },
2769
+ {
2770
+ "epoch": 11.6,
2771
+ "learning_rate": 0.0015359966708281315,
2772
+ "loss": 1.1785,
2773
+ "step": 223000
2774
+ },
2775
+ {
2776
+ "epoch": 11.63,
2777
+ "learning_rate": 0.0015349563046192262,
2778
+ "loss": 1.1789,
2779
+ "step": 223500
2780
+ },
2781
+ {
2782
+ "epoch": 11.65,
2783
+ "learning_rate": 0.0015339159384103204,
2784
+ "loss": 1.1775,
2785
+ "step": 224000
2786
+ },
2787
+ {
2788
+ "epoch": 11.68,
2789
+ "learning_rate": 0.0015328755722014149,
2790
+ "loss": 1.181,
2791
+ "step": 224500
2792
+ },
2793
+ {
2794
+ "epoch": 11.7,
2795
+ "learning_rate": 0.0015318352059925093,
2796
+ "loss": 1.1781,
2797
+ "step": 225000
2798
+ },
2799
+ {
2800
+ "epoch": 11.73,
2801
+ "learning_rate": 0.0015307948397836038,
2802
+ "loss": 1.1791,
2803
+ "step": 225500
2804
+ },
2805
+ {
2806
+ "epoch": 11.76,
2807
+ "learning_rate": 0.0015297544735746985,
2808
+ "loss": 1.1797,
2809
+ "step": 226000
2810
+ },
2811
+ {
2812
+ "epoch": 11.78,
2813
+ "learning_rate": 0.001528714107365793,
2814
+ "loss": 1.1779,
2815
+ "step": 226500
2816
+ },
2817
+ {
2818
+ "epoch": 11.81,
2819
+ "learning_rate": 0.0015276737411568874,
2820
+ "loss": 1.1773,
2821
+ "step": 227000
2822
+ },
2823
+ {
2824
+ "epoch": 11.83,
2825
+ "learning_rate": 0.0015266333749479816,
2826
+ "loss": 1.178,
2827
+ "step": 227500
2828
+ },
2829
+ {
2830
+ "epoch": 11.86,
2831
+ "learning_rate": 0.001525593008739076,
2832
+ "loss": 1.179,
2833
+ "step": 228000
2834
+ },
2835
+ {
2836
+ "epoch": 11.89,
2837
+ "learning_rate": 0.0015245526425301705,
2838
+ "loss": 1.1782,
2839
+ "step": 228500
2840
+ },
2841
+ {
2842
+ "epoch": 11.91,
2843
+ "learning_rate": 0.0015235122763212652,
2844
+ "loss": 1.1796,
2845
+ "step": 229000
2846
+ },
2847
+ {
2848
+ "epoch": 11.94,
2849
+ "learning_rate": 0.0015224719101123597,
2850
+ "loss": 1.1771,
2851
+ "step": 229500
2852
+ },
2853
+ {
2854
+ "epoch": 11.96,
2855
+ "learning_rate": 0.0015214315439034541,
2856
+ "loss": 1.179,
2857
+ "step": 230000
2858
+ },
2859
+ {
2860
+ "epoch": 11.99,
2861
+ "learning_rate": 0.0015203911776945486,
2862
+ "loss": 1.1791,
2863
+ "step": 230500
2864
+ },
2865
+ {
2866
+ "epoch": 12.0,
2867
+ "eval_loss": 1.1990782022476196,
2868
+ "eval_runtime": 0.5976,
2869
+ "eval_samples_per_second": 1673.372,
2870
+ "eval_steps_per_second": 3.347,
2871
+ "step": 230688
2872
+ },
2873
+ {
2874
+ "epoch": 12.02,
2875
+ "learning_rate": 0.0015193508114856428,
2876
+ "loss": 1.1745,
2877
+ "step": 231000
2878
+ },
2879
+ {
2880
+ "epoch": 12.04,
2881
+ "learning_rate": 0.0015183104452767373,
2882
+ "loss": 1.1744,
2883
+ "step": 231500
2884
+ },
2885
+ {
2886
+ "epoch": 12.07,
2887
+ "learning_rate": 0.001517270079067832,
2888
+ "loss": 1.1743,
2889
+ "step": 232000
2890
+ },
2891
+ {
2892
+ "epoch": 12.09,
2893
+ "learning_rate": 0.0015162297128589264,
2894
+ "loss": 1.1758,
2895
+ "step": 232500
2896
+ },
2897
+ {
2898
+ "epoch": 12.12,
2899
+ "learning_rate": 0.0015151893466500209,
2900
+ "loss": 1.1738,
2901
+ "step": 233000
2902
+ },
2903
+ {
2904
+ "epoch": 12.15,
2905
+ "learning_rate": 0.0015141489804411153,
2906
+ "loss": 1.1752,
2907
+ "step": 233500
2908
+ },
2909
+ {
2910
+ "epoch": 12.17,
2911
+ "learning_rate": 0.0015131086142322098,
2912
+ "loss": 1.1753,
2913
+ "step": 234000
2914
+ },
2915
+ {
2916
+ "epoch": 12.2,
2917
+ "learning_rate": 0.0015120682480233042,
2918
+ "loss": 1.1764,
2919
+ "step": 234500
2920
+ },
2921
+ {
2922
+ "epoch": 12.22,
2923
+ "learning_rate": 0.0015110278818143987,
2924
+ "loss": 1.174,
2925
+ "step": 235000
2926
+ },
2927
+ {
2928
+ "epoch": 12.25,
2929
+ "learning_rate": 0.0015099875156054932,
2930
+ "loss": 1.176,
2931
+ "step": 235500
2932
+ },
2933
+ {
2934
+ "epoch": 12.28,
2935
+ "learning_rate": 0.0015089471493965876,
2936
+ "loss": 1.176,
2937
+ "step": 236000
2938
+ },
2939
+ {
2940
+ "epoch": 12.3,
2941
+ "learning_rate": 0.001507906783187682,
2942
+ "loss": 1.1758,
2943
+ "step": 236500
2944
+ },
2945
+ {
2946
+ "epoch": 12.33,
2947
+ "learning_rate": 0.0015068664169787765,
2948
+ "loss": 1.1747,
2949
+ "step": 237000
2950
+ },
2951
+ {
2952
+ "epoch": 12.35,
2953
+ "learning_rate": 0.0015058260507698712,
2954
+ "loss": 1.1752,
2955
+ "step": 237500
2956
+ },
2957
+ {
2958
+ "epoch": 12.38,
2959
+ "learning_rate": 0.0015047856845609654,
2960
+ "loss": 1.1761,
2961
+ "step": 238000
2962
+ },
2963
+ {
2964
+ "epoch": 12.41,
2965
+ "learning_rate": 0.00150374531835206,
2966
+ "loss": 1.1771,
2967
+ "step": 238500
2968
+ },
2969
+ {
2970
+ "epoch": 12.43,
2971
+ "learning_rate": 0.0015027049521431544,
2972
+ "loss": 1.1764,
2973
+ "step": 239000
2974
+ },
2975
+ {
2976
+ "epoch": 12.46,
2977
+ "learning_rate": 0.0015016645859342488,
2978
+ "loss": 1.1754,
2979
+ "step": 239500
2980
+ },
2981
+ {
2982
+ "epoch": 12.48,
2983
+ "learning_rate": 0.0015006242197253433,
2984
+ "loss": 1.1763,
2985
+ "step": 240000
2986
+ },
2987
+ {
2988
+ "epoch": 12.51,
2989
+ "learning_rate": 0.001499583853516438,
2990
+ "loss": 1.1751,
2991
+ "step": 240500
2992
+ },
2993
+ {
2994
+ "epoch": 12.54,
2995
+ "learning_rate": 0.0014985434873075324,
2996
+ "loss": 1.177,
2997
+ "step": 241000
2998
+ },
2999
+ {
3000
+ "epoch": 12.56,
3001
+ "learning_rate": 0.0014975031210986266,
3002
+ "loss": 1.178,
3003
+ "step": 241500
3004
+ },
3005
+ {
3006
+ "epoch": 12.59,
3007
+ "learning_rate": 0.001496462754889721,
3008
+ "loss": 1.178,
3009
+ "step": 242000
3010
+ },
3011
+ {
3012
+ "epoch": 12.61,
3013
+ "learning_rate": 0.0014954223886808156,
3014
+ "loss": 1.1761,
3015
+ "step": 242500
3016
+ },
3017
+ {
3018
+ "epoch": 12.64,
3019
+ "learning_rate": 0.0014943820224719102,
3020
+ "loss": 1.1763,
3021
+ "step": 243000
3022
+ },
3023
+ {
3024
+ "epoch": 12.67,
3025
+ "learning_rate": 0.0014933416562630047,
3026
+ "loss": 1.1781,
3027
+ "step": 243500
3028
+ },
3029
+ {
3030
+ "epoch": 12.69,
3031
+ "learning_rate": 0.0014923012900540991,
3032
+ "loss": 1.1773,
3033
+ "step": 244000
3034
+ },
3035
+ {
3036
+ "epoch": 12.72,
3037
+ "learning_rate": 0.0014912609238451936,
3038
+ "loss": 1.176,
3039
+ "step": 244500
3040
+ },
3041
+ {
3042
+ "epoch": 12.74,
3043
+ "learning_rate": 0.0014902205576362878,
3044
+ "loss": 1.1762,
3045
+ "step": 245000
3046
+ },
3047
+ {
3048
+ "epoch": 12.77,
3049
+ "learning_rate": 0.0014891801914273823,
3050
+ "loss": 1.1776,
3051
+ "step": 245500
3052
+ },
3053
+ {
3054
+ "epoch": 12.8,
3055
+ "learning_rate": 0.001488139825218477,
3056
+ "loss": 1.1774,
3057
+ "step": 246000
3058
+ },
3059
+ {
3060
+ "epoch": 12.82,
3061
+ "learning_rate": 0.0014870994590095714,
3062
+ "loss": 1.1759,
3063
+ "step": 246500
3064
+ },
3065
+ {
3066
+ "epoch": 12.85,
3067
+ "learning_rate": 0.0014860590928006659,
3068
+ "loss": 1.1783,
3069
+ "step": 247000
3070
+ },
3071
+ {
3072
+ "epoch": 12.87,
3073
+ "learning_rate": 0.0014850187265917603,
3074
+ "loss": 1.1757,
3075
+ "step": 247500
3076
+ },
3077
+ {
3078
+ "epoch": 12.9,
3079
+ "learning_rate": 0.0014839783603828548,
3080
+ "loss": 1.1769,
3081
+ "step": 248000
3082
+ },
3083
+ {
3084
+ "epoch": 12.93,
3085
+ "learning_rate": 0.0014829379941739495,
3086
+ "loss": 1.1749,
3087
+ "step": 248500
3088
+ },
3089
+ {
3090
+ "epoch": 12.95,
3091
+ "learning_rate": 0.0014818976279650437,
3092
+ "loss": 1.1751,
3093
+ "step": 249000
3094
+ },
3095
+ {
3096
+ "epoch": 12.98,
3097
+ "learning_rate": 0.0014808572617561382,
3098
+ "loss": 1.1757,
3099
+ "step": 249500
3100
+ },
3101
+ {
3102
+ "epoch": 13.0,
3103
+ "eval_loss": 1.198763132095337,
3104
+ "eval_runtime": 0.6114,
3105
+ "eval_samples_per_second": 1635.549,
3106
+ "eval_steps_per_second": 3.271,
3107
+ "step": 249912
3108
+ },
3109
+ {
3110
+ "epoch": 13.0,
3111
+ "learning_rate": 0.0014798168955472326,
3112
+ "loss": 1.1756,
3113
+ "step": 250000
3114
+ },
3115
+ {
3116
+ "epoch": 13.03,
3117
+ "learning_rate": 0.001478776529338327,
3118
+ "loss": 1.1712,
3119
+ "step": 250500
3120
+ },
3121
+ {
3122
+ "epoch": 13.06,
3123
+ "learning_rate": 0.0014777361631294215,
3124
+ "loss": 1.1714,
3125
+ "step": 251000
3126
+ },
3127
+ {
3128
+ "epoch": 13.08,
3129
+ "learning_rate": 0.0014766957969205162,
3130
+ "loss": 1.1724,
3131
+ "step": 251500
3132
+ },
3133
+ {
3134
+ "epoch": 13.11,
3135
+ "learning_rate": 0.0014756554307116107,
3136
+ "loss": 1.1736,
3137
+ "step": 252000
3138
+ },
3139
+ {
3140
+ "epoch": 13.13,
3141
+ "learning_rate": 0.001474615064502705,
3142
+ "loss": 1.1739,
3143
+ "step": 252500
3144
+ },
3145
+ {
3146
+ "epoch": 13.16,
3147
+ "learning_rate": 0.0014735746982937994,
3148
+ "loss": 1.1737,
3149
+ "step": 253000
3150
+ },
3151
+ {
3152
+ "epoch": 13.19,
3153
+ "learning_rate": 0.0014725343320848938,
3154
+ "loss": 1.1732,
3155
+ "step": 253500
3156
+ },
3157
+ {
3158
+ "epoch": 13.21,
3159
+ "learning_rate": 0.0014714939658759883,
3160
+ "loss": 1.1725,
3161
+ "step": 254000
3162
+ },
3163
+ {
3164
+ "epoch": 13.24,
3165
+ "learning_rate": 0.001470453599667083,
3166
+ "loss": 1.1737,
3167
+ "step": 254500
3168
+ },
3169
+ {
3170
+ "epoch": 13.26,
3171
+ "learning_rate": 0.0014694132334581774,
3172
+ "loss": 1.1737,
3173
+ "step": 255000
3174
+ },
3175
+ {
3176
+ "epoch": 13.29,
3177
+ "learning_rate": 0.0014683728672492719,
3178
+ "loss": 1.1733,
3179
+ "step": 255500
3180
+ },
3181
+ {
3182
+ "epoch": 13.32,
3183
+ "learning_rate": 0.0014673325010403661,
3184
+ "loss": 1.1729,
3185
+ "step": 256000
3186
+ },
3187
+ {
3188
+ "epoch": 13.34,
3189
+ "learning_rate": 0.0014662921348314606,
3190
+ "loss": 1.1742,
3191
+ "step": 256500
3192
+ },
3193
+ {
3194
+ "epoch": 13.37,
3195
+ "learning_rate": 0.0014652517686225552,
3196
+ "loss": 1.174,
3197
+ "step": 257000
3198
+ },
3199
+ {
3200
+ "epoch": 13.39,
3201
+ "learning_rate": 0.0014642114024136497,
3202
+ "loss": 1.1749,
3203
+ "step": 257500
3204
+ },
3205
+ {
3206
+ "epoch": 13.42,
3207
+ "learning_rate": 0.0014631710362047442,
3208
+ "loss": 1.1735,
3209
+ "step": 258000
3210
+ },
3211
+ {
3212
+ "epoch": 13.45,
3213
+ "learning_rate": 0.0014621306699958386,
3214
+ "loss": 1.1724,
3215
+ "step": 258500
3216
+ },
3217
+ {
3218
+ "epoch": 13.47,
3219
+ "learning_rate": 0.001461090303786933,
3220
+ "loss": 1.1743,
3221
+ "step": 259000
3222
+ },
3223
+ {
3224
+ "epoch": 13.5,
3225
+ "learning_rate": 0.0014600499375780273,
3226
+ "loss": 1.1755,
3227
+ "step": 259500
3228
+ },
3229
+ {
3230
+ "epoch": 13.52,
3231
+ "learning_rate": 0.001459009571369122,
3232
+ "loss": 1.1759,
3233
+ "step": 260000
3234
+ },
3235
+ {
3236
+ "epoch": 13.55,
3237
+ "learning_rate": 0.0014579692051602164,
3238
+ "loss": 1.1749,
3239
+ "step": 260500
3240
+ },
3241
+ {
3242
+ "epoch": 13.58,
3243
+ "learning_rate": 0.001456928838951311,
3244
+ "loss": 1.1756,
3245
+ "step": 261000
3246
+ },
3247
+ {
3248
+ "epoch": 13.6,
3249
+ "learning_rate": 0.0014558884727424054,
3250
+ "loss": 1.1747,
3251
+ "step": 261500
3252
+ },
3253
+ {
3254
+ "epoch": 13.63,
3255
+ "learning_rate": 0.0014548481065334998,
3256
+ "loss": 1.1745,
3257
+ "step": 262000
3258
+ },
3259
+ {
3260
+ "epoch": 13.65,
3261
+ "learning_rate": 0.0014538077403245945,
3262
+ "loss": 1.1736,
3263
+ "step": 262500
3264
+ },
3265
+ {
3266
+ "epoch": 13.68,
3267
+ "learning_rate": 0.0014527673741156887,
3268
+ "loss": 1.1749,
3269
+ "step": 263000
3270
+ },
3271
+ {
3272
+ "epoch": 13.71,
3273
+ "learning_rate": 0.0014517270079067832,
3274
+ "loss": 1.1747,
3275
+ "step": 263500
3276
+ },
3277
+ {
3278
+ "epoch": 13.73,
3279
+ "learning_rate": 0.0014506866416978776,
3280
+ "loss": 1.1735,
3281
+ "step": 264000
3282
+ },
3283
+ {
3284
+ "epoch": 13.76,
3285
+ "learning_rate": 0.001449646275488972,
3286
+ "loss": 1.1736,
3287
+ "step": 264500
3288
+ },
3289
+ {
3290
+ "epoch": 13.78,
3291
+ "learning_rate": 0.0014486059092800666,
3292
+ "loss": 1.1741,
3293
+ "step": 265000
3294
+ },
3295
+ {
3296
+ "epoch": 13.81,
3297
+ "learning_rate": 0.0014475655430711612,
3298
+ "loss": 1.1756,
3299
+ "step": 265500
3300
+ },
3301
+ {
3302
+ "epoch": 13.84,
3303
+ "learning_rate": 0.0014465251768622557,
3304
+ "loss": 1.1746,
3305
+ "step": 266000
3306
+ },
3307
+ {
3308
+ "epoch": 13.86,
3309
+ "learning_rate": 0.00144548481065335,
3310
+ "loss": 1.176,
3311
+ "step": 266500
3312
+ },
3313
+ {
3314
+ "epoch": 13.89,
3315
+ "learning_rate": 0.0014444444444444444,
3316
+ "loss": 1.1746,
3317
+ "step": 267000
3318
+ },
3319
+ {
3320
+ "epoch": 13.91,
3321
+ "learning_rate": 0.0014434040782355388,
3322
+ "loss": 1.1746,
3323
+ "step": 267500
3324
+ },
3325
+ {
3326
+ "epoch": 13.94,
3327
+ "learning_rate": 0.0014423637120266333,
3328
+ "loss": 1.1741,
3329
+ "step": 268000
3330
+ },
3331
+ {
3332
+ "epoch": 13.97,
3333
+ "learning_rate": 0.001441323345817728,
3334
+ "loss": 1.173,
3335
+ "step": 268500
3336
+ },
3337
+ {
3338
+ "epoch": 13.99,
3339
+ "learning_rate": 0.0014402829796088224,
3340
+ "loss": 1.1741,
3341
+ "step": 269000
3342
+ },
3343
+ {
3344
+ "epoch": 14.0,
3345
+ "eval_loss": 1.199006199836731,
3346
+ "eval_runtime": 0.8396,
3347
+ "eval_samples_per_second": 1191.026,
3348
+ "eval_steps_per_second": 2.382,
3349
+ "step": 269136
3350
+ },
3351
+ {
3352
+ "epoch": 14.02,
3353
+ "learning_rate": 0.0014392426133999169,
3354
+ "loss": 1.1715,
3355
+ "step": 269500
3356
+ },
3357
+ {
3358
+ "epoch": 14.04,
3359
+ "learning_rate": 0.0014382022471910111,
3360
+ "loss": 1.1707,
3361
+ "step": 270000
3362
+ },
3363
+ {
3364
+ "epoch": 14.07,
3365
+ "learning_rate": 0.0014371618809821056,
3366
+ "loss": 1.171,
3367
+ "step": 270500
3368
+ },
3369
+ {
3370
+ "epoch": 14.1,
3371
+ "learning_rate": 0.0014361215147732003,
3372
+ "loss": 1.1708,
3373
+ "step": 271000
3374
+ },
3375
+ {
3376
+ "epoch": 14.12,
3377
+ "learning_rate": 0.0014350811485642947,
3378
+ "loss": 1.1715,
3379
+ "step": 271500
3380
+ },
3381
+ {
3382
+ "epoch": 14.15,
3383
+ "learning_rate": 0.0014340407823553892,
3384
+ "loss": 1.171,
3385
+ "step": 272000
3386
+ },
3387
+ {
3388
+ "epoch": 14.17,
3389
+ "learning_rate": 0.0014330004161464836,
3390
+ "loss": 1.1721,
3391
+ "step": 272500
3392
+ },
3393
+ {
3394
+ "epoch": 14.2,
3395
+ "learning_rate": 0.001431960049937578,
3396
+ "loss": 1.1717,
3397
+ "step": 273000
3398
+ },
3399
+ {
3400
+ "epoch": 14.23,
3401
+ "learning_rate": 0.0014309196837286725,
3402
+ "loss": 1.1715,
3403
+ "step": 273500
3404
+ },
3405
+ {
3406
+ "epoch": 14.25,
3407
+ "learning_rate": 0.001429879317519767,
3408
+ "loss": 1.171,
3409
+ "step": 274000
3410
+ },
3411
+ {
3412
+ "epoch": 14.28,
3413
+ "learning_rate": 0.0014288389513108614,
3414
+ "loss": 1.1712,
3415
+ "step": 274500
3416
+ },
3417
+ {
3418
+ "epoch": 14.31,
3419
+ "learning_rate": 0.001427798585101956,
3420
+ "loss": 1.1726,
3421
+ "step": 275000
3422
+ },
3423
+ {
3424
+ "epoch": 14.33,
3425
+ "learning_rate": 0.0014267582188930504,
3426
+ "loss": 1.173,
3427
+ "step": 275500
3428
+ },
3429
+ {
3430
+ "epoch": 14.36,
3431
+ "learning_rate": 0.0014257178526841448,
3432
+ "loss": 1.1709,
3433
+ "step": 276000
3434
+ },
3435
+ {
3436
+ "epoch": 14.38,
3437
+ "learning_rate": 0.0014246774864752393,
3438
+ "loss": 1.1728,
3439
+ "step": 276500
3440
+ },
3441
+ {
3442
+ "epoch": 14.41,
3443
+ "learning_rate": 0.001423637120266334,
3444
+ "loss": 1.1721,
3445
+ "step": 277000
3446
+ },
3447
+ {
3448
+ "epoch": 14.44,
3449
+ "learning_rate": 0.0014225967540574282,
3450
+ "loss": 1.1732,
3451
+ "step": 277500
3452
+ },
3453
+ {
3454
+ "epoch": 14.46,
3455
+ "learning_rate": 0.0014215563878485226,
3456
+ "loss": 1.1718,
3457
+ "step": 278000
3458
+ },
3459
+ {
3460
+ "epoch": 14.49,
3461
+ "learning_rate": 0.001420516021639617,
3462
+ "loss": 1.1716,
3463
+ "step": 278500
3464
+ },
3465
+ {
3466
+ "epoch": 14.51,
3467
+ "learning_rate": 0.0014194756554307116,
3468
+ "loss": 1.1735,
3469
+ "step": 279000
3470
+ },
3471
+ {
3472
+ "epoch": 14.54,
3473
+ "learning_rate": 0.0014184352892218062,
3474
+ "loss": 1.1731,
3475
+ "step": 279500
3476
+ },
3477
+ {
3478
+ "epoch": 14.57,
3479
+ "learning_rate": 0.0014173949230129007,
3480
+ "loss": 1.1724,
3481
+ "step": 280000
3482
+ },
3483
+ {
3484
+ "epoch": 14.59,
3485
+ "learning_rate": 0.0014163545568039951,
3486
+ "loss": 1.1717,
3487
+ "step": 280500
3488
+ },
3489
+ {
3490
+ "epoch": 14.62,
3491
+ "learning_rate": 0.0014153141905950894,
3492
+ "loss": 1.1714,
3493
+ "step": 281000
3494
+ },
3495
+ {
3496
+ "epoch": 14.64,
3497
+ "learning_rate": 0.0014142738243861838,
3498
+ "loss": 1.1725,
3499
+ "step": 281500
3500
+ },
3501
+ {
3502
+ "epoch": 14.67,
3503
+ "learning_rate": 0.0014132334581772783,
3504
+ "loss": 1.1726,
3505
+ "step": 282000
3506
+ },
3507
+ {
3508
+ "epoch": 14.7,
3509
+ "learning_rate": 0.001412193091968373,
3510
+ "loss": 1.1731,
3511
+ "step": 282500
3512
+ },
3513
+ {
3514
+ "epoch": 14.72,
3515
+ "learning_rate": 0.0014111527257594674,
3516
+ "loss": 1.1727,
3517
+ "step": 283000
3518
+ },
3519
+ {
3520
+ "epoch": 14.75,
3521
+ "learning_rate": 0.0014101123595505619,
3522
+ "loss": 1.1724,
3523
+ "step": 283500
3524
+ },
3525
+ {
3526
+ "epoch": 14.77,
3527
+ "learning_rate": 0.0014090719933416563,
3528
+ "loss": 1.1715,
3529
+ "step": 284000
3530
+ },
3531
+ {
3532
+ "epoch": 14.8,
3533
+ "learning_rate": 0.0014080316271327506,
3534
+ "loss": 1.173,
3535
+ "step": 284500
3536
+ },
3537
+ {
3538
+ "epoch": 14.83,
3539
+ "learning_rate": 0.0014069912609238453,
3540
+ "loss": 1.1727,
3541
+ "step": 285000
3542
+ },
3543
+ {
3544
+ "epoch": 14.85,
3545
+ "learning_rate": 0.0014059508947149397,
3546
+ "loss": 1.1744,
3547
+ "step": 285500
3548
+ },
3549
+ {
3550
+ "epoch": 14.88,
3551
+ "learning_rate": 0.0014049105285060342,
3552
+ "loss": 1.1709,
3553
+ "step": 286000
3554
+ },
3555
+ {
3556
+ "epoch": 14.9,
3557
+ "learning_rate": 0.0014038701622971286,
3558
+ "loss": 1.1727,
3559
+ "step": 286500
3560
+ },
3561
+ {
3562
+ "epoch": 14.93,
3563
+ "learning_rate": 0.001402829796088223,
3564
+ "loss": 1.1733,
3565
+ "step": 287000
3566
+ },
3567
+ {
3568
+ "epoch": 14.96,
3569
+ "learning_rate": 0.0014017894298793175,
3570
+ "loss": 1.1736,
3571
+ "step": 287500
3572
+ },
3573
+ {
3574
+ "epoch": 14.98,
3575
+ "learning_rate": 0.001400749063670412,
3576
+ "loss": 1.1734,
3577
+ "step": 288000
3578
+ },
3579
+ {
3580
+ "epoch": 15.0,
3581
+ "eval_loss": 1.1976137161254883,
3582
+ "eval_runtime": 0.6131,
3583
+ "eval_samples_per_second": 1631.003,
3584
+ "eval_steps_per_second": 3.262,
3585
+ "step": 288360
3586
+ },
3587
+ {
3588
+ "epoch": 15.01,
3589
+ "learning_rate": 0.0013997086974615065,
3590
+ "loss": 1.1707,
3591
+ "step": 288500
3592
+ },
3593
+ {
3594
+ "epoch": 15.03,
3595
+ "learning_rate": 0.001398668331252601,
3596
+ "loss": 1.1673,
3597
+ "step": 289000
3598
+ },
3599
+ {
3600
+ "epoch": 15.06,
3601
+ "learning_rate": 0.0013976279650436954,
3602
+ "loss": 1.1696,
3603
+ "step": 289500
3604
+ },
3605
+ {
3606
+ "epoch": 15.09,
3607
+ "learning_rate": 0.0013965875988347898,
3608
+ "loss": 1.169,
3609
+ "step": 290000
3610
+ },
3611
+ {
3612
+ "epoch": 15.11,
3613
+ "learning_rate": 0.0013955472326258843,
3614
+ "loss": 1.1689,
3615
+ "step": 290500
3616
+ },
3617
+ {
3618
+ "epoch": 15.14,
3619
+ "learning_rate": 0.001394506866416979,
3620
+ "loss": 1.1702,
3621
+ "step": 291000
3622
+ },
3623
+ {
3624
+ "epoch": 15.16,
3625
+ "learning_rate": 0.0013934665002080732,
3626
+ "loss": 1.1687,
3627
+ "step": 291500
3628
+ },
3629
+ {
3630
+ "epoch": 15.19,
3631
+ "learning_rate": 0.0013924261339991677,
3632
+ "loss": 1.1688,
3633
+ "step": 292000
3634
+ },
3635
+ {
3636
+ "epoch": 15.22,
3637
+ "learning_rate": 0.0013913857677902621,
3638
+ "loss": 1.1693,
3639
+ "step": 292500
3640
+ },
3641
+ {
3642
+ "epoch": 15.24,
3643
+ "learning_rate": 0.0013903454015813566,
3644
+ "loss": 1.1703,
3645
+ "step": 293000
3646
+ },
3647
+ {
3648
+ "epoch": 15.27,
3649
+ "learning_rate": 0.0013893050353724512,
3650
+ "loss": 1.1719,
3651
+ "step": 293500
3652
+ },
3653
+ {
3654
+ "epoch": 15.29,
3655
+ "learning_rate": 0.0013882646691635457,
3656
+ "loss": 1.1701,
3657
+ "step": 294000
3658
+ },
3659
+ {
3660
+ "epoch": 15.32,
3661
+ "learning_rate": 0.0013872243029546402,
3662
+ "loss": 1.1707,
3663
+ "step": 294500
3664
+ },
3665
+ {
3666
+ "epoch": 15.35,
3667
+ "learning_rate": 0.0013861839367457346,
3668
+ "loss": 1.1708,
3669
+ "step": 295000
3670
+ },
3671
+ {
3672
+ "epoch": 15.37,
3673
+ "learning_rate": 0.0013851435705368289,
3674
+ "loss": 1.1716,
3675
+ "step": 295500
3676
+ },
3677
+ {
3678
+ "epoch": 15.4,
3679
+ "learning_rate": 0.0013841032043279233,
3680
+ "loss": 1.1716,
3681
+ "step": 296000
3682
+ },
3683
+ {
3684
+ "epoch": 15.42,
3685
+ "learning_rate": 0.001383062838119018,
3686
+ "loss": 1.1707,
3687
+ "step": 296500
3688
+ },
3689
+ {
3690
+ "epoch": 15.45,
3691
+ "learning_rate": 0.0013820224719101124,
3692
+ "loss": 1.1708,
3693
+ "step": 297000
3694
+ },
3695
+ {
3696
+ "epoch": 15.48,
3697
+ "learning_rate": 0.001380982105701207,
3698
+ "loss": 1.1691,
3699
+ "step": 297500
3700
+ },
3701
+ {
3702
+ "epoch": 15.5,
3703
+ "learning_rate": 0.0013799417394923014,
3704
+ "loss": 1.1725,
3705
+ "step": 298000
3706
+ },
3707
+ {
3708
+ "epoch": 15.53,
3709
+ "learning_rate": 0.0013789013732833958,
3710
+ "loss": 1.1697,
3711
+ "step": 298500
3712
+ },
3713
+ {
3714
+ "epoch": 15.55,
3715
+ "learning_rate": 0.0013778610070744903,
3716
+ "loss": 1.1715,
3717
+ "step": 299000
3718
+ },
3719
+ {
3720
+ "epoch": 15.58,
3721
+ "learning_rate": 0.0013768206408655847,
3722
+ "loss": 1.1713,
3723
+ "step": 299500
3724
+ },
3725
+ {
3726
+ "epoch": 15.61,
3727
+ "learning_rate": 0.0013757802746566792,
3728
+ "loss": 1.1708,
3729
+ "step": 300000
3730
+ },
3731
+ {
3732
+ "epoch": 15.63,
3733
+ "learning_rate": 0.0013747399084477736,
3734
+ "loss": 1.1705,
3735
+ "step": 300500
3736
+ },
3737
+ {
3738
+ "epoch": 15.66,
3739
+ "learning_rate": 0.001373699542238868,
3740
+ "loss": 1.1712,
3741
+ "step": 301000
3742
+ },
3743
+ {
3744
+ "epoch": 15.68,
3745
+ "learning_rate": 0.0013726591760299626,
3746
+ "loss": 1.1712,
3747
+ "step": 301500
3748
+ },
3749
+ {
3750
+ "epoch": 15.71,
3751
+ "learning_rate": 0.0013716188098210572,
3752
+ "loss": 1.1715,
3753
+ "step": 302000
3754
+ },
3755
+ {
3756
+ "epoch": 15.74,
3757
+ "learning_rate": 0.0013705784436121515,
3758
+ "loss": 1.1709,
3759
+ "step": 302500
3760
+ },
3761
+ {
3762
+ "epoch": 15.76,
3763
+ "learning_rate": 0.001369538077403246,
3764
+ "loss": 1.1722,
3765
+ "step": 303000
3766
+ },
3767
+ {
3768
+ "epoch": 15.79,
3769
+ "learning_rate": 0.0013684977111943404,
3770
+ "loss": 1.1718,
3771
+ "step": 303500
3772
+ },
3773
+ {
3774
+ "epoch": 15.81,
3775
+ "learning_rate": 0.0013674573449854348,
3776
+ "loss": 1.1708,
3777
+ "step": 304000
3778
+ },
3779
+ {
3780
+ "epoch": 15.84,
3781
+ "learning_rate": 0.0013664169787765293,
3782
+ "loss": 1.1723,
3783
+ "step": 304500
3784
+ },
3785
+ {
3786
+ "epoch": 15.87,
3787
+ "learning_rate": 0.001365376612567624,
3788
+ "loss": 1.1702,
3789
+ "step": 305000
3790
+ },
3791
+ {
3792
+ "epoch": 15.89,
3793
+ "learning_rate": 0.0013643362463587184,
3794
+ "loss": 1.1714,
3795
+ "step": 305500
3796
+ },
3797
+ {
3798
+ "epoch": 15.92,
3799
+ "learning_rate": 0.0013632958801498127,
3800
+ "loss": 1.1701,
3801
+ "step": 306000
3802
+ },
3803
+ {
3804
+ "epoch": 15.94,
3805
+ "learning_rate": 0.0013622555139409071,
3806
+ "loss": 1.1709,
3807
+ "step": 306500
3808
+ },
3809
+ {
3810
+ "epoch": 15.97,
3811
+ "learning_rate": 0.0013612151477320016,
3812
+ "loss": 1.1697,
3813
+ "step": 307000
3814
+ },
3815
+ {
3816
+ "epoch": 16.0,
3817
+ "learning_rate": 0.0013601747815230963,
3818
+ "loss": 1.1709,
3819
+ "step": 307500
3820
+ },
3821
+ {
3822
+ "epoch": 16.0,
3823
+ "eval_loss": 1.1968414783477783,
3824
+ "eval_runtime": 0.6109,
3825
+ "eval_samples_per_second": 1636.861,
3826
+ "eval_steps_per_second": 3.274,
3827
+ "step": 307584
3828
+ }
3829
+ ],
3830
+ "logging_steps": 500,
3831
+ "max_steps": 961200,
3832
+ "num_input_tokens_seen": 0,
3833
+ "num_train_epochs": 50,
3834
+ "save_steps": 500,
3835
+ "total_flos": 1.2378919133169423e+18,
3836
+ "train_batch_size": 512,
3837
+ "trial_name": null,
3838
+ "trial_params": null
3839
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36dcc3d6a53069882f78e345e28b92d48e8289f81ae2ed2dc362af7518d6313
3
+ size 4664