Upload with huggingface_hub
Browse files- config.json +26 -0
- merges.txt +378 -0
- modelO.out +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +1 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
- vocab.json +1 -0
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"RobertaForMaskedLM"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"bos_token_id": 0,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 514,
|
16 |
+
"model_type": "roberta",
|
17 |
+
"num_attention_heads": 4,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 1,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.15.0",
|
23 |
+
"type_vocab_size": 1,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 800
|
26 |
+
}
|
merges.txt
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#version: 0.2 - Trained by `huggingface/tokenizers`
|
2 |
+
B r
|
3 |
+
a n
|
4 |
+
c h
|
5 |
+
Br an
|
6 |
+
Bran ch
|
7 |
+
Branch 1
|
8 |
+
= C
|
9 |
+
R i
|
10 |
+
n g
|
11 |
+
Ri ng
|
12 |
+
Ring 1
|
13 |
+
= Branch1
|
14 |
+
Branch 2
|
15 |
+
= O
|
16 |
+
Ring 2
|
17 |
+
H 1
|
18 |
+
C @
|
19 |
+
= N
|
20 |
+
# Branch1
|
21 |
+
C@ @
|
22 |
+
= Branch2
|
23 |
+
C@ H1
|
24 |
+
C@@ H1
|
25 |
+
# Branch2
|
26 |
+
# C
|
27 |
+
C l
|
28 |
+
/ C
|
29 |
+
N H1
|
30 |
+
= Ring1
|
31 |
+
+ 1
|
32 |
+
- 1
|
33 |
+
O -1
|
34 |
+
N +1
|
35 |
+
\ C
|
36 |
+
# N
|
37 |
+
/ N
|
38 |
+
= Ring2
|
39 |
+
= S
|
40 |
+
=N +1
|
41 |
+
\ N
|
42 |
+
N a
|
43 |
+
Na +1
|
44 |
+
/ O
|
45 |
+
\ O
|
46 |
+
Br -1
|
47 |
+
Branch 3
|
48 |
+
\ S
|
49 |
+
S +1
|
50 |
+
Cl -1
|
51 |
+
I -1
|
52 |
+
/ C@@H1
|
53 |
+
S i
|
54 |
+
/ C@H1
|
55 |
+
/ S
|
56 |
+
=N -1
|
57 |
+
S e
|
58 |
+
= P
|
59 |
+
N -1
|
60 |
+
Ring 3
|
61 |
+
2 H
|
62 |
+
P +1
|
63 |
+
K +1
|
64 |
+
\ C@@H1
|
65 |
+
\ C@H1
|
66 |
+
/ N+1
|
67 |
+
@ @
|
68 |
+
C -1
|
69 |
+
# N+1
|
70 |
+
B -1
|
71 |
+
+ 3
|
72 |
+
Cl +3
|
73 |
+
\ NH1
|
74 |
+
L i
|
75 |
+
Li +1
|
76 |
+
P H1
|
77 |
+
1 8
|
78 |
+
18 F
|
79 |
+
@ +1
|
80 |
+
3 H
|
81 |
+
P @@
|
82 |
+
H 0
|
83 |
+
O H0
|
84 |
+
1 2
|
85 |
+
P @
|
86 |
+
+ 2
|
87 |
+
@@ +1
|
88 |
+
S -1
|
89 |
+
/ Br
|
90 |
+
- /
|
91 |
+
\ Cl
|
92 |
+
-/ Ring2
|
93 |
+
\ O-1
|
94 |
+
1 1
|
95 |
+
5 I
|
96 |
+
12 5I
|
97 |
+
11 C
|
98 |
+
H 3
|
99 |
+
\ N+1
|
100 |
+
- \
|
101 |
+
/ C@@
|
102 |
+
S @+1
|
103 |
+
A s
|
104 |
+
/ Cl
|
105 |
+
11C H3
|
106 |
+
=S e
|
107 |
+
S @@+1
|
108 |
+
N @+1
|
109 |
+
1 4
|
110 |
+
-\ Ring2
|
111 |
+
14 C
|
112 |
+
\ F
|
113 |
+
/ C@
|
114 |
+
T e
|
115 |
+
H 2
|
116 |
+
H1 -1
|
117 |
+
=O +1
|
118 |
+
N @@+1
|
119 |
+
C +1
|
120 |
+
=S +1
|
121 |
+
Z n
|
122 |
+
/ P
|
123 |
+
a +2
|
124 |
+
/ I
|
125 |
+
O H1-1
|
126 |
+
C a+2
|
127 |
+
\ Br
|
128 |
+
M g
|
129 |
+
Zn +2
|
130 |
+
A l
|
131 |
+
/ F
|
132 |
+
Mg +2
|
133 |
+
12 3
|
134 |
+
123 I
|
135 |
+
1 3
|
136 |
+
I +1
|
137 |
+
/ O-1
|
138 |
+
-\ Ring1
|
139 |
+
B H2
|
140 |
+
BH2 -1
|
141 |
+
\ I
|
142 |
+
/ NH1
|
143 |
+
O +1
|
144 |
+
13 1
|
145 |
+
131 I
|
146 |
+
= 14C
|
147 |
+
/ S+1
|
148 |
+
= Ring3
|
149 |
+
\ C@@
|
150 |
+
H2 +1
|
151 |
+
\ C@
|
152 |
+
A g
|
153 |
+
= As
|
154 |
+
=Se +1
|
155 |
+
N H2+1
|
156 |
+
Se H1
|
157 |
+
-/ Ring1
|
158 |
+
= Te
|
159 |
+
Al +3
|
160 |
+
Na H1
|
161 |
+
=Te +1
|
162 |
+
NH1 +1
|
163 |
+
Ag +1
|
164 |
+
H1 +1
|
165 |
+
NH1 -1
|
166 |
+
\ P
|
167 |
+
14C H2
|
168 |
+
13 C
|
169 |
+
14C H1
|
170 |
+
= 11C
|
171 |
+
S @@
|
172 |
+
=P @@
|
173 |
+
Si H2
|
174 |
+
H3 -1
|
175 |
+
14C H3
|
176 |
+
B H3-1
|
177 |
+
S @
|
178 |
+
=14C H1
|
179 |
+
=P H1
|
180 |
+
=P @
|
181 |
+
=N H1+1
|
182 |
+
\S +1
|
183 |
+
12 4
|
184 |
+
C H1-1
|
185 |
+
S r
|
186 |
+
=S i
|
187 |
+
124 I
|
188 |
+
Sr +2
|
189 |
+
#C -1
|
190 |
+
/C -1
|
191 |
+
N @
|
192 |
+
/N -1
|
193 |
+
13C H1
|
194 |
+
/ B
|
195 |
+
1 9
|
196 |
+
B a+2
|
197 |
+
H 4
|
198 |
+
S H1+1
|
199 |
+
Se +1
|
200 |
+
19 F
|
201 |
+
/ 125I
|
202 |
+
P @+1
|
203 |
+
R b
|
204 |
+
Cl +1
|
205 |
+
Si H4
|
206 |
+
Rb +1
|
207 |
+
= Branch3
|
208 |
+
N @@
|
209 |
+
As +1
|
210 |
+
/ Si
|
211 |
+
B H1-1
|
212 |
+
S H1
|
213 |
+
/ 123I
|
214 |
+
3 2
|
215 |
+
= Mg
|
216 |
+
H +1
|
217 |
+
\ B
|
218 |
+
Si H1
|
219 |
+
P@@ +1
|
220 |
+
- 2
|
221 |
+
1 5
|
222 |
+
1 7
|
223 |
+
3 5
|
224 |
+
= 13CH1
|
225 |
+
C s
|
226 |
+
=N H2+1
|
227 |
+
=S H1
|
228 |
+
Mg H2
|
229 |
+
32 P
|
230 |
+
17 F
|
231 |
+
35 S
|
232 |
+
Cs +1
|
233 |
+
# 11C
|
234 |
+
/ 131I
|
235 |
+
B i
|
236 |
+
\ 125I
|
237 |
+
=S @@
|
238 |
+
\S -1
|
239 |
+
6 Br
|
240 |
+
7 I
|
241 |
+
7 6Br
|
242 |
+
= B
|
243 |
+
e H1
|
244 |
+
\N -1
|
245 |
+
18 O
|
246 |
+
12 7I
|
247 |
+
11C H2
|
248 |
+
14 C@@H1
|
249 |
+
Te H2
|
250 |
+
15 NH1
|
251 |
+
Bi +3
|
252 |
+
/ P+1
|
253 |
+
/ 13C
|
254 |
+
/ 13CH1
|
255 |
+
0 B
|
256 |
+
1 0B
|
257 |
+
= Al
|
258 |
+
= 18O
|
259 |
+
B H0
|
260 |
+
F -1
|
261 |
+
N H3
|
262 |
+
S -2
|
263 |
+
Br +2
|
264 |
+
Cl +2
|
265 |
+
\S i
|
266 |
+
/S -1
|
267 |
+
=P H2
|
268 |
+
14 C@H1
|
269 |
+
NH3 +1
|
270 |
+
# 14C
|
271 |
+
# O+1
|
272 |
+
- 3
|
273 |
+
2 2
|
274 |
+
4 H
|
275 |
+
5 Se
|
276 |
+
5 Sr+2
|
277 |
+
7 5Se
|
278 |
+
8 5Sr+2
|
279 |
+
= B-1
|
280 |
+
= 13C
|
281 |
+
@ -1
|
282 |
+
B e
|
283 |
+
B @@
|
284 |
+
B @-1
|
285 |
+
C a
|
286 |
+
C H1
|
287 |
+
I +3
|
288 |
+
K H1
|
289 |
+
O H1+1
|
290 |
+
R a+2
|
291 |
+
S H1-1
|
292 |
+
\ PH1
|
293 |
+
\ 123I
|
294 |
+
=C a
|
295 |
+
\C H1-1
|
296 |
+
=S @
|
297 |
+
\S eH1
|
298 |
+
/S eH1
|
299 |
+
Se -1
|
300 |
+
Li H1
|
301 |
+
18F -1
|
302 |
+
125I H1
|
303 |
+
11C H1
|
304 |
+
Te H1
|
305 |
+
Zn +1
|
306 |
+
Zn -2
|
307 |
+
Al -3
|
308 |
+
13C H3
|
309 |
+
15 N
|
310 |
+
Be +2
|
311 |
+
B@@ -1
|
312 |
+
# P
|
313 |
+
# S
|
314 |
+
- 4
|
315 |
+
/ PH1
|
316 |
+
/ P@@
|
317 |
+
/ As
|
318 |
+
/ 14C
|
319 |
+
/ 14CH1
|
320 |
+
2 K+1
|
321 |
+
2 Rb+1
|
322 |
+
3 Se
|
323 |
+
3 Ra+2
|
324 |
+
4 5
|
325 |
+
4 7
|
326 |
+
4 2K+1
|
327 |
+
5 I-1
|
328 |
+
7 3Se
|
329 |
+
8 9
|
330 |
+
8 2Rb+1
|
331 |
+
= 32
|
332 |
+
= 32P
|
333 |
+
C H0
|
334 |
+
C H2
|
335 |
+
I +2
|
336 |
+
N H0
|
337 |
+
N H4
|
338 |
+
O H1
|
339 |
+
P H2+1
|
340 |
+
S H0
|
341 |
+
S H2
|
342 |
+
\ 3H
|
343 |
+
\ 11CH3
|
344 |
+
\C -1
|
345 |
+
\S e
|
346 |
+
Si @
|
347 |
+
Si -1
|
348 |
+
Si H1-1
|
349 |
+
Si H3-1
|
350 |
+
/S e
|
351 |
+
Se -2
|
352 |
+
\NH1 -1
|
353 |
+
18F H1
|
354 |
+
12 5I-1
|
355 |
+
11 C@@H1
|
356 |
+
11 C-1
|
357 |
+
As H1
|
358 |
+
As -1
|
359 |
+
14 C@@
|
360 |
+
Te -1
|
361 |
+
Mg +1
|
362 |
+
123 I-1
|
363 |
+
123 Te
|
364 |
+
123I H1
|
365 |
+
13 5I
|
366 |
+
131 I-1
|
367 |
+
Ag -4
|
368 |
+
124 I-1
|
369 |
+
76Br H1
|
370 |
+
18O H1
|
371 |
+
22 Na+1
|
372 |
+
22 3Ra+2
|
373 |
+
Ca H2
|
374 |
+
45 Ca+2
|
375 |
+
47 Ca+2
|
376 |
+
89 Sr+2
|
377 |
+
=32 PH1
|
378 |
+
NH4 +1
|
modelO.out
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9104e748a3f08b2e89546d581f87136c678cc2bb31e8371b8b7dc101f7d6ce57
|
3 |
+
size 346721067
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
|
tokenizer.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":1,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":3,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",1],"trim_offsets":true,"add_prefix_space":false},"decoder":{"type":"ByteLevel","add_prefix_space":true,"trim_offsets":true},"model":{"type":"BPE","dropout":null,"unk_token":null,"continuing_subword_prefix":"","end_of_word_suffix":"","fuse_unk":false,"vocab":{"<unk>":0,"<s>":1,"</s>":2,"<pad>":3,"<mask>":4,"\n":5,"#":6,"+":7,"-":8,".":9,"/":10,"0":11,"1":12,"2":13,"3":14,"4":15,"5":16,"6":17,"7":18,"8":19,"9":20,"=":21,"@":22,"A":23,"B":24,"C":25,"F":26,"H":27,"I":28,"K":29,"L":30,"M":31,"N":32,"O":33,"P":34,"R":35,"S":36,"T":37,"Z":38,"\\":39,"a":40,"b":41,"c":42,"e":43,"g":44,"h":45,"i":46,"l":47,"n":48,"r":49,"s":50,"Br":51,"an":52,"ch":53,"Bran":54,"Branch":55,"Branch1":56,"=C":57,"Ri":58,"ng":59,"Ring":60,"Ring1":61,"=Branch1":62,"Branch2":63,"=O":64,"Ring2":65,"H1":66,"C@":67,"=N":68,"#Branch1":69,"C@@":70,"=Branch2":71,"C@H1":72,"C@@H1":73,"#Branch2":74,"#C":75,"Cl":76,"/C":77,"NH1":78,"=Ring1":79,"+1":80,"-1":81,"O-1":82,"N+1":83,"\\C":84,"#N":85,"/N":86,"=Ring2":87,"=S":88,"=N+1":89,"\\N":90,"Na":91,"Na+1":92,"/O":93,"\\O":94,"Br-1":95,"Branch3":96,"\\S":97,"S+1":98,"Cl-1":99,"I-1":100,"/C@@H1":101,"Si":102,"/C@H1":103,"/S":104,"=N-1":105,"Se":106,"=P":107,"N-1":108,"Ring3":109,"2H":110,"P+1":111,"K+1":112,"\\C@@H1":113,"\\C@H1":114,"/N+1":115,"@@":116,"C-1":117,"#N+1":118,"B-1":119,"+3":120,"Cl+3":121,"\\NH1":122,"Li":123,"Li+1":124,"PH1":125,"18":126,"18F":127,"@+1":128,"3H":129,"P@@":130,"H0":131,"OH0":132,"12":133,"P@":134,"+2":135,"@@+1":136,"S-1":137,"/Br":138,"-/":139,"\\Cl":140,"-/Ring2":141,"\\O-1":142,"11":143,"5I":144,"125I":145,"11C":146,"H3":147,"\\N+1":148,"-\\":149,"/C@@":150,"S@+1":151,"As":152,"/Cl":153,"11CH3":154,"=Se":155,"S@@+1":156,"N@+1":157,"14":158,"-\\Ring2":159,"14C":160,"\\F":161,"/C@":162,"Te":163,"H2":164,"H1-1":165,"=O+1":166,"N@@+1":167,"C+1":168,"=S+1":169,"Zn":170,"/P":171,"a+2":172,"/I":173,"OH1-1":174,"Ca+2":175,"\\Br":176,"Mg":177,"Zn+2":178,"Al":179,"/F":180,"Mg+2":181,"123":182,"123I":183,"13":184,"I+1":185,"/O-1":186,"-\\Ring1":187,"BH2":188,"BH2-1":189,"\\I":190,"/NH1":191,"O+1":192,"131":193,"131I":194,"=14C":195,"/S+1":196,"=Ring3":197,"\\C@@":198,"H2+1":199,"\\C@":200,"Ag":201,"=As":202,"=Se+1":203,"NH2+1":204,"SeH1":205,"-/Ring1":206,"=Te":207,"Al+3":208,"NaH1":209,"=Te+1":210,"NH1+1":211,"Ag+1":212,"H1+1":213,"NH1-1":214,"\\P":215,"14CH2":216,"13C":217,"14CH1":218,"=11C":219,"S@@":220,"=P@@":221,"SiH2":222,"H3-1":223,"14CH3":224,"BH3-1":225,"S@":226,"=14CH1":227,"=PH1":228,"=P@":229,"=NH1+1":230,"\\S+1":231,"124":232,"CH1-1":233,"Sr":234,"=Si":235,"124I":236,"Sr+2":237,"#C-1":238,"/C-1":239,"N@":240,"/N-1":241,"13CH1":242,"/B":243,"19":244,"Ba+2":245,"H4":246,"SH1+1":247,"Se+1":248,"19F":249,"/125I":250,"P@+1":251,"Rb":252,"Cl+1":253,"SiH4":254,"Rb+1":255,"=Branch3":256,"N@@":257,"As+1":258,"/Si":259,"BH1-1":260,"SH1":261,"/123I":262,"32":263,"=Mg":264,"H+1":265,"\\B":266,"SiH1":267,"P@@+1":268,"-2":269,"15":270,"17":271,"35":272,"=13CH1":273,"Cs":274,"=NH2+1":275,"=SH1":276,"MgH2":277,"32P":278,"17F":279,"35S":280,"Cs+1":281,"#11C":282,"/131I":283,"Bi":284,"\\125I":285,"=S@@":286,"\\S-1":287,"6Br":288,"7I":289,"76Br":290,"=B":291,"eH1":292,"\\N-1":293,"18O":294,"127I":295,"11CH2":296,"14C@@H1":297,"TeH2":298,"15NH1":299,"Bi+3":300,"/P+1":301,"/13C":302,"/13CH1":303,"0B":304,"10B":305,"=Al":306,"=18O":307,"BH0":308,"F-1":309,"NH3":310,"S-2":311,"Br+2":312,"Cl+2":313,"\\Si":314,"/S-1":315,"=PH2":316,"14C@H1":317,"NH3+1":318,"#14C":319,"#O+1":320,"-3":321,"22":322,"4H":323,"5Se":324,"5Sr+2":325,"75Se":326,"85Sr+2":327,"=B-1":328,"=13C":329,"@-1":330,"Be":331,"B@@":332,"B@-1":333,"Ca":334,"CH1":335,"I+3":336,"KH1":337,"OH1+1":338,"Ra+2":339,"SH1-1":340,"\\PH1":341,"\\123I":342,"=Ca":343,"\\CH1-1":344,"=S@":345,"\\SeH1":346,"/SeH1":347,"Se-1":348,"LiH1":349,"18F-1":350,"125IH1":351,"11CH1":352,"TeH1":353,"Zn+1":354,"Zn-2":355,"Al-3":356,"13CH3":357,"15N":358,"Be+2":359,"B@@-1":360,"#P":361,"#S":362,"-4":363,"/PH1":364,"/P@@":365,"/As":366,"/14C":367,"/14CH1":368,"2K+1":369,"2Rb+1":370,"3Se":371,"3Ra+2":372,"45":373,"47":374,"42K+1":375,"5I-1":376,"73Se":377,"89":378,"82Rb+1":379,"=32":380,"=32P":381,"CH0":382,"CH2":383,"I+2":384,"NH0":385,"NH4":386,"OH1":387,"PH2+1":388,"SH0":389,"SH2":390,"\\3H":391,"\\11CH3":392,"\\C-1":393,"\\Se":394,"Si@":395,"Si-1":396,"SiH1-1":397,"SiH3-1":398,"/Se":399,"Se-2":400,"\\NH1-1":401,"18FH1":402,"125I-1":403,"11C@@H1":404,"11C-1":405,"AsH1":406,"As-1":407,"14C@@":408,"Te-1":409,"Mg+1":410,"123I-1":411,"123Te":412,"123IH1":413,"135I":414,"131I-1":415,"Ag-4":416,"124I-1":417,"76BrH1":418,"18OH1":419,"22Na+1":420,"223Ra+2":421,"CaH2":422,"45Ca+2":423,"47Ca+2":424,"89Sr+2":425,"=32PH1":426,"NH4+1":427},"merges":["B r","a n","c h","Br an","Bran ch","Branch 1","= C","R i","n g","Ri ng","Ring 1","= Branch1","Branch 2","= O","Ring 2","H 1","C @","= N","# Branch1","C@ @","= Branch2","C@ H1","C@@ H1","# Branch2","# C","C l","/ C","N H1","= Ring1","+ 1","- 1","O -1","N +1","\\ C","# N","/ N","= Ring2","= S","=N +1","\\ N","N a","Na +1","/ O","\\ O","Br -1","Branch 3","\\ S","S +1","Cl -1","I -1","/ C@@H1","S i","/ C@H1","/ S","=N -1","S e","= P","N -1","Ring 3","2 H","P +1","K +1","\\ C@@H1","\\ C@H1","/ N+1","@ @","C -1","# N+1","B -1","+ 3","Cl +3","\\ NH1","L i","Li +1","P H1","1 8","18 F","@ +1","3 H","P @@","H 0","O H0","1 2","P @","+ 2","@@ +1","S -1","/ Br","- /","\\ Cl","-/ Ring2","\\ O-1","1 1","5 I","12 5I","11 C","H 3","\\ N+1","- \\","/ C@@","S @+1","A s","/ Cl","11C H3","=S e","S @@+1","N @+1","1 4","-\\ Ring2","14 C","\\ F","/ C@","T e","H 2","H1 -1","=O +1","N @@+1","C +1","=S +1","Z n","/ P","a +2","/ I","O H1-1","C a+2","\\ Br","M g","Zn +2","A l","/ F","Mg +2","12 3","123 I","1 3","I +1","/ O-1","-\\ Ring1","B H2","BH2 -1","\\ I","/ NH1","O +1","13 1","131 I","= 14C","/ S+1","= Ring3","\\ C@@","H2 +1","\\ C@","A g","= As","=Se +1","N H2+1","Se H1","-/ Ring1","= Te","Al +3","Na H1","=Te +1","NH1 +1","Ag +1","H1 +1","NH1 -1","\\ P","14C H2","13 C","14C H1","= 11C","S @@","=P @@","Si H2","H3 -1","14C H3","B H3-1","S @","=14C H1","=P H1","=P @","=N H1+1","\\S +1","12 4","C H1-1","S r","=S i","124 I","Sr +2","#C -1","/C -1","N @","/N -1","13C H1","/ B","1 9","B a+2","H 4","S H1+1","Se +1","19 F","/ 125I","P @+1","R b","Cl +1","Si H4","Rb +1","= Branch3","N @@","As +1","/ Si","B H1-1","S H1","/ 123I","3 2","= Mg","H +1","\\ B","Si H1","P@@ +1","- 2","1 5","1 7","3 5","= 13CH1","C s","=N H2+1","=S H1","Mg H2","32 P","17 F","35 S","Cs +1","# 11C","/ 131I","B i","\\ 125I","=S @@","\\S -1","6 Br","7 I","7 6Br","= B","e H1","\\N -1","18 O","12 7I","11C H2","14 C@@H1","Te H2","15 NH1","Bi +3","/ P+1","/ 13C","/ 13CH1","0 B","1 0B","= Al","= 18O","B H0","F -1","N H3","S -2","Br +2","Cl +2","\\S i","/S -1","=P H2","14 C@H1","NH3 +1","# 14C","# O+1","- 3","2 2","4 H","5 Se","5 Sr+2","7 5Se","8 5Sr+2","= B-1","= 13C","@ -1","B e","B @@","B @-1","C a","C H1","I +3","K H1","O H1+1","R a+2","S H1-1","\\ PH1","\\ 123I","=C a","\\C H1-1","=S @","\\S eH1","/S eH1","Se -1","Li H1","18F -1","125I H1","11C H1","Te H1","Zn +1","Zn -2","Al -3","13C H3","15 N","Be +2","B@@ -1","# P","# S","- 4","/ PH1","/ P@@","/ As","/ 14C","/ 14CH1","2 K+1","2 Rb+1","3 Se","3 Ra+2","4 5","4 7","4 2K+1","5 I-1","7 3Se","8 9","8 2Rb+1","= 32","= 32P","C H0","C H2","I +2","N H0","N H4","O H1","P H2+1","S H0","S H2","\\ 3H","\\ 11CH3","\\C -1","\\S e","Si @","Si -1","Si H1-1","Si H3-1","/S e","Se -2","\\NH1 -1","18F H1","12 5I-1","11 C@@H1","11 C-1","As H1","As -1","14 C@@","Te -1","Mg +1","123 I-1","123 Te","123I H1","13 5I","131 I-1","Ag -4","124 I-1","76Br H1","18O H1","22 Na+1","22 3Ra+2","Ca H2","45 Ca+2","47 Ca+2","89 Sr+2","=32 PH1","NH4 +1"]}}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "trim_offsets": true, "special_tokens_map_file": null, "name_or_path": "./data/bpe/", "tokenizer_class": "RobertaTokenizer"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd2c4928176a0ca2a2544e4ede4ad9eaebf99efcd9949bde2d51f799090224d8
|
3 |
+
size 2991
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<unk>":0,"<s>":1,"</s>":2,"<pad>":3,"<mask>":4,"\n":5,"#":6,"+":7,"-":8,".":9,"/":10,"0":11,"1":12,"2":13,"3":14,"4":15,"5":16,"6":17,"7":18,"8":19,"9":20,"=":21,"@":22,"A":23,"B":24,"C":25,"F":26,"H":27,"I":28,"K":29,"L":30,"M":31,"N":32,"O":33,"P":34,"R":35,"S":36,"T":37,"Z":38,"\\":39,"a":40,"b":41,"c":42,"e":43,"g":44,"h":45,"i":46,"l":47,"n":48,"r":49,"s":50,"Br":51,"an":52,"ch":53,"Bran":54,"Branch":55,"Branch1":56,"=C":57,"Ri":58,"ng":59,"Ring":60,"Ring1":61,"=Branch1":62,"Branch2":63,"=O":64,"Ring2":65,"H1":66,"C@":67,"=N":68,"#Branch1":69,"C@@":70,"=Branch2":71,"C@H1":72,"C@@H1":73,"#Branch2":74,"#C":75,"Cl":76,"/C":77,"NH1":78,"=Ring1":79,"+1":80,"-1":81,"O-1":82,"N+1":83,"\\C":84,"#N":85,"/N":86,"=Ring2":87,"=S":88,"=N+1":89,"\\N":90,"Na":91,"Na+1":92,"/O":93,"\\O":94,"Br-1":95,"Branch3":96,"\\S":97,"S+1":98,"Cl-1":99,"I-1":100,"/C@@H1":101,"Si":102,"/C@H1":103,"/S":104,"=N-1":105,"Se":106,"=P":107,"N-1":108,"Ring3":109,"2H":110,"P+1":111,"K+1":112,"\\C@@H1":113,"\\C@H1":114,"/N+1":115,"@@":116,"C-1":117,"#N+1":118,"B-1":119,"+3":120,"Cl+3":121,"\\NH1":122,"Li":123,"Li+1":124,"PH1":125,"18":126,"18F":127,"@+1":128,"3H":129,"P@@":130,"H0":131,"OH0":132,"12":133,"P@":134,"+2":135,"@@+1":136,"S-1":137,"/Br":138,"-/":139,"\\Cl":140,"-/Ring2":141,"\\O-1":142,"11":143,"5I":144,"125I":145,"11C":146,"H3":147,"\\N+1":148,"-\\":149,"/C@@":150,"S@+1":151,"As":152,"/Cl":153,"11CH3":154,"=Se":155,"S@@+1":156,"N@+1":157,"14":158,"-\\Ring2":159,"14C":160,"\\F":161,"/C@":162,"Te":163,"H2":164,"H1-1":165,"=O+1":166,"N@@+1":167,"C+1":168,"=S+1":169,"Zn":170,"/P":171,"a+2":172,"/I":173,"OH1-1":174,"Ca+2":175,"\\Br":176,"Mg":177,"Zn+2":178,"Al":179,"/F":180,"Mg+2":181,"123":182,"123I":183,"13":184,"I+1":185,"/O-1":186,"-\\Ring1":187,"BH2":188,"BH2-1":189,"\\I":190,"/NH1":191,"O+1":192,"131":193,"131I":194,"=14C":195,"/S+1":196,"=Ring3":197,"\\C@@":198,"H2+1":199,"\\C@":200,"Ag":201,"=As":202,"=Se+1":203,"NH2+1":204,"SeH1":205,"-/Ring1":206,"=Te":207,"Al+3":208,"NaH1":209,"=Te+1":210,"NH1+1":211,"Ag+1":212,"H1+1":213,"NH1-1":214,"\\P":215,"14CH2":216,"13C":217,"14CH1":218,"=11C":219,"S@@":220,"=P@@":221,"SiH2":222,"H3-1":223,"14CH3":224,"BH3-1":225,"S@":226,"=14CH1":227,"=PH1":228,"=P@":229,"=NH1+1":230,"\\S+1":231,"124":232,"CH1-1":233,"Sr":234,"=Si":235,"124I":236,"Sr+2":237,"#C-1":238,"/C-1":239,"N@":240,"/N-1":241,"13CH1":242,"/B":243,"19":244,"Ba+2":245,"H4":246,"SH1+1":247,"Se+1":248,"19F":249,"/125I":250,"P@+1":251,"Rb":252,"Cl+1":253,"SiH4":254,"Rb+1":255,"=Branch3":256,"N@@":257,"As+1":258,"/Si":259,"BH1-1":260,"SH1":261,"/123I":262,"32":263,"=Mg":264,"H+1":265,"\\B":266,"SiH1":267,"P@@+1":268,"-2":269,"15":270,"17":271,"35":272,"=13CH1":273,"Cs":274,"=NH2+1":275,"=SH1":276,"MgH2":277,"32P":278,"17F":279,"35S":280,"Cs+1":281,"#11C":282,"/131I":283,"Bi":284,"\\125I":285,"=S@@":286,"\\S-1":287,"6Br":288,"7I":289,"76Br":290,"=B":291,"eH1":292,"\\N-1":293,"18O":294,"127I":295,"11CH2":296,"14C@@H1":297,"TeH2":298,"15NH1":299,"Bi+3":300,"/P+1":301,"/13C":302,"/13CH1":303,"0B":304,"10B":305,"=Al":306,"=18O":307,"BH0":308,"F-1":309,"NH3":310,"S-2":311,"Br+2":312,"Cl+2":313,"\\Si":314,"/S-1":315,"=PH2":316,"14C@H1":317,"NH3+1":318,"#14C":319,"#O+1":320,"-3":321,"22":322,"4H":323,"5Se":324,"5Sr+2":325,"75Se":326,"85Sr+2":327,"=B-1":328,"=13C":329,"@-1":330,"Be":331,"B@@":332,"B@-1":333,"Ca":334,"CH1":335,"I+3":336,"KH1":337,"OH1+1":338,"Ra+2":339,"SH1-1":340,"\\PH1":341,"\\123I":342,"=Ca":343,"\\CH1-1":344,"=S@":345,"\\SeH1":346,"/SeH1":347,"Se-1":348,"LiH1":349,"18F-1":350,"125IH1":351,"11CH1":352,"TeH1":353,"Zn+1":354,"Zn-2":355,"Al-3":356,"13CH3":357,"15N":358,"Be+2":359,"B@@-1":360,"#P":361,"#S":362,"-4":363,"/PH1":364,"/P@@":365,"/As":366,"/14C":367,"/14CH1":368,"2K+1":369,"2Rb+1":370,"3Se":371,"3Ra+2":372,"45":373,"47":374,"42K+1":375,"5I-1":376,"73Se":377,"89":378,"82Rb+1":379,"=32":380,"=32P":381,"CH0":382,"CH2":383,"I+2":384,"NH0":385,"NH4":386,"OH1":387,"PH2+1":388,"SH0":389,"SH2":390,"\\3H":391,"\\11CH3":392,"\\C-1":393,"\\Se":394,"Si@":395,"Si-1":396,"SiH1-1":397,"SiH3-1":398,"/Se":399,"Se-2":400,"\\NH1-1":401,"18FH1":402,"125I-1":403,"11C@@H1":404,"11C-1":405,"AsH1":406,"As-1":407,"14C@@":408,"Te-1":409,"Mg+1":410,"123I-1":411,"123Te":412,"123IH1":413,"135I":414,"131I-1":415,"Ag-4":416,"124I-1":417,"76BrH1":418,"18OH1":419,"22Na+1":420,"223Ra+2":421,"CaH2":422,"45Ca+2":423,"47Ca+2":424,"89Sr+2":425,"=32PH1":426,"NH4+1":427}
|