Upload with huggingface_hub
Browse files
- config.json +26 -0
- merges.txt +162 -0
- modelM.out +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +1 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
- vocab.json +1 -0
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"RobertaForMaskedLM"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"bos_token_id": 0,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 514,
|
16 |
+
"model_type": "roberta",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 8,
|
19 |
+
"pad_token_id": 1,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.15.0",
|
23 |
+
"type_vocab_size": 1,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 800
|
26 |
+
}
|
merges.txt
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#version: 0.2 - Trained by `huggingface/tokenizers`
|
2 |
+
B r
|
3 |
+
a n
|
4 |
+
c h
|
5 |
+
Br an
|
6 |
+
Bran ch
|
7 |
+
Branch 1
|
8 |
+
= C
|
9 |
+
R i
|
10 |
+
n g
|
11 |
+
Ri ng
|
12 |
+
Ring 1
|
13 |
+
= Branch1
|
14 |
+
Branch 2
|
15 |
+
= O
|
16 |
+
Ring 2
|
17 |
+
H 1
|
18 |
+
C @
|
19 |
+
= N
|
20 |
+
# Branch1
|
21 |
+
C@ @
|
22 |
+
= Branch2
|
23 |
+
C@ H1
|
24 |
+
C@@ H1
|
25 |
+
# Branch2
|
26 |
+
C l
|
27 |
+
# C
|
28 |
+
/ C
|
29 |
+
N H1
|
30 |
+
+ 1
|
31 |
+
- 1
|
32 |
+
= Ring1
|
33 |
+
O -1
|
34 |
+
N +1
|
35 |
+
\ C
|
36 |
+
/ N
|
37 |
+
# N
|
38 |
+
= Ring2
|
39 |
+
= S
|
40 |
+
=N +1
|
41 |
+
N a
|
42 |
+
Na +1
|
43 |
+
\ N
|
44 |
+
S +1
|
45 |
+
/ O
|
46 |
+
\ S
|
47 |
+
\ O
|
48 |
+
Br -1
|
49 |
+
I -1
|
50 |
+
Cl -1
|
51 |
+
/ C@H1
|
52 |
+
Branch 3
|
53 |
+
/ C@@H1
|
54 |
+
= P
|
55 |
+
/ S
|
56 |
+
=N -1
|
57 |
+
S i
|
58 |
+
K +1
|
59 |
+
N -1
|
60 |
+
S e
|
61 |
+
L i
|
62 |
+
Li +1
|
63 |
+
+ 3
|
64 |
+
Cl +3
|
65 |
+
\ C@H1
|
66 |
+
Ring 3
|
67 |
+
\ C@@H1
|
68 |
+
/ N+1
|
69 |
+
/ P
|
70 |
+
\ F
|
71 |
+
P @
|
72 |
+
2 H
|
73 |
+
P H1
|
74 |
+
/ Br
|
75 |
+
N @
|
76 |
+
P +1
|
77 |
+
/ Cl
|
78 |
+
\ NH1
|
79 |
+
\ Br
|
80 |
+
@ +1
|
81 |
+
/ I
|
82 |
+
/ C@
|
83 |
+
T e
|
84 |
+
\ N+1
|
85 |
+
P@ @
|
86 |
+
1 2
|
87 |
+
5 I
|
88 |
+
\ O-1
|
89 |
+
12 5I
|
90 |
+
/ F
|
91 |
+
# N+1
|
92 |
+
\ Cl
|
93 |
+
N@ +1
|
94 |
+
\ I
|
95 |
+
- /
|
96 |
+
/ C@@
|
97 |
+
N@ @
|
98 |
+
N@ @+1
|
99 |
+
-/ Ring2
|
100 |
+
- \
|
101 |
+
1 4
|
102 |
+
B -1
|
103 |
+
C -1
|
104 |
+
S @+1
|
105 |
+
14 C
|
106 |
+
H 2
|
107 |
+
H 4
|
108 |
+
I +1
|
109 |
+
S -1
|
110 |
+
\ P
|
111 |
+
=S +1
|
112 |
+
=P @
|
113 |
+
Si H4
|
114 |
+
+ 2
|
115 |
+
3 H
|
116 |
+
@ @+1
|
117 |
+
A g
|
118 |
+
C +1
|
119 |
+
S @@+1
|
120 |
+
Cl +1
|
121 |
+
=S e
|
122 |
+
-\ Ring1
|
123 |
+
H 0
|
124 |
+
O H0
|
125 |
+
1 1
|
126 |
+
= Branch3
|
127 |
+
= Te
|
128 |
+
M g
|
129 |
+
O +1
|
130 |
+
Z n
|
131 |
+
\ C@
|
132 |
+
\ S+1
|
133 |
+
H1 -1
|
134 |
+
Se H1
|
135 |
+
P@ +1
|
136 |
+
-\ Ring2
|
137 |
+
11 C
|
138 |
+
=Te +1
|
139 |
+
Zn +2
|
140 |
+
/ NH1
|
141 |
+
1 8
|
142 |
+
A s
|
143 |
+
B H2
|
144 |
+
B H1-1
|
145 |
+
C a
|
146 |
+
H 3
|
147 |
+
O H1-1
|
148 |
+
S H2
|
149 |
+
=O +1
|
150 |
+
Se +1
|
151 |
+
Te H2
|
152 |
+
125I H1
|
153 |
+
-/ Ring1
|
154 |
+
14C H2
|
155 |
+
Ag +1
|
156 |
+
=Se +1
|
157 |
+
Mg H2
|
158 |
+
Mg +2
|
159 |
+
11C H3
|
160 |
+
18 F
|
161 |
+
BH2 -1
|
162 |
+
Ca +2
|
modelM.out
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bec8f5b146d60ad6bff25b132fb4e66dbd56cb03b61bd09b6fb303ffdc0ec709
|
3 |
+
size 233288939
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
|
tokenizer.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"<unk>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":1,"special":true,"content":"<s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":2,"special":true,"content":"</s>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":3,"special":true,"content":"<pad>","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},{"id":4,"special":true,"content":"<mask>","single_word":false,"lstrip":true,"rstrip":false,"normalized":true}],"normalizer":null,"pre_tokenizer":{"type":"ByteLevel","add_prefix_space":false,"trim_offsets":true},"post_processor":{"type":"RobertaProcessing","sep":["</s>",2],"cls":["<s>",1],"trim_offsets":true,"add_prefix_space":false},"decoder":{"type":"ByteLevel","add_prefix_space":true,"trim_offsets":true},"model":{"type":"BPE","dropout":null,"unk_token":null,"continuing_subword_prefix":"","end_of_word_suffix":"","fuse_unk":false,"vocab":{"<unk>":0,"<s>":1,"</s>":2,"<pad>":3,"<mask>":4,"\n":5,"#":6,"+":7,"-":8,".":9,"/":10,"0":11,"1":12,"2":13,"3":14,"4":15,"5":16,"8":17,"=":18,"@":19,"A":20,"B":21,"C":22,"F":23,"H":24,"I":25,"K":26,"L":27,"M":28,"N":29,"O":30,"P":31,"R":32,"S":33,"T":34,"Z":35,"\\":36,"a":37,"c":38,"e":39,"g":40,"h":41,"i":42,"l":43,"n":44,"r":45,"s":46,"Br":47,"an":48,"ch":49,"Bran":50,"Branch":51,"Branch1":52,"=C":53,"Ri":54,"ng":55,"Ring":56,"Ring1":57,"=Branch1":58,"Branch2":59,"=O":60,"Ring2":61,"H1":62,"C@":63,"=N":64,"#Branch1":65,"C@@":66,"=Branch2":67,"C@H1":68,"C@@H1":69,"#Branch2":70,"Cl":71,"#C":72,"/C":73,"NH1":74,"+1":75,"-1":76,"=Ring1":77,"O-1":78,"N+1":79,"\\C":80,"/N":81,"#N":82,"=Ring2":83,"=S":84,"=N+1":85,"Na":86,"Na+1":87,"\\N":88,"S+1":89,"/O":90,"\\S":91,"\\O":92,"Br-1":93,"I-1":94,"Cl-1":95,"/C@H1":96,"Branch3":97,"/C@@H1":98,"=P":99,"/S":100,"=N-1":101,"Si":102,"K+1":103,"N-1":104,"Se":105,"Li":106,"Li+1":107,"+3":108,"Cl+3":109,"\\C@H1":110,"Ring3":111,"\\C@@H1":112,"/N+1":113,"/P":114,"\\F":115,"P@":116,"2H":117,"PH1":118,"/Br":119,"N@":120,"P+1":121,"/Cl":122,"\\NH1":123,"\\Br":124,"@+1":125,"/I":126,"/C@":127,"Te":128,"\\N+1":129,"P@@":130,"12":131,"5I":132,"\\O-1":133,"125I":134,"/F":135,"#N+1":136,"\\Cl":137,"N@+1":138,"\\I":139,"-/":140,"/C@@":141,"N@@":142,"N@@+1":143,"-/Ring2":144,"-\\":145,"14":146,"B-1":147,"C-1":148,"S@+1":149,"14C":150,"H2":151,"H4":152,"I+1":153,"S-1":154,"\\P":155,"=S+1":156,"=P@":157,"SiH4":158,"+2":159,"3H":160,"@@+1":161,"Ag":162,"C+1":163,"S@@+1":164,"Cl+1":165,"=Se":166,"-\\Ring1":167,"H0":168,"OH0":169,"11":170,"=Branch3":171,"=Te":172,"Mg":173,"O+1":174,"Zn":175,"\\C@":176,"\\S+1":177,"H1-1":178,"SeH1":179,"P@+1":180,"-\\Ring2":181,"11C":182,"=Te+1":183,"Zn+2":184,"/NH1":185,"18":186,"As":187,"BH2":188,"BH1-1":189,"Ca":190,"H3":191,"OH1-1":192,"SH2":193,"=O+1":194,"Se+1":195,"TeH2":196,"125IH1":197,"-/Ring1":198,"14CH2":199,"Ag+1":200,"=Se+1":201,"MgH2":202,"Mg+2":203,"11CH3":204,"18F":205,"BH2-1":206,"Ca+2":207},"merges":["B r","a n","c h","Br an","Bran ch","Branch 1","= C","R i","n g","Ri ng","Ring 1","= Branch1","Branch 2","= O","Ring 2","H 1","C @","= N","# Branch1","C@ @","= Branch2","C@ H1","C@@ H1","# Branch2","C l","# C","/ C","N H1","+ 1","- 1","= Ring1","O -1","N +1","\\ C","/ N","# N","= Ring2","= S","=N +1","N a","Na +1","\\ N","S +1","/ O","\\ S","\\ O","Br -1","I -1","Cl -1","/ C@H1","Branch 3","/ C@@H1","= P","/ S","=N -1","S i","K +1","N -1","S e","L i","Li +1","+ 3","Cl +3","\\ C@H1","Ring 3","\\ C@@H1","/ N+1","/ P","\\ F","P @","2 H","P H1","/ Br","N @","P +1","/ Cl","\\ NH1","\\ Br","@ +1","/ I","/ C@","T e","\\ N+1","P@ @","1 2","5 I","\\ O-1","12 5I","/ F","# N+1","\\ Cl","N@ +1","\\ I","- /","/ C@@","N@ @","N@ @+1","-/ Ring2","- \\","1 4","B -1","C -1","S @+1","14 C","H 2","H 4","I +1","S -1","\\ P","=S +1","=P @","Si H4","+ 2","3 H","@ @+1","A g","C +1","S @@+1","Cl +1","=S e","-\\ Ring1","H 0","O H0","1 1","= Branch3","= Te","M g","O +1","Z n","\\ C@","\\ S+1","H1 -1","Se H1","P@ +1","-\\ Ring2","11 C","=Te +1","Zn +2","/ NH1","1 8","A s","B H2","B H1-1","C a","H 3","O H1-1","S H2","=O +1","Se +1","Te H2","125I H1","-/ Ring1","14C H2","Ag +1","=Se +1","Mg H2","Mg +2","11C H3","18 F","BH2 -1","Ca +2"]}}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "special_tokens_map_file": null, "name_or_path": "./data/bpe/", "tokenizer_class": "RobertaTokenizer"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7426dceb7db538925a2098444fab5520bc9e5502f5b768b92185a3cd5c707821
|
3 |
+
size 2991
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<unk>":0,"<s>":1,"</s>":2,"<pad>":3,"<mask>":4,"\n":5,"#":6,"+":7,"-":8,".":9,"/":10,"0":11,"1":12,"2":13,"3":14,"4":15,"5":16,"8":17,"=":18,"@":19,"A":20,"B":21,"C":22,"F":23,"H":24,"I":25,"K":26,"L":27,"M":28,"N":29,"O":30,"P":31,"R":32,"S":33,"T":34,"Z":35,"\\":36,"a":37,"c":38,"e":39,"g":40,"h":41,"i":42,"l":43,"n":44,"r":45,"s":46,"Br":47,"an":48,"ch":49,"Bran":50,"Branch":51,"Branch1":52,"=C":53,"Ri":54,"ng":55,"Ring":56,"Ring1":57,"=Branch1":58,"Branch2":59,"=O":60,"Ring2":61,"H1":62,"C@":63,"=N":64,"#Branch1":65,"C@@":66,"=Branch2":67,"C@H1":68,"C@@H1":69,"#Branch2":70,"Cl":71,"#C":72,"/C":73,"NH1":74,"+1":75,"-1":76,"=Ring1":77,"O-1":78,"N+1":79,"\\C":80,"/N":81,"#N":82,"=Ring2":83,"=S":84,"=N+1":85,"Na":86,"Na+1":87,"\\N":88,"S+1":89,"/O":90,"\\S":91,"\\O":92,"Br-1":93,"I-1":94,"Cl-1":95,"/C@H1":96,"Branch3":97,"/C@@H1":98,"=P":99,"/S":100,"=N-1":101,"Si":102,"K+1":103,"N-1":104,"Se":105,"Li":106,"Li+1":107,"+3":108,"Cl+3":109,"\\C@H1":110,"Ring3":111,"\\C@@H1":112,"/N+1":113,"/P":114,"\\F":115,"P@":116,"2H":117,"PH1":118,"/Br":119,"N@":120,"P+1":121,"/Cl":122,"\\NH1":123,"\\Br":124,"@+1":125,"/I":126,"/C@":127,"Te":128,"\\N+1":129,"P@@":130,"12":131,"5I":132,"\\O-1":133,"125I":134,"/F":135,"#N+1":136,"\\Cl":137,"N@+1":138,"\\I":139,"-/":140,"/C@@":141,"N@@":142,"N@@+1":143,"-/Ring2":144,"-\\":145,"14":146,"B-1":147,"C-1":148,"S@+1":149,"14C":150,"H2":151,"H4":152,"I+1":153,"S-1":154,"\\P":155,"=S+1":156,"=P@":157,"SiH4":158,"+2":159,"3H":160,"@@+1":161,"Ag":162,"C+1":163,"S@@+1":164,"Cl+1":165,"=Se":166,"-\\Ring1":167,"H0":168,"OH0":169,"11":170,"=Branch3":171,"=Te":172,"Mg":173,"O+1":174,"Zn":175,"\\C@":176,"\\S+1":177,"H1-1":178,"SeH1":179,"P@+1":180,"-\\Ring2":181,"11C":182,"=Te+1":183,"Zn+2":184,"/NH1":185,"18":186,"As":187,"BH2":188,"BH1-1":189,"Ca":190,"H3":191,"OH1-1":192,"SH2":193,"=O+1":194,"Se+1":195,"TeH2":196,"125IH1":197,"-/Ring1":198,"14CH2":199,"Ag+1":200,"=Se+1":201,"MgH2":202,"Mg+2":203,"11CH3":204,"18F":205,"BH2-1":206,"Ca+2":207}
|